indonesia-distilgpt2 / trainer_state.json
zaenalium's picture
End of training
d16b0f4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 143758,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.993043865384883e-05,
"loss": 3.1775,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 4.986087730769766e-05,
"loss": 2.9882,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 4.979131596154649e-05,
"loss": 2.932,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 4.9721754615395325e-05,
"loss": 2.8774,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 4.965219326924415e-05,
"loss": 2.9227,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 4.9582631923092976e-05,
"loss": 2.9011,
"step": 1200
},
{
"epoch": 0.02,
"learning_rate": 4.9513070576941804e-05,
"loss": 2.8233,
"step": 1400
},
{
"epoch": 0.02,
"learning_rate": 4.944350923079063e-05,
"loss": 2.859,
"step": 1600
},
{
"epoch": 0.03,
"learning_rate": 4.937394788463946e-05,
"loss": 2.8269,
"step": 1800
},
{
"epoch": 0.03,
"learning_rate": 4.93043865384883e-05,
"loss": 2.82,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 4.9234825192337127e-05,
"loss": 2.7556,
"step": 2200
},
{
"epoch": 0.03,
"learning_rate": 4.9165263846185955e-05,
"loss": 2.7388,
"step": 2400
},
{
"epoch": 0.04,
"learning_rate": 4.9095702500034784e-05,
"loss": 2.8441,
"step": 2600
},
{
"epoch": 0.04,
"learning_rate": 4.902614115388361e-05,
"loss": 2.7828,
"step": 2800
},
{
"epoch": 0.04,
"learning_rate": 4.895657980773244e-05,
"loss": 2.7528,
"step": 3000
},
{
"epoch": 0.04,
"learning_rate": 4.888701846158127e-05,
"loss": 2.7744,
"step": 3200
},
{
"epoch": 0.05,
"learning_rate": 4.88174571154301e-05,
"loss": 2.7752,
"step": 3400
},
{
"epoch": 0.05,
"learning_rate": 4.874789576927893e-05,
"loss": 2.7288,
"step": 3600
},
{
"epoch": 0.05,
"learning_rate": 4.867833442312776e-05,
"loss": 2.7043,
"step": 3800
},
{
"epoch": 0.06,
"learning_rate": 4.8608773076976586e-05,
"loss": 2.7296,
"step": 4000
},
{
"epoch": 0.06,
"learning_rate": 4.8539211730825415e-05,
"loss": 2.6831,
"step": 4200
},
{
"epoch": 0.06,
"learning_rate": 4.846965038467425e-05,
"loss": 2.7519,
"step": 4400
},
{
"epoch": 0.06,
"learning_rate": 4.840008903852308e-05,
"loss": 2.6923,
"step": 4600
},
{
"epoch": 0.07,
"learning_rate": 4.833052769237191e-05,
"loss": 2.6681,
"step": 4800
},
{
"epoch": 0.07,
"learning_rate": 4.826096634622073e-05,
"loss": 2.6918,
"step": 5000
},
{
"epoch": 0.07,
"learning_rate": 4.819140500006956e-05,
"loss": 2.6827,
"step": 5200
},
{
"epoch": 0.08,
"learning_rate": 4.812184365391839e-05,
"loss": 2.6477,
"step": 5400
},
{
"epoch": 0.08,
"learning_rate": 4.8052282307767224e-05,
"loss": 2.6638,
"step": 5600
},
{
"epoch": 0.08,
"learning_rate": 4.798272096161605e-05,
"loss": 2.7062,
"step": 5800
},
{
"epoch": 0.08,
"learning_rate": 4.791315961546488e-05,
"loss": 2.6954,
"step": 6000
},
{
"epoch": 0.09,
"learning_rate": 4.784359826931371e-05,
"loss": 2.7133,
"step": 6200
},
{
"epoch": 0.09,
"learning_rate": 4.777403692316254e-05,
"loss": 2.693,
"step": 6400
},
{
"epoch": 0.09,
"learning_rate": 4.770447557701137e-05,
"loss": 2.632,
"step": 6600
},
{
"epoch": 0.09,
"learning_rate": 4.76349142308602e-05,
"loss": 2.6715,
"step": 6800
},
{
"epoch": 0.1,
"learning_rate": 4.7565352884709026e-05,
"loss": 2.6947,
"step": 7000
},
{
"epoch": 0.1,
"learning_rate": 4.7495791538557855e-05,
"loss": 2.6396,
"step": 7200
},
{
"epoch": 0.1,
"learning_rate": 4.7426230192406684e-05,
"loss": 2.6506,
"step": 7400
},
{
"epoch": 0.11,
"learning_rate": 4.735666884625551e-05,
"loss": 2.6204,
"step": 7600
},
{
"epoch": 0.11,
"learning_rate": 4.728710750010434e-05,
"loss": 2.6298,
"step": 7800
},
{
"epoch": 0.11,
"learning_rate": 4.721754615395318e-05,
"loss": 2.5967,
"step": 8000
},
{
"epoch": 0.11,
"learning_rate": 4.7147984807802006e-05,
"loss": 2.6396,
"step": 8200
},
{
"epoch": 0.12,
"learning_rate": 4.7078423461650835e-05,
"loss": 2.601,
"step": 8400
},
{
"epoch": 0.12,
"learning_rate": 4.7008862115499664e-05,
"loss": 2.6289,
"step": 8600
},
{
"epoch": 0.12,
"learning_rate": 4.693930076934849e-05,
"loss": 2.6143,
"step": 8800
},
{
"epoch": 0.13,
"learning_rate": 4.6869739423197315e-05,
"loss": 2.6672,
"step": 9000
},
{
"epoch": 0.13,
"learning_rate": 4.680017807704615e-05,
"loss": 2.6145,
"step": 9200
},
{
"epoch": 0.13,
"learning_rate": 4.673061673089498e-05,
"loss": 2.626,
"step": 9400
},
{
"epoch": 0.13,
"learning_rate": 4.666105538474381e-05,
"loss": 2.6622,
"step": 9600
},
{
"epoch": 0.14,
"learning_rate": 4.659149403859264e-05,
"loss": 2.578,
"step": 9800
},
{
"epoch": 0.14,
"learning_rate": 4.6521932692441466e-05,
"loss": 2.6476,
"step": 10000
},
{
"epoch": 0.14,
"learning_rate": 4.6452371346290294e-05,
"loss": 2.6051,
"step": 10200
},
{
"epoch": 0.14,
"learning_rate": 4.638281000013912e-05,
"loss": 2.5568,
"step": 10400
},
{
"epoch": 0.15,
"learning_rate": 4.631324865398796e-05,
"loss": 2.5498,
"step": 10600
},
{
"epoch": 0.15,
"learning_rate": 4.624368730783679e-05,
"loss": 2.5283,
"step": 10800
},
{
"epoch": 0.15,
"learning_rate": 4.617412596168561e-05,
"loss": 2.615,
"step": 11000
},
{
"epoch": 0.16,
"learning_rate": 4.610456461553444e-05,
"loss": 2.6121,
"step": 11200
},
{
"epoch": 0.16,
"learning_rate": 4.603500326938327e-05,
"loss": 2.535,
"step": 11400
},
{
"epoch": 0.16,
"learning_rate": 4.59654419232321e-05,
"loss": 2.584,
"step": 11600
},
{
"epoch": 0.16,
"learning_rate": 4.589588057708093e-05,
"loss": 2.5619,
"step": 11800
},
{
"epoch": 0.17,
"learning_rate": 4.582631923092976e-05,
"loss": 2.581,
"step": 12000
},
{
"epoch": 0.17,
"learning_rate": 4.575675788477859e-05,
"loss": 2.5513,
"step": 12200
},
{
"epoch": 0.17,
"learning_rate": 4.568719653862742e-05,
"loss": 2.5844,
"step": 12400
},
{
"epoch": 0.18,
"learning_rate": 4.561763519247625e-05,
"loss": 2.6033,
"step": 12600
},
{
"epoch": 0.18,
"learning_rate": 4.5548073846325076e-05,
"loss": 2.4988,
"step": 12800
},
{
"epoch": 0.18,
"learning_rate": 4.5478512500173905e-05,
"loss": 2.5467,
"step": 13000
},
{
"epoch": 0.18,
"learning_rate": 4.5408951154022734e-05,
"loss": 2.5825,
"step": 13200
},
{
"epoch": 0.19,
"learning_rate": 4.533938980787156e-05,
"loss": 2.5732,
"step": 13400
},
{
"epoch": 0.19,
"learning_rate": 4.526982846172039e-05,
"loss": 2.572,
"step": 13600
},
{
"epoch": 0.19,
"learning_rate": 4.520026711556922e-05,
"loss": 2.4804,
"step": 13800
},
{
"epoch": 0.19,
"learning_rate": 4.513070576941805e-05,
"loss": 2.5576,
"step": 14000
},
{
"epoch": 0.2,
"learning_rate": 4.5061144423266885e-05,
"loss": 2.5474,
"step": 14200
},
{
"epoch": 0.2,
"learning_rate": 4.4991583077115714e-05,
"loss": 2.5262,
"step": 14400
},
{
"epoch": 0.2,
"learning_rate": 4.492202173096454e-05,
"loss": 2.5679,
"step": 14600
},
{
"epoch": 0.21,
"learning_rate": 4.485246038481337e-05,
"loss": 2.5541,
"step": 14800
},
{
"epoch": 0.21,
"learning_rate": 4.4782899038662194e-05,
"loss": 2.4937,
"step": 15000
},
{
"epoch": 0.21,
"learning_rate": 4.471333769251102e-05,
"loss": 2.5643,
"step": 15200
},
{
"epoch": 0.21,
"learning_rate": 4.464377634635986e-05,
"loss": 2.5978,
"step": 15400
},
{
"epoch": 0.22,
"learning_rate": 4.457421500020869e-05,
"loss": 2.4727,
"step": 15600
},
{
"epoch": 0.22,
"learning_rate": 4.4504653654057516e-05,
"loss": 2.613,
"step": 15800
},
{
"epoch": 0.22,
"learning_rate": 4.4435092307906345e-05,
"loss": 2.5453,
"step": 16000
},
{
"epoch": 0.23,
"learning_rate": 4.4365530961755174e-05,
"loss": 2.5362,
"step": 16200
},
{
"epoch": 0.23,
"learning_rate": 4.4295969615604e-05,
"loss": 2.5296,
"step": 16400
},
{
"epoch": 0.23,
"learning_rate": 4.422640826945283e-05,
"loss": 2.5282,
"step": 16600
},
{
"epoch": 0.23,
"learning_rate": 4.415684692330167e-05,
"loss": 2.4973,
"step": 16800
},
{
"epoch": 0.24,
"learning_rate": 4.408728557715049e-05,
"loss": 2.5563,
"step": 17000
},
{
"epoch": 0.24,
"learning_rate": 4.401772423099932e-05,
"loss": 2.5514,
"step": 17200
},
{
"epoch": 0.24,
"learning_rate": 4.394816288484815e-05,
"loss": 2.5327,
"step": 17400
},
{
"epoch": 0.24,
"learning_rate": 4.3878601538696976e-05,
"loss": 2.5251,
"step": 17600
},
{
"epoch": 0.25,
"learning_rate": 4.380904019254581e-05,
"loss": 2.5695,
"step": 17800
},
{
"epoch": 0.25,
"learning_rate": 4.373947884639464e-05,
"loss": 2.4936,
"step": 18000
},
{
"epoch": 0.25,
"learning_rate": 4.366991750024347e-05,
"loss": 2.516,
"step": 18200
},
{
"epoch": 0.26,
"learning_rate": 4.36003561540923e-05,
"loss": 2.5568,
"step": 18400
},
{
"epoch": 0.26,
"learning_rate": 4.353079480794113e-05,
"loss": 2.5253,
"step": 18600
},
{
"epoch": 0.26,
"learning_rate": 4.346123346178995e-05,
"loss": 2.4991,
"step": 18800
},
{
"epoch": 0.26,
"learning_rate": 4.3391672115638785e-05,
"loss": 2.5029,
"step": 19000
},
{
"epoch": 0.27,
"learning_rate": 4.332211076948761e-05,
"loss": 2.5178,
"step": 19200
},
{
"epoch": 0.27,
"learning_rate": 4.325254942333644e-05,
"loss": 2.4964,
"step": 19400
},
{
"epoch": 0.27,
"learning_rate": 4.318298807718527e-05,
"loss": 2.4339,
"step": 19600
},
{
"epoch": 0.28,
"learning_rate": 4.31134267310341e-05,
"loss": 2.4874,
"step": 19800
},
{
"epoch": 0.28,
"learning_rate": 4.304386538488293e-05,
"loss": 2.4894,
"step": 20000
},
{
"epoch": 0.28,
"learning_rate": 4.297430403873176e-05,
"loss": 2.5352,
"step": 20200
},
{
"epoch": 0.28,
"learning_rate": 4.290474269258059e-05,
"loss": 2.5251,
"step": 20400
},
{
"epoch": 0.29,
"learning_rate": 4.283518134642942e-05,
"loss": 2.4545,
"step": 20600
},
{
"epoch": 0.29,
"learning_rate": 4.2765620000278244e-05,
"loss": 2.4831,
"step": 20800
},
{
"epoch": 0.29,
"learning_rate": 4.269605865412707e-05,
"loss": 2.5145,
"step": 21000
},
{
"epoch": 0.29,
"learning_rate": 4.26264973079759e-05,
"loss": 2.4543,
"step": 21200
},
{
"epoch": 0.3,
"learning_rate": 4.255693596182474e-05,
"loss": 2.5056,
"step": 21400
},
{
"epoch": 0.3,
"learning_rate": 4.2487374615673566e-05,
"loss": 2.4933,
"step": 21600
},
{
"epoch": 0.3,
"learning_rate": 4.2417813269522395e-05,
"loss": 2.4786,
"step": 21800
},
{
"epoch": 0.31,
"learning_rate": 4.2348251923371224e-05,
"loss": 2.3971,
"step": 22000
},
{
"epoch": 0.31,
"learning_rate": 4.227869057722005e-05,
"loss": 2.4759,
"step": 22200
},
{
"epoch": 0.31,
"learning_rate": 4.220912923106888e-05,
"loss": 2.4637,
"step": 22400
},
{
"epoch": 0.31,
"learning_rate": 4.213956788491771e-05,
"loss": 2.5033,
"step": 22600
},
{
"epoch": 0.32,
"learning_rate": 4.207000653876654e-05,
"loss": 2.5021,
"step": 22800
},
{
"epoch": 0.32,
"learning_rate": 4.200044519261537e-05,
"loss": 2.4136,
"step": 23000
},
{
"epoch": 0.32,
"learning_rate": 4.19308838464642e-05,
"loss": 2.5604,
"step": 23200
},
{
"epoch": 0.33,
"learning_rate": 4.1861322500313026e-05,
"loss": 2.5054,
"step": 23400
},
{
"epoch": 0.33,
"learning_rate": 4.1791761154161855e-05,
"loss": 2.4761,
"step": 23600
},
{
"epoch": 0.33,
"learning_rate": 4.1722199808010684e-05,
"loss": 2.4802,
"step": 23800
},
{
"epoch": 0.33,
"learning_rate": 4.165263846185952e-05,
"loss": 2.5241,
"step": 24000
},
{
"epoch": 0.34,
"learning_rate": 4.158307711570835e-05,
"loss": 2.4519,
"step": 24200
},
{
"epoch": 0.34,
"learning_rate": 4.151351576955718e-05,
"loss": 2.487,
"step": 24400
},
{
"epoch": 0.34,
"learning_rate": 4.1443954423406006e-05,
"loss": 2.4719,
"step": 24600
},
{
"epoch": 0.35,
"learning_rate": 4.137439307725483e-05,
"loss": 2.5132,
"step": 24800
},
{
"epoch": 0.35,
"learning_rate": 4.130483173110366e-05,
"loss": 2.4432,
"step": 25000
},
{
"epoch": 0.35,
"learning_rate": 4.123527038495249e-05,
"loss": 2.448,
"step": 25200
},
{
"epoch": 0.35,
"learning_rate": 4.116570903880132e-05,
"loss": 2.4659,
"step": 25400
},
{
"epoch": 0.36,
"learning_rate": 4.109614769265015e-05,
"loss": 2.4596,
"step": 25600
},
{
"epoch": 0.36,
"learning_rate": 4.102658634649898e-05,
"loss": 2.473,
"step": 25800
},
{
"epoch": 0.36,
"learning_rate": 4.095702500034781e-05,
"loss": 2.4812,
"step": 26000
},
{
"epoch": 0.36,
"learning_rate": 4.088746365419664e-05,
"loss": 2.43,
"step": 26200
},
{
"epoch": 0.37,
"learning_rate": 4.081790230804547e-05,
"loss": 2.4743,
"step": 26400
},
{
"epoch": 0.37,
"learning_rate": 4.07483409618943e-05,
"loss": 2.4693,
"step": 26600
},
{
"epoch": 0.37,
"learning_rate": 4.0678779615743124e-05,
"loss": 2.4932,
"step": 26800
},
{
"epoch": 0.38,
"learning_rate": 4.060921826959195e-05,
"loss": 2.4459,
"step": 27000
},
{
"epoch": 0.38,
"learning_rate": 4.053965692344078e-05,
"loss": 2.4123,
"step": 27200
},
{
"epoch": 0.38,
"learning_rate": 4.047009557728961e-05,
"loss": 2.4364,
"step": 27400
},
{
"epoch": 0.38,
"learning_rate": 4.0400534231138446e-05,
"loss": 2.4428,
"step": 27600
},
{
"epoch": 0.39,
"learning_rate": 4.0330972884987275e-05,
"loss": 2.4157,
"step": 27800
},
{
"epoch": 0.39,
"learning_rate": 4.0261411538836103e-05,
"loss": 2.4145,
"step": 28000
},
{
"epoch": 0.39,
"learning_rate": 4.019185019268493e-05,
"loss": 2.5131,
"step": 28200
},
{
"epoch": 0.4,
"learning_rate": 4.012228884653376e-05,
"loss": 2.4509,
"step": 28400
},
{
"epoch": 0.4,
"learning_rate": 4.005272750038259e-05,
"loss": 2.4781,
"step": 28600
},
{
"epoch": 0.4,
"learning_rate": 3.998316615423142e-05,
"loss": 2.428,
"step": 28800
},
{
"epoch": 0.4,
"learning_rate": 3.991360480808025e-05,
"loss": 2.4402,
"step": 29000
},
{
"epoch": 0.41,
"learning_rate": 3.9844043461929077e-05,
"loss": 2.3886,
"step": 29200
},
{
"epoch": 0.41,
"learning_rate": 3.9774482115777905e-05,
"loss": 2.4329,
"step": 29400
},
{
"epoch": 0.41,
"learning_rate": 3.9704920769626734e-05,
"loss": 2.4038,
"step": 29600
},
{
"epoch": 0.41,
"learning_rate": 3.963535942347556e-05,
"loss": 2.3857,
"step": 29800
},
{
"epoch": 0.42,
"learning_rate": 3.956579807732439e-05,
"loss": 2.3806,
"step": 30000
},
{
"epoch": 0.42,
"learning_rate": 3.949623673117323e-05,
"loss": 2.4243,
"step": 30200
},
{
"epoch": 0.42,
"learning_rate": 3.9426675385022057e-05,
"loss": 2.4545,
"step": 30400
},
{
"epoch": 0.43,
"learning_rate": 3.9357114038870885e-05,
"loss": 2.5031,
"step": 30600
},
{
"epoch": 0.43,
"learning_rate": 3.928755269271971e-05,
"loss": 2.4046,
"step": 30800
},
{
"epoch": 0.43,
"learning_rate": 3.9217991346568536e-05,
"loss": 2.4326,
"step": 31000
},
{
"epoch": 0.43,
"learning_rate": 3.914843000041737e-05,
"loss": 2.4118,
"step": 31200
},
{
"epoch": 0.44,
"learning_rate": 3.90788686542662e-05,
"loss": 2.4556,
"step": 31400
},
{
"epoch": 0.44,
"learning_rate": 3.900930730811503e-05,
"loss": 2.4579,
"step": 31600
},
{
"epoch": 0.44,
"learning_rate": 3.893974596196386e-05,
"loss": 2.3932,
"step": 31800
},
{
"epoch": 0.45,
"learning_rate": 3.887018461581269e-05,
"loss": 2.4281,
"step": 32000
},
{
"epoch": 0.45,
"learning_rate": 3.8800623269661516e-05,
"loss": 2.4362,
"step": 32200
},
{
"epoch": 0.45,
"learning_rate": 3.8731061923510345e-05,
"loss": 2.4217,
"step": 32400
},
{
"epoch": 0.45,
"learning_rate": 3.866150057735918e-05,
"loss": 2.4305,
"step": 32600
},
{
"epoch": 0.46,
"learning_rate": 3.8591939231208e-05,
"loss": 2.441,
"step": 32800
},
{
"epoch": 0.46,
"learning_rate": 3.852237788505683e-05,
"loss": 2.422,
"step": 33000
},
{
"epoch": 0.46,
"learning_rate": 3.845281653890566e-05,
"loss": 2.3997,
"step": 33200
},
{
"epoch": 0.46,
"learning_rate": 3.838325519275449e-05,
"loss": 2.3749,
"step": 33400
},
{
"epoch": 0.47,
"learning_rate": 3.831369384660332e-05,
"loss": 2.4073,
"step": 33600
},
{
"epoch": 0.47,
"learning_rate": 3.8244132500452154e-05,
"loss": 2.4585,
"step": 33800
},
{
"epoch": 0.47,
"learning_rate": 3.817457115430098e-05,
"loss": 2.4215,
"step": 34000
},
{
"epoch": 0.48,
"learning_rate": 3.810500980814981e-05,
"loss": 2.414,
"step": 34200
},
{
"epoch": 0.48,
"learning_rate": 3.803544846199864e-05,
"loss": 2.4223,
"step": 34400
},
{
"epoch": 0.48,
"learning_rate": 3.796588711584746e-05,
"loss": 2.4012,
"step": 34600
},
{
"epoch": 0.48,
"learning_rate": 3.78963257696963e-05,
"loss": 2.422,
"step": 34800
},
{
"epoch": 0.49,
"learning_rate": 3.782676442354513e-05,
"loss": 2.3821,
"step": 35000
},
{
"epoch": 0.49,
"learning_rate": 3.7757203077393956e-05,
"loss": 2.4588,
"step": 35200
},
{
"epoch": 0.49,
"learning_rate": 3.7687641731242785e-05,
"loss": 2.4303,
"step": 35400
},
{
"epoch": 0.5,
"learning_rate": 3.7618080385091614e-05,
"loss": 2.4043,
"step": 35600
},
{
"epoch": 0.5,
"learning_rate": 3.754851903894044e-05,
"loss": 2.3885,
"step": 35800
},
{
"epoch": 0.5,
"learning_rate": 3.747895769278927e-05,
"loss": 2.452,
"step": 36000
},
{
"epoch": 0.5,
"learning_rate": 3.740939634663811e-05,
"loss": 2.4303,
"step": 36200
},
{
"epoch": 0.51,
"learning_rate": 3.7339835000486936e-05,
"loss": 2.4219,
"step": 36400
},
{
"epoch": 0.51,
"learning_rate": 3.727027365433576e-05,
"loss": 2.4514,
"step": 36600
},
{
"epoch": 0.51,
"learning_rate": 3.720071230818459e-05,
"loss": 2.3961,
"step": 36800
},
{
"epoch": 0.51,
"learning_rate": 3.7131150962033416e-05,
"loss": 2.3741,
"step": 37000
},
{
"epoch": 0.52,
"learning_rate": 3.7061589615882244e-05,
"loss": 2.4553,
"step": 37200
},
{
"epoch": 0.52,
"learning_rate": 3.699202826973108e-05,
"loss": 2.3806,
"step": 37400
},
{
"epoch": 0.52,
"learning_rate": 3.692246692357991e-05,
"loss": 2.416,
"step": 37600
},
{
"epoch": 0.53,
"learning_rate": 3.685290557742874e-05,
"loss": 2.3745,
"step": 37800
},
{
"epoch": 0.53,
"learning_rate": 3.678334423127757e-05,
"loss": 2.4195,
"step": 38000
},
{
"epoch": 0.53,
"learning_rate": 3.6713782885126395e-05,
"loss": 2.4216,
"step": 38200
},
{
"epoch": 0.53,
"learning_rate": 3.6644221538975224e-05,
"loss": 2.3769,
"step": 38400
},
{
"epoch": 0.54,
"learning_rate": 3.657466019282405e-05,
"loss": 2.3851,
"step": 38600
},
{
"epoch": 0.54,
"learning_rate": 3.650509884667288e-05,
"loss": 2.4112,
"step": 38800
},
{
"epoch": 0.54,
"learning_rate": 3.643553750052171e-05,
"loss": 2.4096,
"step": 39000
},
{
"epoch": 0.55,
"learning_rate": 3.636597615437054e-05,
"loss": 2.4354,
"step": 39200
},
{
"epoch": 0.55,
"learning_rate": 3.629641480821937e-05,
"loss": 2.422,
"step": 39400
},
{
"epoch": 0.55,
"learning_rate": 3.62268534620682e-05,
"loss": 2.4195,
"step": 39600
},
{
"epoch": 0.55,
"learning_rate": 3.6157292115917026e-05,
"loss": 2.4506,
"step": 39800
},
{
"epoch": 0.56,
"learning_rate": 3.608773076976586e-05,
"loss": 2.4016,
"step": 40000
},
{
"epoch": 0.56,
"learning_rate": 3.601816942361469e-05,
"loss": 2.3905,
"step": 40200
},
{
"epoch": 0.56,
"learning_rate": 3.594860807746352e-05,
"loss": 2.4293,
"step": 40400
},
{
"epoch": 0.56,
"learning_rate": 3.587904673131234e-05,
"loss": 2.4237,
"step": 40600
},
{
"epoch": 0.57,
"learning_rate": 3.580948538516117e-05,
"loss": 2.3867,
"step": 40800
},
{
"epoch": 0.57,
"learning_rate": 3.5739924039010006e-05,
"loss": 2.3885,
"step": 41000
},
{
"epoch": 0.57,
"learning_rate": 3.5670362692858835e-05,
"loss": 2.4321,
"step": 41200
},
{
"epoch": 0.58,
"learning_rate": 3.5600801346707664e-05,
"loss": 2.3701,
"step": 41400
},
{
"epoch": 0.58,
"learning_rate": 3.553124000055649e-05,
"loss": 2.4581,
"step": 41600
},
{
"epoch": 0.58,
"learning_rate": 3.546167865440532e-05,
"loss": 2.398,
"step": 41800
},
{
"epoch": 0.58,
"learning_rate": 3.539211730825415e-05,
"loss": 2.4155,
"step": 42000
},
{
"epoch": 0.59,
"learning_rate": 3.532255596210298e-05,
"loss": 2.4122,
"step": 42200
},
{
"epoch": 0.59,
"learning_rate": 3.5252994615951815e-05,
"loss": 2.3833,
"step": 42400
},
{
"epoch": 0.59,
"learning_rate": 3.518343326980064e-05,
"loss": 2.4614,
"step": 42600
},
{
"epoch": 0.6,
"learning_rate": 3.5113871923649466e-05,
"loss": 2.3859,
"step": 42800
},
{
"epoch": 0.6,
"learning_rate": 3.5044310577498295e-05,
"loss": 2.3674,
"step": 43000
},
{
"epoch": 0.6,
"learning_rate": 3.4974749231347124e-05,
"loss": 2.4227,
"step": 43200
},
{
"epoch": 0.6,
"learning_rate": 3.490518788519595e-05,
"loss": 2.4031,
"step": 43400
},
{
"epoch": 0.61,
"learning_rate": 3.483562653904479e-05,
"loss": 2.4271,
"step": 43600
},
{
"epoch": 0.61,
"learning_rate": 3.476606519289362e-05,
"loss": 2.4176,
"step": 43800
},
{
"epoch": 0.61,
"learning_rate": 3.4696503846742446e-05,
"loss": 2.4076,
"step": 44000
},
{
"epoch": 0.61,
"learning_rate": 3.4626942500591275e-05,
"loss": 2.3618,
"step": 44200
},
{
"epoch": 0.62,
"learning_rate": 3.4557381154440104e-05,
"loss": 2.3998,
"step": 44400
},
{
"epoch": 0.62,
"learning_rate": 3.448781980828893e-05,
"loss": 2.4145,
"step": 44600
},
{
"epoch": 0.62,
"learning_rate": 3.441825846213776e-05,
"loss": 2.3621,
"step": 44800
},
{
"epoch": 0.63,
"learning_rate": 3.434869711598659e-05,
"loss": 2.4247,
"step": 45000
},
{
"epoch": 0.63,
"learning_rate": 3.427913576983542e-05,
"loss": 2.3934,
"step": 45200
},
{
"epoch": 0.63,
"learning_rate": 3.420957442368425e-05,
"loss": 2.379,
"step": 45400
},
{
"epoch": 0.63,
"learning_rate": 3.414001307753308e-05,
"loss": 2.3932,
"step": 45600
},
{
"epoch": 0.64,
"learning_rate": 3.4070451731381906e-05,
"loss": 2.3922,
"step": 45800
},
{
"epoch": 0.64,
"learning_rate": 3.400089038523074e-05,
"loss": 2.4221,
"step": 46000
},
{
"epoch": 0.64,
"learning_rate": 3.393132903907957e-05,
"loss": 2.3921,
"step": 46200
},
{
"epoch": 0.65,
"learning_rate": 3.38617676929284e-05,
"loss": 2.3484,
"step": 46400
},
{
"epoch": 0.65,
"learning_rate": 3.379220634677722e-05,
"loss": 2.3812,
"step": 46600
},
{
"epoch": 0.65,
"learning_rate": 3.372264500062605e-05,
"loss": 2.3616,
"step": 46800
},
{
"epoch": 0.65,
"learning_rate": 3.365308365447488e-05,
"loss": 2.388,
"step": 47000
},
{
"epoch": 0.66,
"learning_rate": 3.3583522308323714e-05,
"loss": 2.3102,
"step": 47200
},
{
"epoch": 0.66,
"learning_rate": 3.351396096217254e-05,
"loss": 2.3746,
"step": 47400
},
{
"epoch": 0.66,
"learning_rate": 3.344439961602137e-05,
"loss": 2.4212,
"step": 47600
},
{
"epoch": 0.67,
"learning_rate": 3.33748382698702e-05,
"loss": 2.3558,
"step": 47800
},
{
"epoch": 0.67,
"learning_rate": 3.330527692371903e-05,
"loss": 2.393,
"step": 48000
},
{
"epoch": 0.67,
"learning_rate": 3.323571557756786e-05,
"loss": 2.3798,
"step": 48200
},
{
"epoch": 0.67,
"learning_rate": 3.316615423141669e-05,
"loss": 2.4259,
"step": 48400
},
{
"epoch": 0.68,
"learning_rate": 3.3096592885265516e-05,
"loss": 2.3816,
"step": 48600
},
{
"epoch": 0.68,
"learning_rate": 3.3027031539114345e-05,
"loss": 2.3699,
"step": 48800
},
{
"epoch": 0.68,
"learning_rate": 3.2957470192963174e-05,
"loss": 2.4018,
"step": 49000
},
{
"epoch": 0.68,
"learning_rate": 3.2887908846812e-05,
"loss": 2.3356,
"step": 49200
},
{
"epoch": 0.69,
"learning_rate": 3.281834750066083e-05,
"loss": 2.347,
"step": 49400
},
{
"epoch": 0.69,
"learning_rate": 3.274878615450967e-05,
"loss": 2.3598,
"step": 49600
},
{
"epoch": 0.69,
"learning_rate": 3.2679224808358496e-05,
"loss": 2.3773,
"step": 49800
},
{
"epoch": 0.7,
"learning_rate": 3.2609663462207325e-05,
"loss": 2.3704,
"step": 50000
},
{
"epoch": 0.7,
"learning_rate": 3.2540102116056154e-05,
"loss": 2.3317,
"step": 50200
},
{
"epoch": 0.7,
"learning_rate": 3.247054076990498e-05,
"loss": 2.3878,
"step": 50400
},
{
"epoch": 0.7,
"learning_rate": 3.2400979423753805e-05,
"loss": 2.3198,
"step": 50600
},
{
"epoch": 0.71,
"learning_rate": 3.233141807760264e-05,
"loss": 2.4416,
"step": 50800
},
{
"epoch": 0.71,
"learning_rate": 3.226185673145147e-05,
"loss": 2.3275,
"step": 51000
},
{
"epoch": 0.71,
"learning_rate": 3.21922953853003e-05,
"loss": 2.3858,
"step": 51200
},
{
"epoch": 0.72,
"learning_rate": 3.212273403914913e-05,
"loss": 2.3448,
"step": 51400
},
{
"epoch": 0.72,
"learning_rate": 3.2053172692997956e-05,
"loss": 2.3493,
"step": 51600
},
{
"epoch": 0.72,
"learning_rate": 3.1983611346846785e-05,
"loss": 2.3644,
"step": 51800
},
{
"epoch": 0.72,
"learning_rate": 3.1914050000695614e-05,
"loss": 2.339,
"step": 52000
},
{
"epoch": 0.73,
"learning_rate": 3.184448865454445e-05,
"loss": 2.3393,
"step": 52200
},
{
"epoch": 0.73,
"learning_rate": 3.177492730839327e-05,
"loss": 2.3754,
"step": 52400
},
{
"epoch": 0.73,
"learning_rate": 3.17053659622421e-05,
"loss": 2.3251,
"step": 52600
},
{
"epoch": 0.73,
"learning_rate": 3.163580461609093e-05,
"loss": 2.3407,
"step": 52800
},
{
"epoch": 0.74,
"learning_rate": 3.156624326993976e-05,
"loss": 2.3092,
"step": 53000
},
{
"epoch": 0.74,
"learning_rate": 3.149668192378859e-05,
"loss": 2.3626,
"step": 53200
},
{
"epoch": 0.74,
"learning_rate": 3.142712057763742e-05,
"loss": 2.3639,
"step": 53400
},
{
"epoch": 0.75,
"learning_rate": 3.135755923148625e-05,
"loss": 2.3443,
"step": 53600
},
{
"epoch": 0.75,
"learning_rate": 3.128799788533508e-05,
"loss": 2.3274,
"step": 53800
},
{
"epoch": 0.75,
"learning_rate": 3.121843653918391e-05,
"loss": 2.3728,
"step": 54000
},
{
"epoch": 0.75,
"learning_rate": 3.114887519303274e-05,
"loss": 2.3047,
"step": 54200
},
{
"epoch": 0.76,
"learning_rate": 3.107931384688157e-05,
"loss": 2.316,
"step": 54400
},
{
"epoch": 0.76,
"learning_rate": 3.1009752500730396e-05,
"loss": 2.3326,
"step": 54600
},
{
"epoch": 0.76,
"learning_rate": 3.0940191154579225e-05,
"loss": 2.357,
"step": 54800
},
{
"epoch": 0.77,
"learning_rate": 3.0870629808428053e-05,
"loss": 2.367,
"step": 55000
},
{
"epoch": 0.77,
"learning_rate": 3.080106846227688e-05,
"loss": 2.3529,
"step": 55200
},
{
"epoch": 0.77,
"learning_rate": 3.073150711612571e-05,
"loss": 2.3748,
"step": 55400
},
{
"epoch": 0.77,
"learning_rate": 3.066194576997454e-05,
"loss": 2.3208,
"step": 55600
},
{
"epoch": 0.78,
"learning_rate": 3.0592384423823376e-05,
"loss": 2.3327,
"step": 55800
},
{
"epoch": 0.78,
"learning_rate": 3.0522823077672204e-05,
"loss": 2.3327,
"step": 56000
},
{
"epoch": 0.78,
"learning_rate": 3.0453261731521033e-05,
"loss": 2.3588,
"step": 56200
},
{
"epoch": 0.78,
"learning_rate": 3.0383700385369855e-05,
"loss": 2.3343,
"step": 56400
},
{
"epoch": 0.79,
"learning_rate": 3.0314139039218688e-05,
"loss": 2.3095,
"step": 56600
},
{
"epoch": 0.79,
"learning_rate": 3.0244577693067517e-05,
"loss": 2.3547,
"step": 56800
},
{
"epoch": 0.79,
"learning_rate": 3.0175016346916345e-05,
"loss": 2.3346,
"step": 57000
},
{
"epoch": 0.8,
"learning_rate": 3.0105455000765174e-05,
"loss": 2.289,
"step": 57200
},
{
"epoch": 0.8,
"learning_rate": 3.0035893654614006e-05,
"loss": 2.3577,
"step": 57400
},
{
"epoch": 0.8,
"learning_rate": 2.9966332308462835e-05,
"loss": 2.3592,
"step": 57600
},
{
"epoch": 0.8,
"learning_rate": 2.9896770962311664e-05,
"loss": 2.3258,
"step": 57800
},
{
"epoch": 0.81,
"learning_rate": 2.9827209616160496e-05,
"loss": 2.336,
"step": 58000
},
{
"epoch": 0.81,
"learning_rate": 2.9757648270009325e-05,
"loss": 2.3663,
"step": 58200
},
{
"epoch": 0.81,
"learning_rate": 2.968808692385815e-05,
"loss": 2.3952,
"step": 58400
},
{
"epoch": 0.82,
"learning_rate": 2.961852557770698e-05,
"loss": 2.3366,
"step": 58600
},
{
"epoch": 0.82,
"learning_rate": 2.954896423155581e-05,
"loss": 2.316,
"step": 58800
},
{
"epoch": 0.82,
"learning_rate": 2.9479402885404637e-05,
"loss": 2.3167,
"step": 59000
},
{
"epoch": 0.82,
"learning_rate": 2.940984153925347e-05,
"loss": 2.3853,
"step": 59200
},
{
"epoch": 0.83,
"learning_rate": 2.93402801931023e-05,
"loss": 2.3824,
"step": 59400
},
{
"epoch": 0.83,
"learning_rate": 2.9270718846951127e-05,
"loss": 2.385,
"step": 59600
},
{
"epoch": 0.83,
"learning_rate": 2.920115750079996e-05,
"loss": 2.3916,
"step": 59800
},
{
"epoch": 0.83,
"learning_rate": 2.913159615464879e-05,
"loss": 2.3757,
"step": 60000
},
{
"epoch": 0.84,
"learning_rate": 2.9062034808497617e-05,
"loss": 2.3031,
"step": 60200
},
{
"epoch": 0.84,
"learning_rate": 2.8992473462346443e-05,
"loss": 2.3391,
"step": 60400
},
{
"epoch": 0.84,
"learning_rate": 2.892291211619527e-05,
"loss": 2.3327,
"step": 60600
},
{
"epoch": 0.85,
"learning_rate": 2.88533507700441e-05,
"loss": 2.3553,
"step": 60800
},
{
"epoch": 0.85,
"learning_rate": 2.8783789423892933e-05,
"loss": 2.4071,
"step": 61000
},
{
"epoch": 0.85,
"learning_rate": 2.871422807774176e-05,
"loss": 2.2922,
"step": 61200
},
{
"epoch": 0.85,
"learning_rate": 2.864466673159059e-05,
"loss": 2.2956,
"step": 61400
},
{
"epoch": 0.86,
"learning_rate": 2.8575105385439423e-05,
"loss": 2.3518,
"step": 61600
},
{
"epoch": 0.86,
"learning_rate": 2.850554403928825e-05,
"loss": 2.3378,
"step": 61800
},
{
"epoch": 0.86,
"learning_rate": 2.843598269313708e-05,
"loss": 2.3567,
"step": 62000
},
{
"epoch": 0.87,
"learning_rate": 2.836642134698591e-05,
"loss": 2.3835,
"step": 62200
},
{
"epoch": 0.87,
"learning_rate": 2.8296860000834735e-05,
"loss": 2.3527,
"step": 62400
},
{
"epoch": 0.87,
"learning_rate": 2.8227298654683564e-05,
"loss": 2.3609,
"step": 62600
},
{
"epoch": 0.87,
"learning_rate": 2.8157737308532396e-05,
"loss": 2.3263,
"step": 62800
},
{
"epoch": 0.88,
"learning_rate": 2.8088175962381225e-05,
"loss": 2.3604,
"step": 63000
},
{
"epoch": 0.88,
"learning_rate": 2.8018614616230054e-05,
"loss": 2.371,
"step": 63200
},
{
"epoch": 0.88,
"learning_rate": 2.7949053270078886e-05,
"loss": 2.3555,
"step": 63400
},
{
"epoch": 0.88,
"learning_rate": 2.7879491923927715e-05,
"loss": 2.3573,
"step": 63600
},
{
"epoch": 0.89,
"learning_rate": 2.7809930577776543e-05,
"loss": 2.3151,
"step": 63800
},
{
"epoch": 0.89,
"learning_rate": 2.7740369231625372e-05,
"loss": 2.342,
"step": 64000
},
{
"epoch": 0.89,
"learning_rate": 2.7670807885474205e-05,
"loss": 2.3264,
"step": 64200
},
{
"epoch": 0.9,
"learning_rate": 2.7601246539323027e-05,
"loss": 2.3124,
"step": 64400
},
{
"epoch": 0.9,
"learning_rate": 2.753168519317186e-05,
"loss": 2.3672,
"step": 64600
},
{
"epoch": 0.9,
"learning_rate": 2.7462123847020688e-05,
"loss": 2.3758,
"step": 64800
},
{
"epoch": 0.9,
"learning_rate": 2.7392562500869517e-05,
"loss": 2.3426,
"step": 65000
},
{
"epoch": 0.91,
"learning_rate": 2.732300115471835e-05,
"loss": 2.3127,
"step": 65200
},
{
"epoch": 0.91,
"learning_rate": 2.7253439808567178e-05,
"loss": 2.351,
"step": 65400
},
{
"epoch": 0.91,
"learning_rate": 2.7183878462416007e-05,
"loss": 2.3214,
"step": 65600
},
{
"epoch": 0.92,
"learning_rate": 2.7114317116264835e-05,
"loss": 2.3141,
"step": 65800
},
{
"epoch": 0.92,
"learning_rate": 2.7044755770113668e-05,
"loss": 2.3095,
"step": 66000
},
{
"epoch": 0.92,
"learning_rate": 2.6975194423962497e-05,
"loss": 2.3303,
"step": 66200
},
{
"epoch": 0.92,
"learning_rate": 2.6905633077811322e-05,
"loss": 2.3111,
"step": 66400
},
{
"epoch": 0.93,
"learning_rate": 2.683607173166015e-05,
"loss": 2.2752,
"step": 66600
},
{
"epoch": 0.93,
"learning_rate": 2.676651038550898e-05,
"loss": 2.3245,
"step": 66800
},
{
"epoch": 0.93,
"learning_rate": 2.6696949039357812e-05,
"loss": 2.3506,
"step": 67000
},
{
"epoch": 0.93,
"learning_rate": 2.662738769320664e-05,
"loss": 2.2878,
"step": 67200
},
{
"epoch": 0.94,
"learning_rate": 2.655782634705547e-05,
"loss": 2.2858,
"step": 67400
},
{
"epoch": 0.94,
"learning_rate": 2.64882650009043e-05,
"loss": 2.3803,
"step": 67600
},
{
"epoch": 0.94,
"learning_rate": 2.641870365475313e-05,
"loss": 2.334,
"step": 67800
},
{
"epoch": 0.95,
"learning_rate": 2.634914230860196e-05,
"loss": 2.3088,
"step": 68000
},
{
"epoch": 0.95,
"learning_rate": 2.627958096245079e-05,
"loss": 2.3356,
"step": 68200
},
{
"epoch": 0.95,
"learning_rate": 2.6210019616299614e-05,
"loss": 2.321,
"step": 68400
},
{
"epoch": 0.95,
"learning_rate": 2.6140458270148443e-05,
"loss": 2.3396,
"step": 68600
},
{
"epoch": 0.96,
"learning_rate": 2.607089692399727e-05,
"loss": 2.2976,
"step": 68800
},
{
"epoch": 0.96,
"learning_rate": 2.6001335577846104e-05,
"loss": 2.2884,
"step": 69000
},
{
"epoch": 0.96,
"learning_rate": 2.5931774231694933e-05,
"loss": 2.3099,
"step": 69200
},
{
"epoch": 0.97,
"learning_rate": 2.586221288554376e-05,
"loss": 2.3217,
"step": 69400
},
{
"epoch": 0.97,
"learning_rate": 2.5792651539392594e-05,
"loss": 2.2856,
"step": 69600
},
{
"epoch": 0.97,
"learning_rate": 2.5723090193241423e-05,
"loss": 2.3615,
"step": 69800
},
{
"epoch": 0.97,
"learning_rate": 2.565352884709025e-05,
"loss": 2.2836,
"step": 70000
},
{
"epoch": 0.98,
"learning_rate": 2.5583967500939077e-05,
"loss": 2.3666,
"step": 70200
},
{
"epoch": 0.98,
"learning_rate": 2.5514406154787906e-05,
"loss": 2.3123,
"step": 70400
},
{
"epoch": 0.98,
"learning_rate": 2.5444844808636735e-05,
"loss": 2.3703,
"step": 70600
},
{
"epoch": 0.98,
"learning_rate": 2.5375283462485567e-05,
"loss": 2.3,
"step": 70800
},
{
"epoch": 0.99,
"learning_rate": 2.5305722116334396e-05,
"loss": 2.3252,
"step": 71000
},
{
"epoch": 0.99,
"learning_rate": 2.5236160770183225e-05,
"loss": 2.2764,
"step": 71200
},
{
"epoch": 0.99,
"learning_rate": 2.5166599424032057e-05,
"loss": 2.3298,
"step": 71400
},
{
"epoch": 1.0,
"learning_rate": 2.5097038077880886e-05,
"loss": 2.2929,
"step": 71600
},
{
"epoch": 1.0,
"learning_rate": 2.5027476731729715e-05,
"loss": 2.2647,
"step": 71800
},
{
"epoch": 1.0,
"learning_rate": 2.4957915385578544e-05,
"loss": 2.2967,
"step": 72000
},
{
"epoch": 1.0,
"learning_rate": 2.4888354039427372e-05,
"loss": 2.2854,
"step": 72200
},
{
"epoch": 1.01,
"learning_rate": 2.48187926932762e-05,
"loss": 2.2653,
"step": 72400
},
{
"epoch": 1.01,
"learning_rate": 2.4749231347125034e-05,
"loss": 2.2476,
"step": 72600
},
{
"epoch": 1.01,
"learning_rate": 2.467967000097386e-05,
"loss": 2.2801,
"step": 72800
},
{
"epoch": 1.02,
"learning_rate": 2.4610108654822688e-05,
"loss": 2.2836,
"step": 73000
},
{
"epoch": 1.02,
"learning_rate": 2.454054730867152e-05,
"loss": 2.2847,
"step": 73200
},
{
"epoch": 1.02,
"learning_rate": 2.447098596252035e-05,
"loss": 2.3052,
"step": 73400
},
{
"epoch": 1.02,
"learning_rate": 2.4401424616369178e-05,
"loss": 2.285,
"step": 73600
},
{
"epoch": 1.03,
"learning_rate": 2.4331863270218007e-05,
"loss": 2.22,
"step": 73800
},
{
"epoch": 1.03,
"learning_rate": 2.4262301924066836e-05,
"loss": 2.2589,
"step": 74000
},
{
"epoch": 1.03,
"learning_rate": 2.4192740577915664e-05,
"loss": 2.2705,
"step": 74200
},
{
"epoch": 1.04,
"learning_rate": 2.4123179231764497e-05,
"loss": 2.304,
"step": 74400
},
{
"epoch": 1.04,
"learning_rate": 2.4053617885613325e-05,
"loss": 2.2499,
"step": 74600
},
{
"epoch": 1.04,
"learning_rate": 2.398405653946215e-05,
"loss": 2.3109,
"step": 74800
},
{
"epoch": 1.04,
"learning_rate": 2.3914495193310983e-05,
"loss": 2.2772,
"step": 75000
},
{
"epoch": 1.05,
"learning_rate": 2.3844933847159812e-05,
"loss": 2.2835,
"step": 75200
},
{
"epoch": 1.05,
"learning_rate": 2.377537250100864e-05,
"loss": 2.298,
"step": 75400
},
{
"epoch": 1.05,
"learning_rate": 2.370581115485747e-05,
"loss": 2.2486,
"step": 75600
},
{
"epoch": 1.05,
"learning_rate": 2.36362498087063e-05,
"loss": 2.304,
"step": 75800
},
{
"epoch": 1.06,
"learning_rate": 2.3566688462555127e-05,
"loss": 2.2604,
"step": 76000
},
{
"epoch": 1.06,
"learning_rate": 2.3497127116403956e-05,
"loss": 2.2368,
"step": 76200
},
{
"epoch": 1.06,
"learning_rate": 2.342756577025279e-05,
"loss": 2.2736,
"step": 76400
},
{
"epoch": 1.07,
"learning_rate": 2.3358004424101617e-05,
"loss": 2.3323,
"step": 76600
},
{
"epoch": 1.07,
"learning_rate": 2.3288443077950446e-05,
"loss": 2.2373,
"step": 76800
},
{
"epoch": 1.07,
"learning_rate": 2.3218881731799275e-05,
"loss": 2.3072,
"step": 77000
},
{
"epoch": 1.07,
"learning_rate": 2.3149320385648104e-05,
"loss": 2.2919,
"step": 77200
},
{
"epoch": 1.08,
"learning_rate": 2.3079759039496933e-05,
"loss": 2.2874,
"step": 77400
},
{
"epoch": 1.08,
"learning_rate": 2.3010197693345762e-05,
"loss": 2.2723,
"step": 77600
},
{
"epoch": 1.08,
"learning_rate": 2.294063634719459e-05,
"loss": 2.3047,
"step": 77800
},
{
"epoch": 1.09,
"learning_rate": 2.287107500104342e-05,
"loss": 2.2542,
"step": 78000
},
{
"epoch": 1.09,
"learning_rate": 2.2801513654892252e-05,
"loss": 2.2426,
"step": 78200
},
{
"epoch": 1.09,
"learning_rate": 2.273195230874108e-05,
"loss": 2.2554,
"step": 78400
},
{
"epoch": 1.09,
"learning_rate": 2.266239096258991e-05,
"loss": 2.3179,
"step": 78600
},
{
"epoch": 1.1,
"learning_rate": 2.2592829616438738e-05,
"loss": 2.2934,
"step": 78800
},
{
"epoch": 1.1,
"learning_rate": 2.2523268270287567e-05,
"loss": 2.2707,
"step": 79000
},
{
"epoch": 1.1,
"learning_rate": 2.2453706924136396e-05,
"loss": 2.311,
"step": 79200
},
{
"epoch": 1.1,
"learning_rate": 2.2384145577985228e-05,
"loss": 2.3381,
"step": 79400
},
{
"epoch": 1.11,
"learning_rate": 2.2314584231834054e-05,
"loss": 2.247,
"step": 79600
},
{
"epoch": 1.11,
"learning_rate": 2.2245022885682883e-05,
"loss": 2.3296,
"step": 79800
},
{
"epoch": 1.11,
"learning_rate": 2.2175461539531715e-05,
"loss": 2.2792,
"step": 80000
},
{
"epoch": 1.12,
"learning_rate": 2.2105900193380544e-05,
"loss": 2.3004,
"step": 80200
},
{
"epoch": 1.12,
"learning_rate": 2.2036338847229373e-05,
"loss": 2.2845,
"step": 80400
},
{
"epoch": 1.12,
"learning_rate": 2.19667775010782e-05,
"loss": 2.2623,
"step": 80600
},
{
"epoch": 1.12,
"learning_rate": 2.189721615492703e-05,
"loss": 2.2849,
"step": 80800
},
{
"epoch": 1.13,
"learning_rate": 2.182765480877586e-05,
"loss": 2.231,
"step": 81000
},
{
"epoch": 1.13,
"learning_rate": 2.175809346262469e-05,
"loss": 2.2894,
"step": 81200
},
{
"epoch": 1.13,
"learning_rate": 2.168853211647352e-05,
"loss": 2.2979,
"step": 81400
},
{
"epoch": 1.14,
"learning_rate": 2.1618970770322346e-05,
"loss": 2.2961,
"step": 81600
},
{
"epoch": 1.14,
"learning_rate": 2.1549409424171178e-05,
"loss": 2.2625,
"step": 81800
},
{
"epoch": 1.14,
"learning_rate": 2.1479848078020007e-05,
"loss": 2.2742,
"step": 82000
},
{
"epoch": 1.14,
"learning_rate": 2.1410286731868836e-05,
"loss": 2.2718,
"step": 82200
},
{
"epoch": 1.15,
"learning_rate": 2.1340725385717668e-05,
"loss": 2.2341,
"step": 82400
},
{
"epoch": 1.15,
"learning_rate": 2.1271164039566493e-05,
"loss": 2.2424,
"step": 82600
},
{
"epoch": 1.15,
"learning_rate": 2.1201602693415322e-05,
"loss": 2.2147,
"step": 82800
},
{
"epoch": 1.15,
"learning_rate": 2.1132041347264154e-05,
"loss": 2.3273,
"step": 83000
},
{
"epoch": 1.16,
"learning_rate": 2.1062480001112983e-05,
"loss": 2.2852,
"step": 83200
},
{
"epoch": 1.16,
"learning_rate": 2.0992918654961812e-05,
"loss": 2.301,
"step": 83400
},
{
"epoch": 1.16,
"learning_rate": 2.092335730881064e-05,
"loss": 2.2577,
"step": 83600
},
{
"epoch": 1.17,
"learning_rate": 2.085379596265947e-05,
"loss": 2.3224,
"step": 83800
},
{
"epoch": 1.17,
"learning_rate": 2.07842346165083e-05,
"loss": 2.2808,
"step": 84000
},
{
"epoch": 1.17,
"learning_rate": 2.071467327035713e-05,
"loss": 2.369,
"step": 84200
},
{
"epoch": 1.17,
"learning_rate": 2.064511192420596e-05,
"loss": 2.2545,
"step": 84400
},
{
"epoch": 1.18,
"learning_rate": 2.0575550578054785e-05,
"loss": 2.2866,
"step": 84600
},
{
"epoch": 1.18,
"learning_rate": 2.0505989231903618e-05,
"loss": 2.2575,
"step": 84800
},
{
"epoch": 1.18,
"learning_rate": 2.0436427885752446e-05,
"loss": 2.2734,
"step": 85000
},
{
"epoch": 1.19,
"learning_rate": 2.0366866539601275e-05,
"loss": 2.3246,
"step": 85200
},
{
"epoch": 1.19,
"learning_rate": 2.0297305193450104e-05,
"loss": 2.2856,
"step": 85400
},
{
"epoch": 1.19,
"learning_rate": 2.0227743847298933e-05,
"loss": 2.2153,
"step": 85600
},
{
"epoch": 1.19,
"learning_rate": 2.0158182501147762e-05,
"loss": 2.3101,
"step": 85800
},
{
"epoch": 1.2,
"learning_rate": 2.0088621154996594e-05,
"loss": 2.256,
"step": 86000
},
{
"epoch": 1.2,
"learning_rate": 2.0019059808845423e-05,
"loss": 2.2453,
"step": 86200
},
{
"epoch": 1.2,
"learning_rate": 1.9949498462694252e-05,
"loss": 2.2607,
"step": 86400
},
{
"epoch": 1.2,
"learning_rate": 1.987993711654308e-05,
"loss": 2.2636,
"step": 86600
},
{
"epoch": 1.21,
"learning_rate": 1.981037577039191e-05,
"loss": 2.2273,
"step": 86800
},
{
"epoch": 1.21,
"learning_rate": 1.974081442424074e-05,
"loss": 2.2384,
"step": 87000
},
{
"epoch": 1.21,
"learning_rate": 1.9671253078089567e-05,
"loss": 2.234,
"step": 87200
},
{
"epoch": 1.22,
"learning_rate": 1.96016917319384e-05,
"loss": 2.2819,
"step": 87400
},
{
"epoch": 1.22,
"learning_rate": 1.9532130385787225e-05,
"loss": 2.2695,
"step": 87600
},
{
"epoch": 1.22,
"learning_rate": 1.9462569039636054e-05,
"loss": 2.2836,
"step": 87800
},
{
"epoch": 1.22,
"learning_rate": 1.9393007693484886e-05,
"loss": 2.2486,
"step": 88000
},
{
"epoch": 1.23,
"learning_rate": 1.9323446347333715e-05,
"loss": 2.2448,
"step": 88200
},
{
"epoch": 1.23,
"learning_rate": 1.9253885001182544e-05,
"loss": 2.3028,
"step": 88400
},
{
"epoch": 1.23,
"learning_rate": 1.9184323655031373e-05,
"loss": 2.2855,
"step": 88600
},
{
"epoch": 1.24,
"learning_rate": 1.91147623088802e-05,
"loss": 2.2171,
"step": 88800
},
{
"epoch": 1.24,
"learning_rate": 1.904520096272903e-05,
"loss": 2.2834,
"step": 89000
},
{
"epoch": 1.24,
"learning_rate": 1.8975639616577863e-05,
"loss": 2.2743,
"step": 89200
},
{
"epoch": 1.24,
"learning_rate": 1.890607827042669e-05,
"loss": 2.2437,
"step": 89400
},
{
"epoch": 1.25,
"learning_rate": 1.8836516924275517e-05,
"loss": 2.2791,
"step": 89600
},
{
"epoch": 1.25,
"learning_rate": 1.876695557812435e-05,
"loss": 2.2572,
"step": 89800
},
{
"epoch": 1.25,
"learning_rate": 1.8697394231973178e-05,
"loss": 2.3147,
"step": 90000
},
{
"epoch": 1.25,
"learning_rate": 1.8627832885822007e-05,
"loss": 2.2677,
"step": 90200
},
{
"epoch": 1.26,
"learning_rate": 1.855827153967084e-05,
"loss": 2.2691,
"step": 90400
},
{
"epoch": 1.26,
"learning_rate": 1.8488710193519665e-05,
"loss": 2.2629,
"step": 90600
},
{
"epoch": 1.26,
"learning_rate": 1.8419148847368493e-05,
"loss": 2.2621,
"step": 90800
},
{
"epoch": 1.27,
"learning_rate": 1.8349587501217326e-05,
"loss": 2.2666,
"step": 91000
},
{
"epoch": 1.27,
"learning_rate": 1.8280026155066155e-05,
"loss": 2.2626,
"step": 91200
},
{
"epoch": 1.27,
"learning_rate": 1.8210464808914983e-05,
"loss": 2.2453,
"step": 91400
},
{
"epoch": 1.27,
"learning_rate": 1.8140903462763812e-05,
"loss": 2.2943,
"step": 91600
},
{
"epoch": 1.28,
"learning_rate": 1.807134211661264e-05,
"loss": 2.2731,
"step": 91800
},
{
"epoch": 1.28,
"learning_rate": 1.800178077046147e-05,
"loss": 2.2229,
"step": 92000
},
{
"epoch": 1.28,
"learning_rate": 1.7932219424310302e-05,
"loss": 2.2683,
"step": 92200
},
{
"epoch": 1.29,
"learning_rate": 1.786265807815913e-05,
"loss": 2.2642,
"step": 92400
},
{
"epoch": 1.29,
"learning_rate": 1.7793096732007957e-05,
"loss": 2.2761,
"step": 92600
},
{
"epoch": 1.29,
"learning_rate": 1.772353538585679e-05,
"loss": 2.2218,
"step": 92800
},
{
"epoch": 1.29,
"learning_rate": 1.7653974039705618e-05,
"loss": 2.2866,
"step": 93000
},
{
"epoch": 1.3,
"learning_rate": 1.7584412693554447e-05,
"loss": 2.267,
"step": 93200
},
{
"epoch": 1.3,
"learning_rate": 1.751485134740328e-05,
"loss": 2.2521,
"step": 93400
},
{
"epoch": 1.3,
"learning_rate": 1.7445290001252104e-05,
"loss": 2.2212,
"step": 93600
},
{
"epoch": 1.3,
"learning_rate": 1.7375728655100933e-05,
"loss": 2.2599,
"step": 93800
},
{
"epoch": 1.31,
"learning_rate": 1.7306167308949765e-05,
"loss": 2.2767,
"step": 94000
},
{
"epoch": 1.31,
"learning_rate": 1.7236605962798594e-05,
"loss": 2.2473,
"step": 94200
},
{
"epoch": 1.31,
"learning_rate": 1.716704461664742e-05,
"loss": 2.2549,
"step": 94400
},
{
"epoch": 1.32,
"learning_rate": 1.7097483270496252e-05,
"loss": 2.2694,
"step": 94600
},
{
"epoch": 1.32,
"learning_rate": 1.702792192434508e-05,
"loss": 2.2805,
"step": 94800
},
{
"epoch": 1.32,
"learning_rate": 1.695836057819391e-05,
"loss": 2.2728,
"step": 95000
},
{
"epoch": 1.32,
"learning_rate": 1.688879923204274e-05,
"loss": 2.2649,
"step": 95200
},
{
"epoch": 1.33,
"learning_rate": 1.6819237885891567e-05,
"loss": 2.2667,
"step": 95400
},
{
"epoch": 1.33,
"learning_rate": 1.6749676539740396e-05,
"loss": 2.2807,
"step": 95600
},
{
"epoch": 1.33,
"learning_rate": 1.668011519358923e-05,
"loss": 2.2694,
"step": 95800
},
{
"epoch": 1.34,
"learning_rate": 1.6610553847438057e-05,
"loss": 2.2407,
"step": 96000
},
{
"epoch": 1.34,
"learning_rate": 1.6540992501286886e-05,
"loss": 2.2147,
"step": 96200
},
{
"epoch": 1.34,
"learning_rate": 1.6471431155135715e-05,
"loss": 2.266,
"step": 96400
},
{
"epoch": 1.34,
"learning_rate": 1.6401869808984544e-05,
"loss": 2.2406,
"step": 96600
},
{
"epoch": 1.35,
"learning_rate": 1.6332308462833373e-05,
"loss": 2.2371,
"step": 96800
},
{
"epoch": 1.35,
"learning_rate": 1.62627471166822e-05,
"loss": 2.2532,
"step": 97000
},
{
"epoch": 1.35,
"learning_rate": 1.6193185770531034e-05,
"loss": 2.2243,
"step": 97200
},
{
"epoch": 1.36,
"learning_rate": 1.612362442437986e-05,
"loss": 2.2837,
"step": 97400
},
{
"epoch": 1.36,
"learning_rate": 1.605406307822869e-05,
"loss": 2.2991,
"step": 97600
},
{
"epoch": 1.36,
"learning_rate": 1.598450173207752e-05,
"loss": 2.2166,
"step": 97800
},
{
"epoch": 1.36,
"learning_rate": 1.591494038592635e-05,
"loss": 2.2738,
"step": 98000
},
{
"epoch": 1.37,
"learning_rate": 1.5845379039775178e-05,
"loss": 2.2654,
"step": 98200
},
{
"epoch": 1.37,
"learning_rate": 1.5775817693624007e-05,
"loss": 2.2906,
"step": 98400
},
{
"epoch": 1.37,
"learning_rate": 1.5706256347472836e-05,
"loss": 2.2701,
"step": 98600
},
{
"epoch": 1.37,
"learning_rate": 1.5636695001321665e-05,
"loss": 2.2599,
"step": 98800
},
{
"epoch": 1.38,
"learning_rate": 1.5567133655170497e-05,
"loss": 2.2211,
"step": 99000
},
{
"epoch": 1.38,
"learning_rate": 1.5497572309019326e-05,
"loss": 2.2448,
"step": 99200
},
{
"epoch": 1.38,
"learning_rate": 1.542801096286815e-05,
"loss": 2.2691,
"step": 99400
},
{
"epoch": 1.39,
"learning_rate": 1.5358449616716984e-05,
"loss": 2.2314,
"step": 99600
},
{
"epoch": 1.39,
"learning_rate": 1.5288888270565812e-05,
"loss": 2.2875,
"step": 99800
},
{
"epoch": 1.39,
"learning_rate": 1.5219326924414643e-05,
"loss": 2.2662,
"step": 100000
},
{
"epoch": 1.39,
"learning_rate": 1.5149765578263472e-05,
"loss": 2.2468,
"step": 100200
},
{
"epoch": 1.4,
"learning_rate": 1.5080204232112299e-05,
"loss": 2.2398,
"step": 100400
},
{
"epoch": 1.4,
"learning_rate": 1.501064288596113e-05,
"loss": 2.2485,
"step": 100600
},
{
"epoch": 1.4,
"learning_rate": 1.4941081539809958e-05,
"loss": 2.235,
"step": 100800
},
{
"epoch": 1.41,
"learning_rate": 1.4871520193658789e-05,
"loss": 2.2683,
"step": 101000
},
{
"epoch": 1.41,
"learning_rate": 1.480195884750762e-05,
"loss": 2.2748,
"step": 101200
},
{
"epoch": 1.41,
"learning_rate": 1.4732397501356447e-05,
"loss": 2.2722,
"step": 101400
},
{
"epoch": 1.41,
"learning_rate": 1.4662836155205275e-05,
"loss": 2.256,
"step": 101600
},
{
"epoch": 1.42,
"learning_rate": 1.4593274809054106e-05,
"loss": 2.2949,
"step": 101800
},
{
"epoch": 1.42,
"learning_rate": 1.4523713462902935e-05,
"loss": 2.2466,
"step": 102000
},
{
"epoch": 1.42,
"learning_rate": 1.4454152116751765e-05,
"loss": 2.2626,
"step": 102200
},
{
"epoch": 1.42,
"learning_rate": 1.4384590770600593e-05,
"loss": 2.2383,
"step": 102400
},
{
"epoch": 1.43,
"learning_rate": 1.4315029424449421e-05,
"loss": 2.2195,
"step": 102600
},
{
"epoch": 1.43,
"learning_rate": 1.4245468078298252e-05,
"loss": 2.2826,
"step": 102800
},
{
"epoch": 1.43,
"learning_rate": 1.4175906732147083e-05,
"loss": 2.2879,
"step": 103000
},
{
"epoch": 1.44,
"learning_rate": 1.4106345385995911e-05,
"loss": 2.2546,
"step": 103200
},
{
"epoch": 1.44,
"learning_rate": 1.4036784039844739e-05,
"loss": 2.3098,
"step": 103400
},
{
"epoch": 1.44,
"learning_rate": 1.3967222693693569e-05,
"loss": 2.2196,
"step": 103600
},
{
"epoch": 1.44,
"learning_rate": 1.3897661347542398e-05,
"loss": 2.2716,
"step": 103800
},
{
"epoch": 1.45,
"learning_rate": 1.3828100001391229e-05,
"loss": 2.283,
"step": 104000
},
{
"epoch": 1.45,
"learning_rate": 1.3758538655240057e-05,
"loss": 2.2332,
"step": 104200
},
{
"epoch": 1.45,
"learning_rate": 1.3688977309088885e-05,
"loss": 2.2702,
"step": 104400
},
{
"epoch": 1.46,
"learning_rate": 1.3619415962937715e-05,
"loss": 2.2292,
"step": 104600
},
{
"epoch": 1.46,
"learning_rate": 1.3549854616786544e-05,
"loss": 2.2587,
"step": 104800
},
{
"epoch": 1.46,
"learning_rate": 1.3480293270635375e-05,
"loss": 2.2908,
"step": 105000
},
{
"epoch": 1.46,
"learning_rate": 1.3410731924484205e-05,
"loss": 2.2374,
"step": 105200
},
{
"epoch": 1.47,
"learning_rate": 1.3341170578333032e-05,
"loss": 2.202,
"step": 105400
},
{
"epoch": 1.47,
"learning_rate": 1.3271609232181861e-05,
"loss": 2.2496,
"step": 105600
},
{
"epoch": 1.47,
"learning_rate": 1.3202047886030692e-05,
"loss": 2.2488,
"step": 105800
},
{
"epoch": 1.47,
"learning_rate": 1.313248653987952e-05,
"loss": 2.2002,
"step": 106000
},
{
"epoch": 1.48,
"learning_rate": 1.3062925193728351e-05,
"loss": 2.2189,
"step": 106200
},
{
"epoch": 1.48,
"learning_rate": 1.2993363847577178e-05,
"loss": 2.2373,
"step": 106400
},
{
"epoch": 1.48,
"learning_rate": 1.2923802501426007e-05,
"loss": 2.2146,
"step": 106600
},
{
"epoch": 1.49,
"learning_rate": 1.2854241155274838e-05,
"loss": 2.2442,
"step": 106800
},
{
"epoch": 1.49,
"learning_rate": 1.2784679809123668e-05,
"loss": 2.2113,
"step": 107000
},
{
"epoch": 1.49,
"learning_rate": 1.2715118462972497e-05,
"loss": 2.2127,
"step": 107200
},
{
"epoch": 1.49,
"learning_rate": 1.2645557116821324e-05,
"loss": 2.2608,
"step": 107400
},
{
"epoch": 1.5,
"learning_rate": 1.2575995770670155e-05,
"loss": 2.2913,
"step": 107600
},
{
"epoch": 1.5,
"learning_rate": 1.2506434424518984e-05,
"loss": 2.2128,
"step": 107800
},
{
"epoch": 1.5,
"learning_rate": 1.2436873078367812e-05,
"loss": 2.2995,
"step": 108000
},
{
"epoch": 1.51,
"learning_rate": 1.2367311732216643e-05,
"loss": 2.1982,
"step": 108200
},
{
"epoch": 1.51,
"learning_rate": 1.2297750386065472e-05,
"loss": 2.2296,
"step": 108400
},
{
"epoch": 1.51,
"learning_rate": 1.22281890399143e-05,
"loss": 2.2605,
"step": 108600
},
{
"epoch": 1.51,
"learning_rate": 1.2158627693763131e-05,
"loss": 2.2234,
"step": 108800
},
{
"epoch": 1.52,
"learning_rate": 1.2089066347611958e-05,
"loss": 2.2545,
"step": 109000
},
{
"epoch": 1.52,
"learning_rate": 1.2019505001460789e-05,
"loss": 2.2522,
"step": 109200
},
{
"epoch": 1.52,
"learning_rate": 1.1949943655309618e-05,
"loss": 2.2098,
"step": 109400
},
{
"epoch": 1.52,
"learning_rate": 1.1880382309158447e-05,
"loss": 2.2733,
"step": 109600
},
{
"epoch": 1.53,
"learning_rate": 1.1810820963007277e-05,
"loss": 2.2238,
"step": 109800
},
{
"epoch": 1.53,
"learning_rate": 1.1741259616856106e-05,
"loss": 2.2313,
"step": 110000
},
{
"epoch": 1.53,
"learning_rate": 1.1671698270704935e-05,
"loss": 2.2784,
"step": 110200
},
{
"epoch": 1.54,
"learning_rate": 1.1602136924553766e-05,
"loss": 2.2708,
"step": 110400
},
{
"epoch": 1.54,
"learning_rate": 1.1532575578402593e-05,
"loss": 2.2571,
"step": 110600
},
{
"epoch": 1.54,
"learning_rate": 1.1463014232251423e-05,
"loss": 2.2817,
"step": 110800
},
{
"epoch": 1.54,
"learning_rate": 1.1393452886100252e-05,
"loss": 2.2155,
"step": 111000
},
{
"epoch": 1.55,
"learning_rate": 1.1323891539949081e-05,
"loss": 2.2027,
"step": 111200
},
{
"epoch": 1.55,
"learning_rate": 1.1254330193797912e-05,
"loss": 2.2216,
"step": 111400
},
{
"epoch": 1.55,
"learning_rate": 1.118476884764674e-05,
"loss": 2.2222,
"step": 111600
},
{
"epoch": 1.56,
"learning_rate": 1.111520750149557e-05,
"loss": 2.2636,
"step": 111800
},
{
"epoch": 1.56,
"learning_rate": 1.1045646155344398e-05,
"loss": 2.2968,
"step": 112000
},
{
"epoch": 1.56,
"learning_rate": 1.0976084809193229e-05,
"loss": 2.3037,
"step": 112200
},
{
"epoch": 1.56,
"learning_rate": 1.0906523463042057e-05,
"loss": 2.2548,
"step": 112400
},
{
"epoch": 1.57,
"learning_rate": 1.0836962116890886e-05,
"loss": 2.2708,
"step": 112600
},
{
"epoch": 1.57,
"learning_rate": 1.0767400770739717e-05,
"loss": 2.2342,
"step": 112800
},
{
"epoch": 1.57,
"learning_rate": 1.0697839424588544e-05,
"loss": 2.2659,
"step": 113000
},
{
"epoch": 1.57,
"learning_rate": 1.0628278078437375e-05,
"loss": 2.2394,
"step": 113200
},
{
"epoch": 1.58,
"learning_rate": 1.0558716732286205e-05,
"loss": 2.29,
"step": 113400
},
{
"epoch": 1.58,
"learning_rate": 1.0489155386135032e-05,
"loss": 2.2658,
"step": 113600
},
{
"epoch": 1.58,
"learning_rate": 1.0419594039983863e-05,
"loss": 2.2412,
"step": 113800
},
{
"epoch": 1.59,
"learning_rate": 1.0350032693832692e-05,
"loss": 2.2438,
"step": 114000
},
{
"epoch": 1.59,
"learning_rate": 1.028047134768152e-05,
"loss": 2.2179,
"step": 114200
},
{
"epoch": 1.59,
"learning_rate": 1.0210910001530351e-05,
"loss": 2.2669,
"step": 114400
},
{
"epoch": 1.59,
"learning_rate": 1.014134865537918e-05,
"loss": 2.1845,
"step": 114600
},
{
"epoch": 1.6,
"learning_rate": 1.0071787309228009e-05,
"loss": 2.239,
"step": 114800
},
{
"epoch": 1.6,
"learning_rate": 1.0002225963076838e-05,
"loss": 2.2727,
"step": 115000
},
{
"epoch": 1.6,
"learning_rate": 9.932664616925667e-06,
"loss": 2.2217,
"step": 115200
},
{
"epoch": 1.61,
"learning_rate": 9.863103270774495e-06,
"loss": 2.2662,
"step": 115400
},
{
"epoch": 1.61,
"learning_rate": 9.793541924623326e-06,
"loss": 2.2163,
"step": 115600
},
{
"epoch": 1.61,
"learning_rate": 9.723980578472155e-06,
"loss": 2.242,
"step": 115800
},
{
"epoch": 1.61,
"learning_rate": 9.654419232320984e-06,
"loss": 2.2967,
"step": 116000
},
{
"epoch": 1.62,
"learning_rate": 9.584857886169814e-06,
"loss": 2.2005,
"step": 116200
},
{
"epoch": 1.62,
"learning_rate": 9.515296540018641e-06,
"loss": 2.244,
"step": 116400
},
{
"epoch": 1.62,
"learning_rate": 9.445735193867472e-06,
"loss": 2.2641,
"step": 116600
},
{
"epoch": 1.62,
"learning_rate": 9.376173847716303e-06,
"loss": 2.2491,
"step": 116800
},
{
"epoch": 1.63,
"learning_rate": 9.30661250156513e-06,
"loss": 2.1801,
"step": 117000
},
{
"epoch": 1.63,
"learning_rate": 9.23705115541396e-06,
"loss": 2.2559,
"step": 117200
},
{
"epoch": 1.63,
"learning_rate": 9.167489809262789e-06,
"loss": 2.2147,
"step": 117400
},
{
"epoch": 1.64,
"learning_rate": 9.097928463111618e-06,
"loss": 2.2392,
"step": 117600
},
{
"epoch": 1.64,
"learning_rate": 9.028367116960449e-06,
"loss": 2.2059,
"step": 117800
},
{
"epoch": 1.64,
"learning_rate": 8.958805770809277e-06,
"loss": 2.2391,
"step": 118000
},
{
"epoch": 1.64,
"learning_rate": 8.889244424658106e-06,
"loss": 2.2566,
"step": 118200
},
{
"epoch": 1.65,
"learning_rate": 8.819683078506935e-06,
"loss": 2.1766,
"step": 118400
},
{
"epoch": 1.65,
"learning_rate": 8.750121732355766e-06,
"loss": 2.2256,
"step": 118600
},
{
"epoch": 1.65,
"learning_rate": 8.680560386204594e-06,
"loss": 2.2554,
"step": 118800
},
{
"epoch": 1.66,
"learning_rate": 8.610999040053423e-06,
"loss": 2.2409,
"step": 119000
},
{
"epoch": 1.66,
"learning_rate": 8.541437693902254e-06,
"loss": 2.2004,
"step": 119200
},
{
"epoch": 1.66,
"learning_rate": 8.471876347751081e-06,
"loss": 2.2624,
"step": 119400
},
{
"epoch": 1.66,
"learning_rate": 8.402315001599912e-06,
"loss": 2.2336,
"step": 119600
},
{
"epoch": 1.67,
"learning_rate": 8.33275365544874e-06,
"loss": 2.2002,
"step": 119800
},
{
"epoch": 1.67,
"learning_rate": 8.26319230929757e-06,
"loss": 2.2472,
"step": 120000
},
{
"epoch": 1.67,
"learning_rate": 8.1936309631464e-06,
"loss": 2.2573,
"step": 120200
},
{
"epoch": 1.68,
"learning_rate": 8.124069616995229e-06,
"loss": 2.2176,
"step": 120400
},
{
"epoch": 1.68,
"learning_rate": 8.054508270844058e-06,
"loss": 2.2385,
"step": 120600
},
{
"epoch": 1.68,
"learning_rate": 7.984946924692888e-06,
"loss": 2.2575,
"step": 120800
},
{
"epoch": 1.68,
"learning_rate": 7.915385578541715e-06,
"loss": 2.2158,
"step": 121000
},
{
"epoch": 1.69,
"learning_rate": 7.845824232390546e-06,
"loss": 2.2307,
"step": 121200
},
{
"epoch": 1.69,
"learning_rate": 7.776262886239375e-06,
"loss": 2.2312,
"step": 121400
},
{
"epoch": 1.69,
"learning_rate": 7.706701540088204e-06,
"loss": 2.251,
"step": 121600
},
{
"epoch": 1.69,
"learning_rate": 7.637140193937034e-06,
"loss": 2.2864,
"step": 121800
},
{
"epoch": 1.7,
"learning_rate": 7.567578847785862e-06,
"loss": 2.2243,
"step": 122000
},
{
"epoch": 1.7,
"learning_rate": 7.498017501634693e-06,
"loss": 2.252,
"step": 122200
},
{
"epoch": 1.7,
"learning_rate": 7.428456155483521e-06,
"loss": 2.2009,
"step": 122400
},
{
"epoch": 1.71,
"learning_rate": 7.35889480933235e-06,
"loss": 2.2145,
"step": 122600
},
{
"epoch": 1.71,
"learning_rate": 7.28933346318118e-06,
"loss": 2.2335,
"step": 122800
},
{
"epoch": 1.71,
"learning_rate": 7.219772117030009e-06,
"loss": 2.2516,
"step": 123000
},
{
"epoch": 1.71,
"learning_rate": 7.150210770878839e-06,
"loss": 2.2146,
"step": 123200
},
{
"epoch": 1.72,
"learning_rate": 7.0806494247276675e-06,
"loss": 2.2304,
"step": 123400
},
{
"epoch": 1.72,
"learning_rate": 7.011088078576497e-06,
"loss": 2.1978,
"step": 123600
},
{
"epoch": 1.72,
"learning_rate": 6.941526732425327e-06,
"loss": 2.2488,
"step": 123800
},
{
"epoch": 1.73,
"learning_rate": 6.871965386274155e-06,
"loss": 2.2347,
"step": 124000
},
{
"epoch": 1.73,
"learning_rate": 6.8024040401229855e-06,
"loss": 2.2496,
"step": 124200
},
{
"epoch": 1.73,
"learning_rate": 6.7328426939718135e-06,
"loss": 2.2366,
"step": 124400
},
{
"epoch": 1.73,
"learning_rate": 6.663281347820643e-06,
"loss": 2.1691,
"step": 124600
},
{
"epoch": 1.74,
"learning_rate": 6.593720001669472e-06,
"loss": 2.205,
"step": 124800
},
{
"epoch": 1.74,
"learning_rate": 6.524158655518302e-06,
"loss": 2.2107,
"step": 125000
},
{
"epoch": 1.74,
"learning_rate": 6.4545973093671315e-06,
"loss": 2.2188,
"step": 125200
},
{
"epoch": 1.74,
"learning_rate": 6.38503596321596e-06,
"loss": 2.2381,
"step": 125400
},
{
"epoch": 1.75,
"learning_rate": 6.31547461706479e-06,
"loss": 2.2638,
"step": 125600
},
{
"epoch": 1.75,
"learning_rate": 6.245913270913619e-06,
"loss": 2.2688,
"step": 125800
},
{
"epoch": 1.75,
"learning_rate": 6.176351924762449e-06,
"loss": 2.1799,
"step": 126000
},
{
"epoch": 1.76,
"learning_rate": 6.1067905786112775e-06,
"loss": 2.2184,
"step": 126200
},
{
"epoch": 1.76,
"learning_rate": 6.037229232460106e-06,
"loss": 2.2297,
"step": 126400
},
{
"epoch": 1.76,
"learning_rate": 5.967667886308936e-06,
"loss": 2.2882,
"step": 126600
},
{
"epoch": 1.76,
"learning_rate": 5.898106540157766e-06,
"loss": 2.2388,
"step": 126800
},
{
"epoch": 1.77,
"learning_rate": 5.828545194006595e-06,
"loss": 2.2356,
"step": 127000
},
{
"epoch": 1.77,
"learning_rate": 5.7589838478554234e-06,
"loss": 2.2427,
"step": 127200
},
{
"epoch": 1.77,
"learning_rate": 5.689422501704253e-06,
"loss": 2.2371,
"step": 127400
},
{
"epoch": 1.78,
"learning_rate": 5.619861155553083e-06,
"loss": 2.2639,
"step": 127600
},
{
"epoch": 1.78,
"learning_rate": 5.550299809401912e-06,
"loss": 2.2532,
"step": 127800
},
{
"epoch": 1.78,
"learning_rate": 5.480738463250741e-06,
"loss": 2.2085,
"step": 128000
},
{
"epoch": 1.78,
"learning_rate": 5.41117711709957e-06,
"loss": 2.263,
"step": 128200
},
{
"epoch": 1.79,
"learning_rate": 5.341615770948399e-06,
"loss": 2.1695,
"step": 128400
},
{
"epoch": 1.79,
"learning_rate": 5.272054424797229e-06,
"loss": 2.2055,
"step": 128600
},
{
"epoch": 1.79,
"learning_rate": 5.2024930786460585e-06,
"loss": 2.2349,
"step": 128800
},
{
"epoch": 1.79,
"learning_rate": 5.132931732494887e-06,
"loss": 2.2024,
"step": 129000
},
{
"epoch": 1.8,
"learning_rate": 5.063370386343716e-06,
"loss": 2.2263,
"step": 129200
},
{
"epoch": 1.8,
"learning_rate": 4.993809040192546e-06,
"loss": 2.2084,
"step": 129400
},
{
"epoch": 1.8,
"learning_rate": 4.924247694041376e-06,
"loss": 2.2397,
"step": 129600
},
{
"epoch": 1.81,
"learning_rate": 4.8546863478902045e-06,
"loss": 2.2455,
"step": 129800
},
{
"epoch": 1.81,
"learning_rate": 4.785125001739034e-06,
"loss": 2.2159,
"step": 130000
},
{
"epoch": 1.81,
"learning_rate": 4.715563655587863e-06,
"loss": 2.1978,
"step": 130200
},
{
"epoch": 1.81,
"learning_rate": 4.646002309436692e-06,
"loss": 2.2635,
"step": 130400
},
{
"epoch": 1.82,
"learning_rate": 4.576440963285522e-06,
"loss": 2.2005,
"step": 130600
},
{
"epoch": 1.82,
"learning_rate": 4.506879617134351e-06,
"loss": 2.2388,
"step": 130800
},
{
"epoch": 1.82,
"learning_rate": 4.43731827098318e-06,
"loss": 2.2142,
"step": 131000
},
{
"epoch": 1.83,
"learning_rate": 4.36775692483201e-06,
"loss": 2.2437,
"step": 131200
},
{
"epoch": 1.83,
"learning_rate": 4.298195578680839e-06,
"loss": 2.2269,
"step": 131400
},
{
"epoch": 1.83,
"learning_rate": 4.228634232529668e-06,
"loss": 2.1651,
"step": 131600
},
{
"epoch": 1.83,
"learning_rate": 4.159072886378497e-06,
"loss": 2.2552,
"step": 131800
},
{
"epoch": 1.84,
"learning_rate": 4.089511540227327e-06,
"loss": 2.2328,
"step": 132000
},
{
"epoch": 1.84,
"learning_rate": 4.019950194076156e-06,
"loss": 2.2267,
"step": 132200
},
{
"epoch": 1.84,
"learning_rate": 3.950388847924985e-06,
"loss": 2.138,
"step": 132400
},
{
"epoch": 1.84,
"learning_rate": 3.8808275017738145e-06,
"loss": 2.1773,
"step": 132600
},
{
"epoch": 1.85,
"learning_rate": 3.811266155622644e-06,
"loss": 2.1719,
"step": 132800
},
{
"epoch": 1.85,
"learning_rate": 3.741704809471473e-06,
"loss": 2.2124,
"step": 133000
},
{
"epoch": 1.85,
"learning_rate": 3.6721434633203023e-06,
"loss": 2.2293,
"step": 133200
},
{
"epoch": 1.86,
"learning_rate": 3.6025821171691316e-06,
"loss": 2.1802,
"step": 133400
},
{
"epoch": 1.86,
"learning_rate": 3.533020771017961e-06,
"loss": 2.1823,
"step": 133600
},
{
"epoch": 1.86,
"learning_rate": 3.4634594248667906e-06,
"loss": 2.2114,
"step": 133800
},
{
"epoch": 1.86,
"learning_rate": 3.39389807871562e-06,
"loss": 2.2102,
"step": 134000
},
{
"epoch": 1.87,
"learning_rate": 3.3243367325644487e-06,
"loss": 2.2313,
"step": 134200
},
{
"epoch": 1.87,
"learning_rate": 3.254775386413278e-06,
"loss": 2.2214,
"step": 134400
},
{
"epoch": 1.87,
"learning_rate": 3.1852140402621073e-06,
"loss": 2.2491,
"step": 134600
},
{
"epoch": 1.88,
"learning_rate": 3.1156526941109365e-06,
"loss": 2.1957,
"step": 134800
},
{
"epoch": 1.88,
"learning_rate": 3.046091347959766e-06,
"loss": 2.251,
"step": 135000
},
{
"epoch": 1.88,
"learning_rate": 2.976530001808595e-06,
"loss": 2.1941,
"step": 135200
},
{
"epoch": 1.88,
"learning_rate": 2.9069686556574244e-06,
"loss": 2.231,
"step": 135400
},
{
"epoch": 1.89,
"learning_rate": 2.8374073095062537e-06,
"loss": 2.1893,
"step": 135600
},
{
"epoch": 1.89,
"learning_rate": 2.767845963355083e-06,
"loss": 2.2114,
"step": 135800
},
{
"epoch": 1.89,
"learning_rate": 2.6982846172039122e-06,
"loss": 2.2375,
"step": 136000
},
{
"epoch": 1.89,
"learning_rate": 2.6287232710527415e-06,
"loss": 2.1917,
"step": 136200
},
{
"epoch": 1.9,
"learning_rate": 2.559161924901571e-06,
"loss": 2.2227,
"step": 136400
},
{
"epoch": 1.9,
"learning_rate": 2.4896005787504e-06,
"loss": 2.2534,
"step": 136600
},
{
"epoch": 1.9,
"learning_rate": 2.4200392325992294e-06,
"loss": 2.2529,
"step": 136800
},
{
"epoch": 1.91,
"learning_rate": 2.3504778864480586e-06,
"loss": 2.2046,
"step": 137000
},
{
"epoch": 1.91,
"learning_rate": 2.280916540296888e-06,
"loss": 2.262,
"step": 137200
},
{
"epoch": 1.91,
"learning_rate": 2.211355194145717e-06,
"loss": 2.1998,
"step": 137400
},
{
"epoch": 1.91,
"learning_rate": 2.1417938479945465e-06,
"loss": 2.2093,
"step": 137600
},
{
"epoch": 1.92,
"learning_rate": 2.0722325018433758e-06,
"loss": 2.204,
"step": 137800
},
{
"epoch": 1.92,
"learning_rate": 2.002671155692205e-06,
"loss": 2.234,
"step": 138000
},
{
"epoch": 1.92,
"learning_rate": 1.9331098095410343e-06,
"loss": 2.2064,
"step": 138200
},
{
"epoch": 1.93,
"learning_rate": 1.8635484633898636e-06,
"loss": 2.2288,
"step": 138400
},
{
"epoch": 1.93,
"learning_rate": 1.7939871172386929e-06,
"loss": 2.1994,
"step": 138600
},
{
"epoch": 1.93,
"learning_rate": 1.7244257710875224e-06,
"loss": 2.2235,
"step": 138800
},
{
"epoch": 1.93,
"learning_rate": 1.6548644249363515e-06,
"loss": 2.2251,
"step": 139000
},
{
"epoch": 1.94,
"learning_rate": 1.5853030787851805e-06,
"loss": 2.2068,
"step": 139200
},
{
"epoch": 1.94,
"learning_rate": 1.51574173263401e-06,
"loss": 2.1716,
"step": 139400
},
{
"epoch": 1.94,
"learning_rate": 1.4461803864828393e-06,
"loss": 2.229,
"step": 139600
},
{
"epoch": 1.94,
"learning_rate": 1.3766190403316686e-06,
"loss": 2.2169,
"step": 139800
},
{
"epoch": 1.95,
"learning_rate": 1.3070576941804979e-06,
"loss": 2.1852,
"step": 140000
},
{
"epoch": 1.95,
"learning_rate": 1.2374963480293271e-06,
"loss": 2.2213,
"step": 140200
},
{
"epoch": 1.95,
"learning_rate": 1.1679350018781564e-06,
"loss": 2.2633,
"step": 140400
},
{
"epoch": 1.96,
"learning_rate": 1.0983736557269857e-06,
"loss": 2.1825,
"step": 140600
},
{
"epoch": 1.96,
"learning_rate": 1.028812309575815e-06,
"loss": 2.2269,
"step": 140800
},
{
"epoch": 1.96,
"learning_rate": 9.592509634246443e-07,
"loss": 2.2141,
"step": 141000
},
{
"epoch": 1.96,
"learning_rate": 8.896896172734734e-07,
"loss": 2.2351,
"step": 141200
},
{
"epoch": 1.97,
"learning_rate": 8.201282711223028e-07,
"loss": 2.2102,
"step": 141400
},
{
"epoch": 1.97,
"learning_rate": 7.505669249711321e-07,
"loss": 2.1856,
"step": 141600
},
{
"epoch": 1.97,
"learning_rate": 6.810055788199613e-07,
"loss": 2.2066,
"step": 141800
},
{
"epoch": 1.98,
"learning_rate": 6.114442326687907e-07,
"loss": 2.2295,
"step": 142000
},
{
"epoch": 1.98,
"learning_rate": 5.418828865176198e-07,
"loss": 2.1887,
"step": 142200
},
{
"epoch": 1.98,
"learning_rate": 4.7232154036644923e-07,
"loss": 2.1985,
"step": 142400
},
{
"epoch": 1.98,
"learning_rate": 4.0276019421527846e-07,
"loss": 2.2791,
"step": 142600
},
{
"epoch": 1.99,
"learning_rate": 3.3319884806410774e-07,
"loss": 2.2265,
"step": 142800
},
{
"epoch": 1.99,
"learning_rate": 2.63637501912937e-07,
"loss": 2.2137,
"step": 143000
},
{
"epoch": 1.99,
"learning_rate": 1.940761557617663e-07,
"loss": 2.253,
"step": 143200
},
{
"epoch": 2.0,
"learning_rate": 1.2451480961059558e-07,
"loss": 2.2167,
"step": 143400
},
{
"epoch": 2.0,
"learning_rate": 5.495346345942487e-08,
"loss": 2.2104,
"step": 143600
},
{
"epoch": 2.0,
"step": 143758,
"total_flos": 1.502524253059154e+17,
"train_loss": 2.352252098808129,
"train_runtime": 164976.46,
"train_samples_per_second": 3.486,
"train_steps_per_second": 0.871
}
],
"logging_steps": 200,
"max_steps": 143758,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 1.502524253059154e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}