PetBERT / trainer_state.json
seanfarrell's picture
Updated files with BERT-base backend
c2f47bc
raw
history blame
147 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 625000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9996000000000003e-05,
"loss": 4.9384,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.9992e-05,
"loss": 3.2955,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 1.9988000000000002e-05,
"loss": 2.879,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 1.9984e-05,
"loss": 2.6655,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 1.9980000000000002e-05,
"loss": 2.5291,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 1.9976000000000003e-05,
"loss": 2.4219,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 1.9972e-05,
"loss": 2.3595,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 1.9968e-05,
"loss": 2.2788,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 1.9964e-05,
"loss": 2.239,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 1.9960000000000002e-05,
"loss": 2.1891,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 1.9956000000000003e-05,
"loss": 2.1492,
"step": 5500
},
{
"epoch": 0.05,
"learning_rate": 1.9952e-05,
"loss": 2.1163,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 1.9948e-05,
"loss": 2.086,
"step": 6500
},
{
"epoch": 0.06,
"learning_rate": 1.9944e-05,
"loss": 2.0627,
"step": 7000
},
{
"epoch": 0.06,
"learning_rate": 1.9940000000000002e-05,
"loss": 2.0396,
"step": 7500
},
{
"epoch": 0.06,
"learning_rate": 1.9936000000000004e-05,
"loss": 2.0061,
"step": 8000
},
{
"epoch": 0.07,
"learning_rate": 1.9932e-05,
"loss": 1.9973,
"step": 8500
},
{
"epoch": 0.07,
"learning_rate": 1.9928e-05,
"loss": 1.9651,
"step": 9000
},
{
"epoch": 0.08,
"learning_rate": 1.9924e-05,
"loss": 1.9509,
"step": 9500
},
{
"epoch": 0.08,
"learning_rate": 1.9920000000000002e-05,
"loss": 1.937,
"step": 10000
},
{
"epoch": 0.08,
"learning_rate": 1.9916e-05,
"loss": 1.9226,
"step": 10500
},
{
"epoch": 0.09,
"learning_rate": 1.9912000000000002e-05,
"loss": 1.9087,
"step": 11000
},
{
"epoch": 0.09,
"learning_rate": 1.9908e-05,
"loss": 1.8888,
"step": 11500
},
{
"epoch": 0.1,
"learning_rate": 1.9904e-05,
"loss": 1.879,
"step": 12000
},
{
"epoch": 0.1,
"learning_rate": 1.9900000000000003e-05,
"loss": 1.8612,
"step": 12500
},
{
"epoch": 0.1,
"learning_rate": 1.9896e-05,
"loss": 1.8466,
"step": 13000
},
{
"epoch": 0.11,
"learning_rate": 1.9892000000000002e-05,
"loss": 1.8376,
"step": 13500
},
{
"epoch": 0.11,
"learning_rate": 1.9888e-05,
"loss": 1.8353,
"step": 14000
},
{
"epoch": 0.12,
"learning_rate": 1.9884e-05,
"loss": 1.8239,
"step": 14500
},
{
"epoch": 0.12,
"learning_rate": 1.9880000000000003e-05,
"loss": 1.8111,
"step": 15000
},
{
"epoch": 0.12,
"learning_rate": 1.9876e-05,
"loss": 1.8014,
"step": 15500
},
{
"epoch": 0.13,
"learning_rate": 1.9872000000000002e-05,
"loss": 1.7967,
"step": 16000
},
{
"epoch": 0.13,
"learning_rate": 1.9868e-05,
"loss": 1.78,
"step": 16500
},
{
"epoch": 0.14,
"learning_rate": 1.9864e-05,
"loss": 1.7791,
"step": 17000
},
{
"epoch": 0.14,
"learning_rate": 1.9860000000000003e-05,
"loss": 1.7657,
"step": 17500
},
{
"epoch": 0.14,
"learning_rate": 1.9856e-05,
"loss": 1.7641,
"step": 18000
},
{
"epoch": 0.15,
"learning_rate": 1.9852000000000002e-05,
"loss": 1.769,
"step": 18500
},
{
"epoch": 0.15,
"learning_rate": 1.9848e-05,
"loss": 1.7516,
"step": 19000
},
{
"epoch": 0.16,
"learning_rate": 1.9844000000000002e-05,
"loss": 1.7521,
"step": 19500
},
{
"epoch": 0.16,
"learning_rate": 1.9840000000000003e-05,
"loss": 1.738,
"step": 20000
},
{
"epoch": 0.16,
"learning_rate": 1.9836e-05,
"loss": 1.7292,
"step": 20500
},
{
"epoch": 0.17,
"learning_rate": 1.9832000000000003e-05,
"loss": 1.7123,
"step": 21000
},
{
"epoch": 0.17,
"learning_rate": 1.9828e-05,
"loss": 1.7123,
"step": 21500
},
{
"epoch": 0.18,
"learning_rate": 1.9824000000000002e-05,
"loss": 1.7095,
"step": 22000
},
{
"epoch": 0.18,
"learning_rate": 1.982e-05,
"loss": 1.71,
"step": 22500
},
{
"epoch": 0.18,
"learning_rate": 1.9816e-05,
"loss": 1.7009,
"step": 23000
},
{
"epoch": 0.19,
"learning_rate": 1.9812000000000003e-05,
"loss": 1.6947,
"step": 23500
},
{
"epoch": 0.19,
"learning_rate": 1.9808e-05,
"loss": 1.6951,
"step": 24000
},
{
"epoch": 0.2,
"learning_rate": 1.9804000000000002e-05,
"loss": 1.6828,
"step": 24500
},
{
"epoch": 0.2,
"learning_rate": 1.98e-05,
"loss": 1.676,
"step": 25000
},
{
"epoch": 0.2,
"learning_rate": 1.9796e-05,
"loss": 1.6734,
"step": 25500
},
{
"epoch": 0.21,
"learning_rate": 1.9792000000000003e-05,
"loss": 1.6642,
"step": 26000
},
{
"epoch": 0.21,
"learning_rate": 1.9788e-05,
"loss": 1.6649,
"step": 26500
},
{
"epoch": 0.22,
"learning_rate": 1.9784000000000002e-05,
"loss": 1.667,
"step": 27000
},
{
"epoch": 0.22,
"learning_rate": 1.978e-05,
"loss": 1.6556,
"step": 27500
},
{
"epoch": 0.22,
"learning_rate": 1.9776000000000002e-05,
"loss": 1.6532,
"step": 28000
},
{
"epoch": 0.23,
"learning_rate": 1.9772000000000003e-05,
"loss": 1.6545,
"step": 28500
},
{
"epoch": 0.23,
"learning_rate": 1.9768e-05,
"loss": 1.6481,
"step": 29000
},
{
"epoch": 0.24,
"learning_rate": 1.9764000000000003e-05,
"loss": 1.647,
"step": 29500
},
{
"epoch": 0.24,
"learning_rate": 1.976e-05,
"loss": 1.6385,
"step": 30000
},
{
"epoch": 0.24,
"learning_rate": 1.9756000000000002e-05,
"loss": 1.6337,
"step": 30500
},
{
"epoch": 0.25,
"learning_rate": 1.9752000000000003e-05,
"loss": 1.6296,
"step": 31000
},
{
"epoch": 0.25,
"learning_rate": 1.9748e-05,
"loss": 1.6339,
"step": 31500
},
{
"epoch": 0.26,
"learning_rate": 1.9744e-05,
"loss": 1.63,
"step": 32000
},
{
"epoch": 0.26,
"learning_rate": 1.974e-05,
"loss": 1.6224,
"step": 32500
},
{
"epoch": 0.26,
"learning_rate": 1.9736000000000002e-05,
"loss": 1.6207,
"step": 33000
},
{
"epoch": 0.27,
"learning_rate": 1.9732000000000004e-05,
"loss": 1.6101,
"step": 33500
},
{
"epoch": 0.27,
"learning_rate": 1.9728e-05,
"loss": 1.6095,
"step": 34000
},
{
"epoch": 0.28,
"learning_rate": 1.9724e-05,
"loss": 1.6029,
"step": 34500
},
{
"epoch": 0.28,
"learning_rate": 1.972e-05,
"loss": 1.6028,
"step": 35000
},
{
"epoch": 0.28,
"learning_rate": 1.9716000000000002e-05,
"loss": 1.5977,
"step": 35500
},
{
"epoch": 0.29,
"learning_rate": 1.9712000000000004e-05,
"loss": 1.5929,
"step": 36000
},
{
"epoch": 0.29,
"learning_rate": 1.9708000000000002e-05,
"loss": 1.5942,
"step": 36500
},
{
"epoch": 0.3,
"learning_rate": 1.9704e-05,
"loss": 1.5908,
"step": 37000
},
{
"epoch": 0.3,
"learning_rate": 1.97e-05,
"loss": 1.5926,
"step": 37500
},
{
"epoch": 0.3,
"learning_rate": 1.9696000000000003e-05,
"loss": 1.5793,
"step": 38000
},
{
"epoch": 0.31,
"learning_rate": 1.9692000000000004e-05,
"loss": 1.5769,
"step": 38500
},
{
"epoch": 0.31,
"learning_rate": 1.9688000000000002e-05,
"loss": 1.5778,
"step": 39000
},
{
"epoch": 0.32,
"learning_rate": 1.9684e-05,
"loss": 1.58,
"step": 39500
},
{
"epoch": 0.32,
"learning_rate": 1.968e-05,
"loss": 1.569,
"step": 40000
},
{
"epoch": 0.32,
"learning_rate": 1.9676000000000003e-05,
"loss": 1.5757,
"step": 40500
},
{
"epoch": 0.33,
"learning_rate": 1.9672e-05,
"loss": 1.5706,
"step": 41000
},
{
"epoch": 0.33,
"learning_rate": 1.9668000000000002e-05,
"loss": 1.5658,
"step": 41500
},
{
"epoch": 0.34,
"learning_rate": 1.9664e-05,
"loss": 1.5606,
"step": 42000
},
{
"epoch": 0.34,
"learning_rate": 1.966e-05,
"loss": 1.5525,
"step": 42500
},
{
"epoch": 0.34,
"learning_rate": 1.9656000000000003e-05,
"loss": 1.5628,
"step": 43000
},
{
"epoch": 0.35,
"learning_rate": 1.9652e-05,
"loss": 1.5532,
"step": 43500
},
{
"epoch": 0.35,
"learning_rate": 1.9648000000000002e-05,
"loss": 1.5476,
"step": 44000
},
{
"epoch": 0.36,
"learning_rate": 1.9644e-05,
"loss": 1.5552,
"step": 44500
},
{
"epoch": 0.36,
"learning_rate": 1.9640000000000002e-05,
"loss": 1.5474,
"step": 45000
},
{
"epoch": 0.36,
"learning_rate": 1.9636000000000003e-05,
"loss": 1.544,
"step": 45500
},
{
"epoch": 0.37,
"learning_rate": 1.9632e-05,
"loss": 1.5491,
"step": 46000
},
{
"epoch": 0.37,
"learning_rate": 1.9628000000000002e-05,
"loss": 1.5405,
"step": 46500
},
{
"epoch": 0.38,
"learning_rate": 1.9624e-05,
"loss": 1.5407,
"step": 47000
},
{
"epoch": 0.38,
"learning_rate": 1.9620000000000002e-05,
"loss": 1.539,
"step": 47500
},
{
"epoch": 0.38,
"learning_rate": 1.9616000000000003e-05,
"loss": 1.5269,
"step": 48000
},
{
"epoch": 0.39,
"learning_rate": 1.9612e-05,
"loss": 1.5229,
"step": 48500
},
{
"epoch": 0.39,
"learning_rate": 1.9608000000000003e-05,
"loss": 1.5364,
"step": 49000
},
{
"epoch": 0.4,
"learning_rate": 1.9604e-05,
"loss": 1.5312,
"step": 49500
},
{
"epoch": 0.4,
"learning_rate": 1.9600000000000002e-05,
"loss": 1.535,
"step": 50000
},
{
"epoch": 0.4,
"learning_rate": 1.9596e-05,
"loss": 1.5234,
"step": 50500
},
{
"epoch": 0.41,
"learning_rate": 1.9592e-05,
"loss": 1.5171,
"step": 51000
},
{
"epoch": 0.41,
"learning_rate": 1.9588000000000003e-05,
"loss": 1.5166,
"step": 51500
},
{
"epoch": 0.42,
"learning_rate": 1.9584e-05,
"loss": 1.5168,
"step": 52000
},
{
"epoch": 0.42,
"learning_rate": 1.9580000000000002e-05,
"loss": 1.5112,
"step": 52500
},
{
"epoch": 0.42,
"learning_rate": 1.9576e-05,
"loss": 1.5218,
"step": 53000
},
{
"epoch": 0.43,
"learning_rate": 1.9572e-05,
"loss": 1.51,
"step": 53500
},
{
"epoch": 0.43,
"learning_rate": 1.9568000000000003e-05,
"loss": 1.5093,
"step": 54000
},
{
"epoch": 0.44,
"learning_rate": 1.9564e-05,
"loss": 1.5038,
"step": 54500
},
{
"epoch": 0.44,
"learning_rate": 1.9560000000000002e-05,
"loss": 1.5069,
"step": 55000
},
{
"epoch": 0.44,
"learning_rate": 1.9556e-05,
"loss": 1.5045,
"step": 55500
},
{
"epoch": 0.45,
"learning_rate": 1.9552000000000002e-05,
"loss": 1.4999,
"step": 56000
},
{
"epoch": 0.45,
"learning_rate": 1.9548000000000003e-05,
"loss": 1.5019,
"step": 56500
},
{
"epoch": 0.46,
"learning_rate": 1.9544e-05,
"loss": 1.5001,
"step": 57000
},
{
"epoch": 0.46,
"learning_rate": 1.9540000000000003e-05,
"loss": 1.5014,
"step": 57500
},
{
"epoch": 0.46,
"learning_rate": 1.9536e-05,
"loss": 1.4977,
"step": 58000
},
{
"epoch": 0.47,
"learning_rate": 1.9532000000000002e-05,
"loss": 1.4924,
"step": 58500
},
{
"epoch": 0.47,
"learning_rate": 1.9528000000000003e-05,
"loss": 1.4853,
"step": 59000
},
{
"epoch": 0.48,
"learning_rate": 1.9524e-05,
"loss": 1.4857,
"step": 59500
},
{
"epoch": 0.48,
"learning_rate": 1.9520000000000003e-05,
"loss": 1.4909,
"step": 60000
},
{
"epoch": 0.48,
"learning_rate": 1.9516e-05,
"loss": 1.4927,
"step": 60500
},
{
"epoch": 0.49,
"learning_rate": 1.9512000000000002e-05,
"loss": 1.4781,
"step": 61000
},
{
"epoch": 0.49,
"learning_rate": 1.9508000000000004e-05,
"loss": 1.4841,
"step": 61500
},
{
"epoch": 0.5,
"learning_rate": 1.9504e-05,
"loss": 1.48,
"step": 62000
},
{
"epoch": 0.5,
"learning_rate": 1.95e-05,
"loss": 1.4897,
"step": 62500
},
{
"epoch": 0.5,
"learning_rate": 1.9496e-05,
"loss": 1.4823,
"step": 63000
},
{
"epoch": 0.51,
"learning_rate": 1.9492000000000002e-05,
"loss": 1.4801,
"step": 63500
},
{
"epoch": 0.51,
"learning_rate": 1.9488000000000004e-05,
"loss": 1.4817,
"step": 64000
},
{
"epoch": 0.52,
"learning_rate": 1.9484000000000002e-05,
"loss": 1.4841,
"step": 64500
},
{
"epoch": 0.52,
"learning_rate": 1.948e-05,
"loss": 1.4823,
"step": 65000
},
{
"epoch": 0.52,
"learning_rate": 1.9476e-05,
"loss": 1.4758,
"step": 65500
},
{
"epoch": 0.53,
"learning_rate": 1.9472000000000003e-05,
"loss": 1.4731,
"step": 66000
},
{
"epoch": 0.53,
"learning_rate": 1.9468000000000004e-05,
"loss": 1.4695,
"step": 66500
},
{
"epoch": 0.54,
"learning_rate": 1.9464000000000002e-05,
"loss": 1.4712,
"step": 67000
},
{
"epoch": 0.54,
"learning_rate": 1.946e-05,
"loss": 1.4635,
"step": 67500
},
{
"epoch": 0.54,
"learning_rate": 1.9456e-05,
"loss": 1.4711,
"step": 68000
},
{
"epoch": 0.55,
"learning_rate": 1.9452000000000003e-05,
"loss": 1.472,
"step": 68500
},
{
"epoch": 0.55,
"learning_rate": 1.9448e-05,
"loss": 1.4643,
"step": 69000
},
{
"epoch": 0.56,
"learning_rate": 1.9444000000000002e-05,
"loss": 1.4622,
"step": 69500
},
{
"epoch": 0.56,
"learning_rate": 1.944e-05,
"loss": 1.4658,
"step": 70000
},
{
"epoch": 0.56,
"learning_rate": 1.9436e-05,
"loss": 1.4688,
"step": 70500
},
{
"epoch": 0.57,
"learning_rate": 1.9432000000000003e-05,
"loss": 1.463,
"step": 71000
},
{
"epoch": 0.57,
"learning_rate": 1.9428e-05,
"loss": 1.4603,
"step": 71500
},
{
"epoch": 0.58,
"learning_rate": 1.9424e-05,
"loss": 1.4553,
"step": 72000
},
{
"epoch": 0.58,
"learning_rate": 1.942e-05,
"loss": 1.4544,
"step": 72500
},
{
"epoch": 0.58,
"learning_rate": 1.9416000000000002e-05,
"loss": 1.4559,
"step": 73000
},
{
"epoch": 0.59,
"learning_rate": 1.9412000000000003e-05,
"loss": 1.4488,
"step": 73500
},
{
"epoch": 0.59,
"learning_rate": 1.9408e-05,
"loss": 1.4493,
"step": 74000
},
{
"epoch": 0.6,
"learning_rate": 1.9404e-05,
"loss": 1.4501,
"step": 74500
},
{
"epoch": 0.6,
"learning_rate": 1.94e-05,
"loss": 1.4479,
"step": 75000
},
{
"epoch": 0.6,
"learning_rate": 1.9396000000000002e-05,
"loss": 1.4461,
"step": 75500
},
{
"epoch": 0.61,
"learning_rate": 1.9392000000000003e-05,
"loss": 1.4441,
"step": 76000
},
{
"epoch": 0.61,
"learning_rate": 1.9388e-05,
"loss": 1.4549,
"step": 76500
},
{
"epoch": 0.62,
"learning_rate": 1.9384e-05,
"loss": 1.4426,
"step": 77000
},
{
"epoch": 0.62,
"learning_rate": 1.938e-05,
"loss": 1.4469,
"step": 77500
},
{
"epoch": 0.62,
"learning_rate": 1.9376000000000002e-05,
"loss": 1.431,
"step": 78000
},
{
"epoch": 0.63,
"learning_rate": 1.9372000000000004e-05,
"loss": 1.4444,
"step": 78500
},
{
"epoch": 0.63,
"learning_rate": 1.9368e-05,
"loss": 1.4401,
"step": 79000
},
{
"epoch": 0.64,
"learning_rate": 1.9364e-05,
"loss": 1.4343,
"step": 79500
},
{
"epoch": 0.64,
"learning_rate": 1.936e-05,
"loss": 1.4358,
"step": 80000
},
{
"epoch": 0.64,
"learning_rate": 1.9356000000000002e-05,
"loss": 1.4324,
"step": 80500
},
{
"epoch": 0.65,
"learning_rate": 1.9352e-05,
"loss": 1.4299,
"step": 81000
},
{
"epoch": 0.65,
"learning_rate": 1.9348000000000002e-05,
"loss": 1.4306,
"step": 81500
},
{
"epoch": 0.66,
"learning_rate": 1.9344e-05,
"loss": 1.4315,
"step": 82000
},
{
"epoch": 0.66,
"learning_rate": 1.934e-05,
"loss": 1.4348,
"step": 82500
},
{
"epoch": 0.66,
"learning_rate": 1.9336000000000003e-05,
"loss": 1.4284,
"step": 83000
},
{
"epoch": 0.67,
"learning_rate": 1.9332e-05,
"loss": 1.4277,
"step": 83500
},
{
"epoch": 0.67,
"learning_rate": 1.9328000000000002e-05,
"loss": 1.4362,
"step": 84000
},
{
"epoch": 0.68,
"learning_rate": 1.9324e-05,
"loss": 1.4311,
"step": 84500
},
{
"epoch": 0.68,
"learning_rate": 1.932e-05,
"loss": 1.4264,
"step": 85000
},
{
"epoch": 0.68,
"learning_rate": 1.9316000000000003e-05,
"loss": 1.4241,
"step": 85500
},
{
"epoch": 0.69,
"learning_rate": 1.9312e-05,
"loss": 1.4169,
"step": 86000
},
{
"epoch": 0.69,
"learning_rate": 1.9308000000000002e-05,
"loss": 1.4226,
"step": 86500
},
{
"epoch": 0.7,
"learning_rate": 1.9304e-05,
"loss": 1.4244,
"step": 87000
},
{
"epoch": 0.7,
"learning_rate": 1.93e-05,
"loss": 1.4194,
"step": 87500
},
{
"epoch": 0.7,
"learning_rate": 1.9296000000000003e-05,
"loss": 1.4198,
"step": 88000
},
{
"epoch": 0.71,
"learning_rate": 1.9292e-05,
"loss": 1.4175,
"step": 88500
},
{
"epoch": 0.71,
"learning_rate": 1.9288000000000002e-05,
"loss": 1.4189,
"step": 89000
},
{
"epoch": 0.72,
"learning_rate": 1.9284e-05,
"loss": 1.4248,
"step": 89500
},
{
"epoch": 0.72,
"learning_rate": 1.9280000000000002e-05,
"loss": 1.4194,
"step": 90000
},
{
"epoch": 0.72,
"learning_rate": 1.9276e-05,
"loss": 1.4118,
"step": 90500
},
{
"epoch": 0.73,
"learning_rate": 1.9272e-05,
"loss": 1.4141,
"step": 91000
},
{
"epoch": 0.73,
"learning_rate": 1.9268000000000003e-05,
"loss": 1.4151,
"step": 91500
},
{
"epoch": 0.74,
"learning_rate": 1.9264e-05,
"loss": 1.4161,
"step": 92000
},
{
"epoch": 0.74,
"learning_rate": 1.9260000000000002e-05,
"loss": 1.412,
"step": 92500
},
{
"epoch": 0.74,
"learning_rate": 1.9256e-05,
"loss": 1.4086,
"step": 93000
},
{
"epoch": 0.75,
"learning_rate": 1.9252e-05,
"loss": 1.4062,
"step": 93500
},
{
"epoch": 0.75,
"learning_rate": 1.9248000000000003e-05,
"loss": 1.4112,
"step": 94000
},
{
"epoch": 0.76,
"learning_rate": 1.9244000000000004e-05,
"loss": 1.399,
"step": 94500
},
{
"epoch": 0.76,
"learning_rate": 1.9240000000000002e-05,
"loss": 1.4006,
"step": 95000
},
{
"epoch": 0.76,
"learning_rate": 1.9236e-05,
"loss": 1.4046,
"step": 95500
},
{
"epoch": 0.77,
"learning_rate": 1.9232e-05,
"loss": 1.4005,
"step": 96000
},
{
"epoch": 0.77,
"learning_rate": 1.9228000000000003e-05,
"loss": 1.4049,
"step": 96500
},
{
"epoch": 0.78,
"learning_rate": 1.9224000000000004e-05,
"loss": 1.4011,
"step": 97000
},
{
"epoch": 0.78,
"learning_rate": 1.9220000000000002e-05,
"loss": 1.3991,
"step": 97500
},
{
"epoch": 0.78,
"learning_rate": 1.9216e-05,
"loss": 1.3992,
"step": 98000
},
{
"epoch": 0.79,
"learning_rate": 1.9212000000000002e-05,
"loss": 1.405,
"step": 98500
},
{
"epoch": 0.79,
"learning_rate": 1.9208000000000003e-05,
"loss": 1.4007,
"step": 99000
},
{
"epoch": 0.8,
"learning_rate": 1.9204e-05,
"loss": 1.3973,
"step": 99500
},
{
"epoch": 0.8,
"learning_rate": 1.9200000000000003e-05,
"loss": 1.3959,
"step": 100000
},
{
"epoch": 0.8,
"learning_rate": 1.9196e-05,
"loss": 1.4014,
"step": 100500
},
{
"epoch": 0.81,
"learning_rate": 1.9192000000000002e-05,
"loss": 1.3928,
"step": 101000
},
{
"epoch": 0.81,
"learning_rate": 1.9188000000000003e-05,
"loss": 1.3946,
"step": 101500
},
{
"epoch": 0.82,
"learning_rate": 1.9184e-05,
"loss": 1.3904,
"step": 102000
},
{
"epoch": 0.82,
"learning_rate": 1.918e-05,
"loss": 1.3904,
"step": 102500
},
{
"epoch": 0.82,
"learning_rate": 1.9176e-05,
"loss": 1.3949,
"step": 103000
},
{
"epoch": 0.83,
"learning_rate": 1.9172000000000002e-05,
"loss": 1.3948,
"step": 103500
},
{
"epoch": 0.83,
"learning_rate": 1.9168000000000004e-05,
"loss": 1.3859,
"step": 104000
},
{
"epoch": 0.84,
"learning_rate": 1.9164e-05,
"loss": 1.3803,
"step": 104500
},
{
"epoch": 0.84,
"learning_rate": 1.916e-05,
"loss": 1.3885,
"step": 105000
},
{
"epoch": 0.84,
"learning_rate": 1.9156e-05,
"loss": 1.3773,
"step": 105500
},
{
"epoch": 0.85,
"learning_rate": 1.9152000000000002e-05,
"loss": 1.3812,
"step": 106000
},
{
"epoch": 0.85,
"learning_rate": 1.9148000000000004e-05,
"loss": 1.3844,
"step": 106500
},
{
"epoch": 0.86,
"learning_rate": 1.9144000000000002e-05,
"loss": 1.386,
"step": 107000
},
{
"epoch": 0.86,
"learning_rate": 1.914e-05,
"loss": 1.381,
"step": 107500
},
{
"epoch": 0.86,
"learning_rate": 1.9136e-05,
"loss": 1.388,
"step": 108000
},
{
"epoch": 0.87,
"learning_rate": 1.9132000000000002e-05,
"loss": 1.3822,
"step": 108500
},
{
"epoch": 0.87,
"learning_rate": 1.9128e-05,
"loss": 1.3875,
"step": 109000
},
{
"epoch": 0.88,
"learning_rate": 1.9124000000000002e-05,
"loss": 1.3869,
"step": 109500
},
{
"epoch": 0.88,
"learning_rate": 1.912e-05,
"loss": 1.3877,
"step": 110000
},
{
"epoch": 0.88,
"learning_rate": 1.9116e-05,
"loss": 1.3814,
"step": 110500
},
{
"epoch": 0.89,
"learning_rate": 1.9112000000000003e-05,
"loss": 1.3852,
"step": 111000
},
{
"epoch": 0.89,
"learning_rate": 1.9108e-05,
"loss": 1.3797,
"step": 111500
},
{
"epoch": 0.9,
"learning_rate": 1.9104000000000002e-05,
"loss": 1.3692,
"step": 112000
},
{
"epoch": 0.9,
"learning_rate": 1.91e-05,
"loss": 1.3773,
"step": 112500
},
{
"epoch": 0.9,
"learning_rate": 1.9096e-05,
"loss": 1.3822,
"step": 113000
},
{
"epoch": 0.91,
"learning_rate": 1.9092000000000003e-05,
"loss": 1.3831,
"step": 113500
},
{
"epoch": 0.91,
"learning_rate": 1.9088e-05,
"loss": 1.3746,
"step": 114000
},
{
"epoch": 0.92,
"learning_rate": 1.9084000000000002e-05,
"loss": 1.3697,
"step": 114500
},
{
"epoch": 0.92,
"learning_rate": 1.908e-05,
"loss": 1.3753,
"step": 115000
},
{
"epoch": 0.92,
"learning_rate": 1.9076e-05,
"loss": 1.3764,
"step": 115500
},
{
"epoch": 0.93,
"learning_rate": 1.9072000000000003e-05,
"loss": 1.3688,
"step": 116000
},
{
"epoch": 0.93,
"learning_rate": 1.9068e-05,
"loss": 1.3679,
"step": 116500
},
{
"epoch": 0.94,
"learning_rate": 1.9064000000000002e-05,
"loss": 1.3676,
"step": 117000
},
{
"epoch": 0.94,
"learning_rate": 1.906e-05,
"loss": 1.3733,
"step": 117500
},
{
"epoch": 0.94,
"learning_rate": 1.9056000000000002e-05,
"loss": 1.3679,
"step": 118000
},
{
"epoch": 0.95,
"learning_rate": 1.9052000000000003e-05,
"loss": 1.3595,
"step": 118500
},
{
"epoch": 0.95,
"learning_rate": 1.9048e-05,
"loss": 1.3723,
"step": 119000
},
{
"epoch": 0.96,
"learning_rate": 1.9044000000000003e-05,
"loss": 1.3695,
"step": 119500
},
{
"epoch": 0.96,
"learning_rate": 1.904e-05,
"loss": 1.368,
"step": 120000
},
{
"epoch": 0.96,
"learning_rate": 1.9036000000000002e-05,
"loss": 1.3656,
"step": 120500
},
{
"epoch": 0.97,
"learning_rate": 1.9032e-05,
"loss": 1.364,
"step": 121000
},
{
"epoch": 0.97,
"learning_rate": 1.9028e-05,
"loss": 1.357,
"step": 121500
},
{
"epoch": 0.98,
"learning_rate": 1.9024000000000003e-05,
"loss": 1.3588,
"step": 122000
},
{
"epoch": 0.98,
"learning_rate": 1.902e-05,
"loss": 1.361,
"step": 122500
},
{
"epoch": 0.98,
"learning_rate": 1.9016000000000002e-05,
"loss": 1.3631,
"step": 123000
},
{
"epoch": 0.99,
"learning_rate": 1.9012e-05,
"loss": 1.3566,
"step": 123500
},
{
"epoch": 0.99,
"learning_rate": 1.9008e-05,
"loss": 1.3568,
"step": 124000
},
{
"epoch": 1.0,
"learning_rate": 1.9004000000000003e-05,
"loss": 1.3615,
"step": 124500
},
{
"epoch": 1.0,
"learning_rate": 1.9e-05,
"loss": 1.3577,
"step": 125000
},
{
"epoch": 1.0,
"eval_loss": 1.2994823455810547,
"eval_runtime": 80.8005,
"eval_samples_per_second": 166.002,
"eval_steps_per_second": 2.599,
"step": 125000
},
{
"epoch": 1.0,
"learning_rate": 1.8996000000000002e-05,
"loss": 1.3472,
"step": 125500
},
{
"epoch": 1.01,
"learning_rate": 1.8992e-05,
"loss": 1.3557,
"step": 126000
},
{
"epoch": 1.01,
"learning_rate": 1.8988000000000002e-05,
"loss": 1.3538,
"step": 126500
},
{
"epoch": 1.02,
"learning_rate": 1.8984000000000003e-05,
"loss": 1.3584,
"step": 127000
},
{
"epoch": 1.02,
"learning_rate": 1.898e-05,
"loss": 1.3484,
"step": 127500
},
{
"epoch": 1.02,
"learning_rate": 1.8976000000000003e-05,
"loss": 1.351,
"step": 128000
},
{
"epoch": 1.03,
"learning_rate": 1.8972e-05,
"loss": 1.3485,
"step": 128500
},
{
"epoch": 1.03,
"learning_rate": 1.8968000000000002e-05,
"loss": 1.3497,
"step": 129000
},
{
"epoch": 1.04,
"learning_rate": 1.8964000000000003e-05,
"loss": 1.3455,
"step": 129500
},
{
"epoch": 1.04,
"learning_rate": 1.896e-05,
"loss": 1.3437,
"step": 130000
},
{
"epoch": 1.04,
"learning_rate": 1.8956e-05,
"loss": 1.3544,
"step": 130500
},
{
"epoch": 1.05,
"learning_rate": 1.8952e-05,
"loss": 1.3606,
"step": 131000
},
{
"epoch": 1.05,
"learning_rate": 1.8948000000000002e-05,
"loss": 1.3514,
"step": 131500
},
{
"epoch": 1.06,
"learning_rate": 1.8944000000000004e-05,
"loss": 1.3377,
"step": 132000
},
{
"epoch": 1.06,
"learning_rate": 1.894e-05,
"loss": 1.3421,
"step": 132500
},
{
"epoch": 1.06,
"learning_rate": 1.8936e-05,
"loss": 1.3454,
"step": 133000
},
{
"epoch": 1.07,
"learning_rate": 1.8932e-05,
"loss": 1.3386,
"step": 133500
},
{
"epoch": 1.07,
"learning_rate": 1.8928000000000002e-05,
"loss": 1.3444,
"step": 134000
},
{
"epoch": 1.08,
"learning_rate": 1.8924000000000004e-05,
"loss": 1.3426,
"step": 134500
},
{
"epoch": 1.08,
"learning_rate": 1.8920000000000002e-05,
"loss": 1.344,
"step": 135000
},
{
"epoch": 1.08,
"learning_rate": 1.8916e-05,
"loss": 1.3443,
"step": 135500
},
{
"epoch": 1.09,
"learning_rate": 1.8912e-05,
"loss": 1.3431,
"step": 136000
},
{
"epoch": 1.09,
"learning_rate": 1.8908000000000003e-05,
"loss": 1.341,
"step": 136500
},
{
"epoch": 1.1,
"learning_rate": 1.8904000000000004e-05,
"loss": 1.3528,
"step": 137000
},
{
"epoch": 1.1,
"learning_rate": 1.8900000000000002e-05,
"loss": 1.3424,
"step": 137500
},
{
"epoch": 1.1,
"learning_rate": 1.8896e-05,
"loss": 1.3506,
"step": 138000
},
{
"epoch": 1.11,
"learning_rate": 1.8892e-05,
"loss": 1.3451,
"step": 138500
},
{
"epoch": 1.11,
"learning_rate": 1.8888000000000003e-05,
"loss": 1.3451,
"step": 139000
},
{
"epoch": 1.12,
"learning_rate": 1.8884e-05,
"loss": 1.3366,
"step": 139500
},
{
"epoch": 1.12,
"learning_rate": 1.8880000000000002e-05,
"loss": 1.34,
"step": 140000
},
{
"epoch": 1.12,
"learning_rate": 1.8876e-05,
"loss": 1.3465,
"step": 140500
},
{
"epoch": 1.13,
"learning_rate": 1.8872e-05,
"loss": 1.3462,
"step": 141000
},
{
"epoch": 1.13,
"learning_rate": 1.8868000000000003e-05,
"loss": 1.3359,
"step": 141500
},
{
"epoch": 1.14,
"learning_rate": 1.8864e-05,
"loss": 1.3431,
"step": 142000
},
{
"epoch": 1.14,
"learning_rate": 1.886e-05,
"loss": 1.337,
"step": 142500
},
{
"epoch": 1.14,
"learning_rate": 1.8856e-05,
"loss": 1.3355,
"step": 143000
},
{
"epoch": 1.15,
"learning_rate": 1.8852000000000002e-05,
"loss": 1.3374,
"step": 143500
},
{
"epoch": 1.15,
"learning_rate": 1.8848000000000003e-05,
"loss": 1.3362,
"step": 144000
},
{
"epoch": 1.16,
"learning_rate": 1.8844e-05,
"loss": 1.334,
"step": 144500
},
{
"epoch": 1.16,
"learning_rate": 1.884e-05,
"loss": 1.3311,
"step": 145000
},
{
"epoch": 1.16,
"learning_rate": 1.8836e-05,
"loss": 1.3281,
"step": 145500
},
{
"epoch": 1.17,
"learning_rate": 1.8832000000000002e-05,
"loss": 1.338,
"step": 146000
},
{
"epoch": 1.17,
"learning_rate": 1.8828000000000003e-05,
"loss": 1.3314,
"step": 146500
},
{
"epoch": 1.18,
"learning_rate": 1.8824e-05,
"loss": 1.3319,
"step": 147000
},
{
"epoch": 1.18,
"learning_rate": 1.882e-05,
"loss": 1.3316,
"step": 147500
},
{
"epoch": 1.18,
"learning_rate": 1.8816e-05,
"loss": 1.329,
"step": 148000
},
{
"epoch": 1.19,
"learning_rate": 1.8812000000000002e-05,
"loss": 1.3322,
"step": 148500
},
{
"epoch": 1.19,
"learning_rate": 1.8808e-05,
"loss": 1.3236,
"step": 149000
},
{
"epoch": 1.2,
"learning_rate": 1.8804e-05,
"loss": 1.3237,
"step": 149500
},
{
"epoch": 1.2,
"learning_rate": 1.88e-05,
"loss": 1.3316,
"step": 150000
},
{
"epoch": 1.2,
"learning_rate": 1.8796e-05,
"loss": 1.3354,
"step": 150500
},
{
"epoch": 1.21,
"learning_rate": 1.8792000000000002e-05,
"loss": 1.3304,
"step": 151000
},
{
"epoch": 1.21,
"learning_rate": 1.8788e-05,
"loss": 1.3295,
"step": 151500
},
{
"epoch": 1.22,
"learning_rate": 1.8784000000000002e-05,
"loss": 1.3275,
"step": 152000
},
{
"epoch": 1.22,
"learning_rate": 1.878e-05,
"loss": 1.3288,
"step": 152500
},
{
"epoch": 1.22,
"learning_rate": 1.8776e-05,
"loss": 1.3341,
"step": 153000
},
{
"epoch": 1.23,
"learning_rate": 1.8772000000000003e-05,
"loss": 1.3203,
"step": 153500
},
{
"epoch": 1.23,
"learning_rate": 1.8768e-05,
"loss": 1.3242,
"step": 154000
},
{
"epoch": 1.24,
"learning_rate": 1.8764000000000002e-05,
"loss": 1.3234,
"step": 154500
},
{
"epoch": 1.24,
"learning_rate": 1.876e-05,
"loss": 1.3264,
"step": 155000
},
{
"epoch": 1.24,
"learning_rate": 1.8756e-05,
"loss": 1.3247,
"step": 155500
},
{
"epoch": 1.25,
"learning_rate": 1.8752000000000003e-05,
"loss": 1.3219,
"step": 156000
},
{
"epoch": 1.25,
"learning_rate": 1.8748e-05,
"loss": 1.3211,
"step": 156500
},
{
"epoch": 1.26,
"learning_rate": 1.8744000000000002e-05,
"loss": 1.3198,
"step": 157000
},
{
"epoch": 1.26,
"learning_rate": 1.8740000000000004e-05,
"loss": 1.3225,
"step": 157500
},
{
"epoch": 1.26,
"learning_rate": 1.8736e-05,
"loss": 1.3175,
"step": 158000
},
{
"epoch": 1.27,
"learning_rate": 1.8732000000000003e-05,
"loss": 1.3248,
"step": 158500
},
{
"epoch": 1.27,
"learning_rate": 1.8728e-05,
"loss": 1.316,
"step": 159000
},
{
"epoch": 1.28,
"learning_rate": 1.8724000000000002e-05,
"loss": 1.3208,
"step": 159500
},
{
"epoch": 1.28,
"learning_rate": 1.8720000000000004e-05,
"loss": 1.3132,
"step": 160000
},
{
"epoch": 1.28,
"learning_rate": 1.8716000000000002e-05,
"loss": 1.3097,
"step": 160500
},
{
"epoch": 1.29,
"learning_rate": 1.8712e-05,
"loss": 1.3181,
"step": 161000
},
{
"epoch": 1.29,
"learning_rate": 1.8708e-05,
"loss": 1.3199,
"step": 161500
},
{
"epoch": 1.3,
"learning_rate": 1.8704000000000003e-05,
"loss": 1.3191,
"step": 162000
},
{
"epoch": 1.3,
"learning_rate": 1.8700000000000004e-05,
"loss": 1.3189,
"step": 162500
},
{
"epoch": 1.3,
"learning_rate": 1.8696000000000002e-05,
"loss": 1.319,
"step": 163000
},
{
"epoch": 1.31,
"learning_rate": 1.8692e-05,
"loss": 1.3203,
"step": 163500
},
{
"epoch": 1.31,
"learning_rate": 1.8688e-05,
"loss": 1.3155,
"step": 164000
},
{
"epoch": 1.32,
"learning_rate": 1.8684000000000003e-05,
"loss": 1.3148,
"step": 164500
},
{
"epoch": 1.32,
"learning_rate": 1.8680000000000004e-05,
"loss": 1.319,
"step": 165000
},
{
"epoch": 1.32,
"learning_rate": 1.8676000000000002e-05,
"loss": 1.3105,
"step": 165500
},
{
"epoch": 1.33,
"learning_rate": 1.8672e-05,
"loss": 1.3117,
"step": 166000
},
{
"epoch": 1.33,
"learning_rate": 1.8668e-05,
"loss": 1.3119,
"step": 166500
},
{
"epoch": 1.34,
"learning_rate": 1.8664000000000003e-05,
"loss": 1.3159,
"step": 167000
},
{
"epoch": 1.34,
"learning_rate": 1.866e-05,
"loss": 1.307,
"step": 167500
},
{
"epoch": 1.34,
"learning_rate": 1.8656000000000002e-05,
"loss": 1.3063,
"step": 168000
},
{
"epoch": 1.35,
"learning_rate": 1.8652e-05,
"loss": 1.3102,
"step": 168500
},
{
"epoch": 1.35,
"learning_rate": 1.8648000000000002e-05,
"loss": 1.3134,
"step": 169000
},
{
"epoch": 1.36,
"learning_rate": 1.8644000000000003e-05,
"loss": 1.314,
"step": 169500
},
{
"epoch": 1.36,
"learning_rate": 1.864e-05,
"loss": 1.3111,
"step": 170000
},
{
"epoch": 1.36,
"learning_rate": 1.8636e-05,
"loss": 1.3047,
"step": 170500
},
{
"epoch": 1.37,
"learning_rate": 1.8632e-05,
"loss": 1.3095,
"step": 171000
},
{
"epoch": 1.37,
"learning_rate": 1.8628000000000002e-05,
"loss": 1.3066,
"step": 171500
},
{
"epoch": 1.38,
"learning_rate": 1.8624000000000003e-05,
"loss": 1.3071,
"step": 172000
},
{
"epoch": 1.38,
"learning_rate": 1.862e-05,
"loss": 1.2985,
"step": 172500
},
{
"epoch": 1.38,
"learning_rate": 1.8616e-05,
"loss": 1.3103,
"step": 173000
},
{
"epoch": 1.39,
"learning_rate": 1.8612e-05,
"loss": 1.3106,
"step": 173500
},
{
"epoch": 1.39,
"learning_rate": 1.8608000000000002e-05,
"loss": 1.3055,
"step": 174000
},
{
"epoch": 1.4,
"learning_rate": 1.8604000000000003e-05,
"loss": 1.3074,
"step": 174500
},
{
"epoch": 1.4,
"learning_rate": 1.86e-05,
"loss": 1.3028,
"step": 175000
},
{
"epoch": 1.4,
"learning_rate": 1.8596e-05,
"loss": 1.3027,
"step": 175500
},
{
"epoch": 1.41,
"learning_rate": 1.8592e-05,
"loss": 1.2988,
"step": 176000
},
{
"epoch": 1.41,
"learning_rate": 1.8588000000000002e-05,
"loss": 1.303,
"step": 176500
},
{
"epoch": 1.42,
"learning_rate": 1.8584000000000004e-05,
"loss": 1.3009,
"step": 177000
},
{
"epoch": 1.42,
"learning_rate": 1.858e-05,
"loss": 1.3023,
"step": 177500
},
{
"epoch": 1.42,
"learning_rate": 1.8576e-05,
"loss": 1.3051,
"step": 178000
},
{
"epoch": 1.43,
"learning_rate": 1.8572e-05,
"loss": 1.3032,
"step": 178500
},
{
"epoch": 1.43,
"learning_rate": 1.8568000000000002e-05,
"loss": 1.3051,
"step": 179000
},
{
"epoch": 1.44,
"learning_rate": 1.8564e-05,
"loss": 1.303,
"step": 179500
},
{
"epoch": 1.44,
"learning_rate": 1.8560000000000002e-05,
"loss": 1.2975,
"step": 180000
},
{
"epoch": 1.44,
"learning_rate": 1.8556e-05,
"loss": 1.2985,
"step": 180500
},
{
"epoch": 1.45,
"learning_rate": 1.8552e-05,
"loss": 1.3044,
"step": 181000
},
{
"epoch": 1.45,
"learning_rate": 1.8548000000000003e-05,
"loss": 1.2983,
"step": 181500
},
{
"epoch": 1.46,
"learning_rate": 1.8544e-05,
"loss": 1.2999,
"step": 182000
},
{
"epoch": 1.46,
"learning_rate": 1.8540000000000002e-05,
"loss": 1.3027,
"step": 182500
},
{
"epoch": 1.46,
"learning_rate": 1.8536e-05,
"loss": 1.3021,
"step": 183000
},
{
"epoch": 1.47,
"learning_rate": 1.8532e-05,
"loss": 1.2962,
"step": 183500
},
{
"epoch": 1.47,
"learning_rate": 1.8528000000000003e-05,
"loss": 1.2929,
"step": 184000
},
{
"epoch": 1.48,
"learning_rate": 1.8524e-05,
"loss": 1.3006,
"step": 184500
},
{
"epoch": 1.48,
"learning_rate": 1.8520000000000002e-05,
"loss": 1.2888,
"step": 185000
},
{
"epoch": 1.48,
"learning_rate": 1.8516e-05,
"loss": 1.2982,
"step": 185500
},
{
"epoch": 1.49,
"learning_rate": 1.8512e-05,
"loss": 1.2911,
"step": 186000
},
{
"epoch": 1.49,
"learning_rate": 1.8508000000000003e-05,
"loss": 1.2942,
"step": 186500
},
{
"epoch": 1.5,
"learning_rate": 1.8504e-05,
"loss": 1.3,
"step": 187000
},
{
"epoch": 1.5,
"learning_rate": 1.8500000000000002e-05,
"loss": 1.2996,
"step": 187500
},
{
"epoch": 1.5,
"learning_rate": 1.8496e-05,
"loss": 1.2964,
"step": 188000
},
{
"epoch": 1.51,
"learning_rate": 1.8492000000000002e-05,
"loss": 1.2862,
"step": 188500
},
{
"epoch": 1.51,
"learning_rate": 1.8488e-05,
"loss": 1.2911,
"step": 189000
},
{
"epoch": 1.52,
"learning_rate": 1.8484e-05,
"loss": 1.295,
"step": 189500
},
{
"epoch": 1.52,
"learning_rate": 1.8480000000000003e-05,
"loss": 1.2934,
"step": 190000
},
{
"epoch": 1.52,
"learning_rate": 1.8476e-05,
"loss": 1.2915,
"step": 190500
},
{
"epoch": 1.53,
"learning_rate": 1.8472000000000002e-05,
"loss": 1.289,
"step": 191000
},
{
"epoch": 1.53,
"learning_rate": 1.8468e-05,
"loss": 1.2858,
"step": 191500
},
{
"epoch": 1.54,
"learning_rate": 1.8464e-05,
"loss": 1.298,
"step": 192000
},
{
"epoch": 1.54,
"learning_rate": 1.8460000000000003e-05,
"loss": 1.2918,
"step": 192500
},
{
"epoch": 1.54,
"learning_rate": 1.8456e-05,
"loss": 1.2922,
"step": 193000
},
{
"epoch": 1.55,
"learning_rate": 1.8452000000000002e-05,
"loss": 1.2901,
"step": 193500
},
{
"epoch": 1.55,
"learning_rate": 1.8448e-05,
"loss": 1.2854,
"step": 194000
},
{
"epoch": 1.56,
"learning_rate": 1.8444e-05,
"loss": 1.2853,
"step": 194500
},
{
"epoch": 1.56,
"learning_rate": 1.8440000000000003e-05,
"loss": 1.2896,
"step": 195000
},
{
"epoch": 1.56,
"learning_rate": 1.8436e-05,
"loss": 1.2873,
"step": 195500
},
{
"epoch": 1.57,
"learning_rate": 1.8432000000000002e-05,
"loss": 1.2856,
"step": 196000
},
{
"epoch": 1.57,
"learning_rate": 1.8428e-05,
"loss": 1.2851,
"step": 196500
},
{
"epoch": 1.58,
"learning_rate": 1.8424000000000002e-05,
"loss": 1.2843,
"step": 197000
},
{
"epoch": 1.58,
"learning_rate": 1.8420000000000003e-05,
"loss": 1.2896,
"step": 197500
},
{
"epoch": 1.58,
"learning_rate": 1.8416e-05,
"loss": 1.2893,
"step": 198000
},
{
"epoch": 1.59,
"learning_rate": 1.8412000000000003e-05,
"loss": 1.2837,
"step": 198500
},
{
"epoch": 1.59,
"learning_rate": 1.8408e-05,
"loss": 1.2922,
"step": 199000
},
{
"epoch": 1.6,
"learning_rate": 1.8404000000000002e-05,
"loss": 1.289,
"step": 199500
},
{
"epoch": 1.6,
"learning_rate": 1.8400000000000003e-05,
"loss": 1.2923,
"step": 200000
},
{
"epoch": 1.6,
"learning_rate": 1.8396e-05,
"loss": 1.2859,
"step": 200500
},
{
"epoch": 1.61,
"learning_rate": 1.8392e-05,
"loss": 1.2819,
"step": 201000
},
{
"epoch": 1.61,
"learning_rate": 1.8388e-05,
"loss": 1.2864,
"step": 201500
},
{
"epoch": 1.62,
"learning_rate": 1.8384000000000002e-05,
"loss": 1.2828,
"step": 202000
},
{
"epoch": 1.62,
"learning_rate": 1.8380000000000004e-05,
"loss": 1.2883,
"step": 202500
},
{
"epoch": 1.62,
"learning_rate": 1.8376e-05,
"loss": 1.2833,
"step": 203000
},
{
"epoch": 1.63,
"learning_rate": 1.8372e-05,
"loss": 1.284,
"step": 203500
},
{
"epoch": 1.63,
"learning_rate": 1.8368e-05,
"loss": 1.2811,
"step": 204000
},
{
"epoch": 1.64,
"learning_rate": 1.8364000000000002e-05,
"loss": 1.2818,
"step": 204500
},
{
"epoch": 1.64,
"learning_rate": 1.8360000000000004e-05,
"loss": 1.279,
"step": 205000
},
{
"epoch": 1.64,
"learning_rate": 1.8356000000000002e-05,
"loss": 1.2823,
"step": 205500
},
{
"epoch": 1.65,
"learning_rate": 1.8352e-05,
"loss": 1.2842,
"step": 206000
},
{
"epoch": 1.65,
"learning_rate": 1.8348e-05,
"loss": 1.2854,
"step": 206500
},
{
"epoch": 1.66,
"learning_rate": 1.8344000000000003e-05,
"loss": 1.2787,
"step": 207000
},
{
"epoch": 1.66,
"learning_rate": 1.834e-05,
"loss": 1.285,
"step": 207500
},
{
"epoch": 1.66,
"learning_rate": 1.8336000000000002e-05,
"loss": 1.2851,
"step": 208000
},
{
"epoch": 1.67,
"learning_rate": 1.8332e-05,
"loss": 1.277,
"step": 208500
},
{
"epoch": 1.67,
"learning_rate": 1.8328e-05,
"loss": 1.2828,
"step": 209000
},
{
"epoch": 1.68,
"learning_rate": 1.8324000000000003e-05,
"loss": 1.2741,
"step": 209500
},
{
"epoch": 1.68,
"learning_rate": 1.832e-05,
"loss": 1.2744,
"step": 210000
},
{
"epoch": 1.68,
"learning_rate": 1.8316e-05,
"loss": 1.2743,
"step": 210500
},
{
"epoch": 1.69,
"learning_rate": 1.8312e-05,
"loss": 1.2722,
"step": 211000
},
{
"epoch": 1.69,
"learning_rate": 1.8308e-05,
"loss": 1.2759,
"step": 211500
},
{
"epoch": 1.7,
"learning_rate": 1.8304000000000003e-05,
"loss": 1.2757,
"step": 212000
},
{
"epoch": 1.7,
"learning_rate": 1.83e-05,
"loss": 1.2806,
"step": 212500
},
{
"epoch": 1.7,
"learning_rate": 1.8296e-05,
"loss": 1.276,
"step": 213000
},
{
"epoch": 1.71,
"learning_rate": 1.8292e-05,
"loss": 1.2771,
"step": 213500
},
{
"epoch": 1.71,
"learning_rate": 1.8288000000000002e-05,
"loss": 1.2759,
"step": 214000
},
{
"epoch": 1.72,
"learning_rate": 1.8284000000000003e-05,
"loss": 1.2676,
"step": 214500
},
{
"epoch": 1.72,
"learning_rate": 1.828e-05,
"loss": 1.2758,
"step": 215000
},
{
"epoch": 1.72,
"learning_rate": 1.8276e-05,
"loss": 1.2667,
"step": 215500
},
{
"epoch": 1.73,
"learning_rate": 1.8272e-05,
"loss": 1.2738,
"step": 216000
},
{
"epoch": 1.73,
"learning_rate": 1.8268000000000002e-05,
"loss": 1.2748,
"step": 216500
},
{
"epoch": 1.74,
"learning_rate": 1.8264000000000003e-05,
"loss": 1.2762,
"step": 217000
},
{
"epoch": 1.74,
"learning_rate": 1.826e-05,
"loss": 1.2707,
"step": 217500
},
{
"epoch": 1.74,
"learning_rate": 1.8256e-05,
"loss": 1.2667,
"step": 218000
},
{
"epoch": 1.75,
"learning_rate": 1.8252e-05,
"loss": 1.2708,
"step": 218500
},
{
"epoch": 1.75,
"learning_rate": 1.8248000000000002e-05,
"loss": 1.2674,
"step": 219000
},
{
"epoch": 1.76,
"learning_rate": 1.8244e-05,
"loss": 1.2753,
"step": 219500
},
{
"epoch": 1.76,
"learning_rate": 1.824e-05,
"loss": 1.2724,
"step": 220000
},
{
"epoch": 1.76,
"learning_rate": 1.8236000000000003e-05,
"loss": 1.274,
"step": 220500
},
{
"epoch": 1.77,
"learning_rate": 1.8232e-05,
"loss": 1.2764,
"step": 221000
},
{
"epoch": 1.77,
"learning_rate": 1.8228000000000002e-05,
"loss": 1.2681,
"step": 221500
},
{
"epoch": 1.78,
"learning_rate": 1.8224e-05,
"loss": 1.2742,
"step": 222000
},
{
"epoch": 1.78,
"learning_rate": 1.8220000000000002e-05,
"loss": 1.2712,
"step": 222500
},
{
"epoch": 1.78,
"learning_rate": 1.8216000000000003e-05,
"loss": 1.2666,
"step": 223000
},
{
"epoch": 1.79,
"learning_rate": 1.8212e-05,
"loss": 1.2695,
"step": 223500
},
{
"epoch": 1.79,
"learning_rate": 1.8208000000000003e-05,
"loss": 1.2698,
"step": 224000
},
{
"epoch": 1.8,
"learning_rate": 1.8204e-05,
"loss": 1.2734,
"step": 224500
},
{
"epoch": 1.8,
"learning_rate": 1.8200000000000002e-05,
"loss": 1.2682,
"step": 225000
},
{
"epoch": 1.8,
"learning_rate": 1.8196000000000003e-05,
"loss": 1.2715,
"step": 225500
},
{
"epoch": 1.81,
"learning_rate": 1.8192e-05,
"loss": 1.2699,
"step": 226000
},
{
"epoch": 1.81,
"learning_rate": 1.8188000000000003e-05,
"loss": 1.2718,
"step": 226500
},
{
"epoch": 1.82,
"learning_rate": 1.8184e-05,
"loss": 1.2621,
"step": 227000
},
{
"epoch": 1.82,
"learning_rate": 1.8180000000000002e-05,
"loss": 1.2642,
"step": 227500
},
{
"epoch": 1.82,
"learning_rate": 1.8176000000000004e-05,
"loss": 1.2685,
"step": 228000
},
{
"epoch": 1.83,
"learning_rate": 1.8172e-05,
"loss": 1.2645,
"step": 228500
},
{
"epoch": 1.83,
"learning_rate": 1.8168e-05,
"loss": 1.2697,
"step": 229000
},
{
"epoch": 1.84,
"learning_rate": 1.8164e-05,
"loss": 1.2671,
"step": 229500
},
{
"epoch": 1.84,
"learning_rate": 1.8160000000000002e-05,
"loss": 1.2652,
"step": 230000
},
{
"epoch": 1.84,
"learning_rate": 1.8156000000000004e-05,
"loss": 1.2592,
"step": 230500
},
{
"epoch": 1.85,
"learning_rate": 1.8152000000000002e-05,
"loss": 1.2699,
"step": 231000
},
{
"epoch": 1.85,
"learning_rate": 1.8148e-05,
"loss": 1.2673,
"step": 231500
},
{
"epoch": 1.86,
"learning_rate": 1.8144e-05,
"loss": 1.27,
"step": 232000
},
{
"epoch": 1.86,
"learning_rate": 1.8140000000000003e-05,
"loss": 1.2673,
"step": 232500
},
{
"epoch": 1.86,
"learning_rate": 1.8136000000000004e-05,
"loss": 1.266,
"step": 233000
},
{
"epoch": 1.87,
"learning_rate": 1.8132000000000002e-05,
"loss": 1.2697,
"step": 233500
},
{
"epoch": 1.87,
"learning_rate": 1.8128e-05,
"loss": 1.2592,
"step": 234000
},
{
"epoch": 1.88,
"learning_rate": 1.8124e-05,
"loss": 1.2657,
"step": 234500
},
{
"epoch": 1.88,
"learning_rate": 1.8120000000000003e-05,
"loss": 1.2608,
"step": 235000
},
{
"epoch": 1.88,
"learning_rate": 1.8116000000000004e-05,
"loss": 1.2617,
"step": 235500
},
{
"epoch": 1.89,
"learning_rate": 1.8112000000000002e-05,
"loss": 1.2647,
"step": 236000
},
{
"epoch": 1.89,
"learning_rate": 1.8108e-05,
"loss": 1.2623,
"step": 236500
},
{
"epoch": 1.9,
"learning_rate": 1.8104e-05,
"loss": 1.2593,
"step": 237000
},
{
"epoch": 1.9,
"learning_rate": 1.8100000000000003e-05,
"loss": 1.258,
"step": 237500
},
{
"epoch": 1.9,
"learning_rate": 1.8096e-05,
"loss": 1.2627,
"step": 238000
},
{
"epoch": 1.91,
"learning_rate": 1.8092000000000002e-05,
"loss": 1.2666,
"step": 238500
},
{
"epoch": 1.91,
"learning_rate": 1.8088e-05,
"loss": 1.2642,
"step": 239000
},
{
"epoch": 1.92,
"learning_rate": 1.8084e-05,
"loss": 1.2583,
"step": 239500
},
{
"epoch": 1.92,
"learning_rate": 1.8080000000000003e-05,
"loss": 1.2588,
"step": 240000
},
{
"epoch": 1.92,
"learning_rate": 1.8076e-05,
"loss": 1.2676,
"step": 240500
},
{
"epoch": 1.93,
"learning_rate": 1.8072e-05,
"loss": 1.2564,
"step": 241000
},
{
"epoch": 1.93,
"learning_rate": 1.8068e-05,
"loss": 1.2554,
"step": 241500
},
{
"epoch": 1.94,
"learning_rate": 1.8064000000000002e-05,
"loss": 1.2543,
"step": 242000
},
{
"epoch": 1.94,
"learning_rate": 1.8060000000000003e-05,
"loss": 1.265,
"step": 242500
},
{
"epoch": 1.94,
"learning_rate": 1.8056e-05,
"loss": 1.2575,
"step": 243000
},
{
"epoch": 1.95,
"learning_rate": 1.8052e-05,
"loss": 1.2511,
"step": 243500
},
{
"epoch": 1.95,
"learning_rate": 1.8048e-05,
"loss": 1.2581,
"step": 244000
},
{
"epoch": 1.96,
"learning_rate": 1.8044000000000002e-05,
"loss": 1.257,
"step": 244500
},
{
"epoch": 1.96,
"learning_rate": 1.8040000000000003e-05,
"loss": 1.2639,
"step": 245000
},
{
"epoch": 1.96,
"learning_rate": 1.8036e-05,
"loss": 1.2565,
"step": 245500
},
{
"epoch": 1.97,
"learning_rate": 1.8032e-05,
"loss": 1.2567,
"step": 246000
},
{
"epoch": 1.97,
"learning_rate": 1.8028e-05,
"loss": 1.2469,
"step": 246500
},
{
"epoch": 1.98,
"learning_rate": 1.8024000000000002e-05,
"loss": 1.2606,
"step": 247000
},
{
"epoch": 1.98,
"learning_rate": 1.802e-05,
"loss": 1.2514,
"step": 247500
},
{
"epoch": 1.98,
"learning_rate": 1.8016e-05,
"loss": 1.2505,
"step": 248000
},
{
"epoch": 1.99,
"learning_rate": 1.8012e-05,
"loss": 1.2537,
"step": 248500
},
{
"epoch": 1.99,
"learning_rate": 1.8008e-05,
"loss": 1.252,
"step": 249000
},
{
"epoch": 2.0,
"learning_rate": 1.8004000000000002e-05,
"loss": 1.2528,
"step": 249500
},
{
"epoch": 2.0,
"learning_rate": 1.8e-05,
"loss": 1.256,
"step": 250000
},
{
"epoch": 2.0,
"eval_loss": 1.2035317420959473,
"eval_runtime": 81.1073,
"eval_samples_per_second": 165.373,
"eval_steps_per_second": 2.589,
"step": 250000
},
{
"epoch": 2.0,
"learning_rate": 1.7996000000000002e-05,
"loss": 1.246,
"step": 250500
},
{
"epoch": 2.01,
"learning_rate": 1.7992e-05,
"loss": 1.2571,
"step": 251000
},
{
"epoch": 2.01,
"learning_rate": 1.7988e-05,
"loss": 1.2529,
"step": 251500
},
{
"epoch": 2.02,
"learning_rate": 1.7984000000000003e-05,
"loss": 1.2474,
"step": 252000
},
{
"epoch": 2.02,
"learning_rate": 1.798e-05,
"loss": 1.2493,
"step": 252500
},
{
"epoch": 2.02,
"learning_rate": 1.7976000000000002e-05,
"loss": 1.2492,
"step": 253000
},
{
"epoch": 2.03,
"learning_rate": 1.7972e-05,
"loss": 1.2522,
"step": 253500
},
{
"epoch": 2.03,
"learning_rate": 1.7968e-05,
"loss": 1.2494,
"step": 254000
},
{
"epoch": 2.04,
"learning_rate": 1.7964000000000003e-05,
"loss": 1.2426,
"step": 254500
},
{
"epoch": 2.04,
"learning_rate": 1.796e-05,
"loss": 1.2522,
"step": 255000
},
{
"epoch": 2.04,
"learning_rate": 1.7956000000000002e-05,
"loss": 1.253,
"step": 255500
},
{
"epoch": 2.05,
"learning_rate": 1.7952e-05,
"loss": 1.2517,
"step": 256000
},
{
"epoch": 2.05,
"learning_rate": 1.7948e-05,
"loss": 1.2489,
"step": 256500
},
{
"epoch": 2.06,
"learning_rate": 1.7944000000000003e-05,
"loss": 1.2497,
"step": 257000
},
{
"epoch": 2.06,
"learning_rate": 1.794e-05,
"loss": 1.2469,
"step": 257500
},
{
"epoch": 2.06,
"learning_rate": 1.7936000000000002e-05,
"loss": 1.2405,
"step": 258000
},
{
"epoch": 2.07,
"learning_rate": 1.7932e-05,
"loss": 1.2477,
"step": 258500
},
{
"epoch": 2.07,
"learning_rate": 1.7928000000000002e-05,
"loss": 1.2427,
"step": 259000
},
{
"epoch": 2.08,
"learning_rate": 1.7924e-05,
"loss": 1.2446,
"step": 259500
},
{
"epoch": 2.08,
"learning_rate": 1.792e-05,
"loss": 1.2422,
"step": 260000
},
{
"epoch": 2.08,
"learning_rate": 1.7916000000000003e-05,
"loss": 1.2452,
"step": 260500
},
{
"epoch": 2.09,
"learning_rate": 1.7912e-05,
"loss": 1.2532,
"step": 261000
},
{
"epoch": 2.09,
"learning_rate": 1.7908000000000002e-05,
"loss": 1.2431,
"step": 261500
},
{
"epoch": 2.1,
"learning_rate": 1.7904e-05,
"loss": 1.2471,
"step": 262000
},
{
"epoch": 2.1,
"learning_rate": 1.79e-05,
"loss": 1.2446,
"step": 262500
},
{
"epoch": 2.1,
"learning_rate": 1.7896000000000003e-05,
"loss": 1.2495,
"step": 263000
},
{
"epoch": 2.11,
"learning_rate": 1.7892e-05,
"loss": 1.2468,
"step": 263500
},
{
"epoch": 2.11,
"learning_rate": 1.7888000000000002e-05,
"loss": 1.2495,
"step": 264000
},
{
"epoch": 2.12,
"learning_rate": 1.7884e-05,
"loss": 1.2519,
"step": 264500
},
{
"epoch": 2.12,
"learning_rate": 1.788e-05,
"loss": 1.2434,
"step": 265000
},
{
"epoch": 2.12,
"learning_rate": 1.7876000000000003e-05,
"loss": 1.2411,
"step": 265500
},
{
"epoch": 2.13,
"learning_rate": 1.7872e-05,
"loss": 1.235,
"step": 266000
},
{
"epoch": 2.13,
"learning_rate": 1.7868000000000002e-05,
"loss": 1.2424,
"step": 266500
},
{
"epoch": 2.14,
"learning_rate": 1.7864e-05,
"loss": 1.2456,
"step": 267000
},
{
"epoch": 2.14,
"learning_rate": 1.7860000000000002e-05,
"loss": 1.2521,
"step": 267500
},
{
"epoch": 2.14,
"learning_rate": 1.7856000000000003e-05,
"loss": 1.2368,
"step": 268000
},
{
"epoch": 2.15,
"learning_rate": 1.7852e-05,
"loss": 1.2382,
"step": 268500
},
{
"epoch": 2.15,
"learning_rate": 1.7848e-05,
"loss": 1.2447,
"step": 269000
},
{
"epoch": 2.16,
"learning_rate": 1.7844e-05,
"loss": 1.2445,
"step": 269500
},
{
"epoch": 2.16,
"learning_rate": 1.7840000000000002e-05,
"loss": 1.2429,
"step": 270000
},
{
"epoch": 2.16,
"learning_rate": 1.7836000000000003e-05,
"loss": 1.2416,
"step": 270500
},
{
"epoch": 2.17,
"learning_rate": 1.7832e-05,
"loss": 1.2376,
"step": 271000
},
{
"epoch": 2.17,
"learning_rate": 1.7828e-05,
"loss": 1.2317,
"step": 271500
},
{
"epoch": 2.18,
"learning_rate": 1.7824e-05,
"loss": 1.2303,
"step": 272000
},
{
"epoch": 2.18,
"learning_rate": 1.7820000000000002e-05,
"loss": 1.24,
"step": 272500
},
{
"epoch": 2.18,
"learning_rate": 1.7816000000000004e-05,
"loss": 1.2421,
"step": 273000
},
{
"epoch": 2.19,
"learning_rate": 1.7812e-05,
"loss": 1.2416,
"step": 273500
},
{
"epoch": 2.19,
"learning_rate": 1.7808e-05,
"loss": 1.2398,
"step": 274000
},
{
"epoch": 2.2,
"learning_rate": 1.7804e-05,
"loss": 1.2393,
"step": 274500
},
{
"epoch": 2.2,
"learning_rate": 1.7800000000000002e-05,
"loss": 1.2455,
"step": 275000
},
{
"epoch": 2.2,
"learning_rate": 1.7796000000000004e-05,
"loss": 1.2321,
"step": 275500
},
{
"epoch": 2.21,
"learning_rate": 1.7792000000000002e-05,
"loss": 1.2326,
"step": 276000
},
{
"epoch": 2.21,
"learning_rate": 1.7788e-05,
"loss": 1.2381,
"step": 276500
},
{
"epoch": 2.22,
"learning_rate": 1.7784e-05,
"loss": 1.2398,
"step": 277000
},
{
"epoch": 2.22,
"learning_rate": 1.7780000000000003e-05,
"loss": 1.2367,
"step": 277500
},
{
"epoch": 2.22,
"learning_rate": 1.7776e-05,
"loss": 1.2357,
"step": 278000
},
{
"epoch": 2.23,
"learning_rate": 1.7772000000000002e-05,
"loss": 1.2369,
"step": 278500
},
{
"epoch": 2.23,
"learning_rate": 1.7768e-05,
"loss": 1.2395,
"step": 279000
},
{
"epoch": 2.24,
"learning_rate": 1.7764e-05,
"loss": 1.2347,
"step": 279500
},
{
"epoch": 2.24,
"learning_rate": 1.7760000000000003e-05,
"loss": 1.2322,
"step": 280000
},
{
"epoch": 2.24,
"learning_rate": 1.7756e-05,
"loss": 1.2393,
"step": 280500
},
{
"epoch": 2.25,
"learning_rate": 1.7752e-05,
"loss": 1.2349,
"step": 281000
},
{
"epoch": 2.25,
"learning_rate": 1.7748e-05,
"loss": 1.2363,
"step": 281500
},
{
"epoch": 2.26,
"learning_rate": 1.7744e-05,
"loss": 1.2316,
"step": 282000
},
{
"epoch": 2.26,
"learning_rate": 1.7740000000000003e-05,
"loss": 1.237,
"step": 282500
},
{
"epoch": 2.26,
"learning_rate": 1.7736e-05,
"loss": 1.2345,
"step": 283000
},
{
"epoch": 2.27,
"learning_rate": 1.7732000000000002e-05,
"loss": 1.2369,
"step": 283500
},
{
"epoch": 2.27,
"learning_rate": 1.7728e-05,
"loss": 1.2333,
"step": 284000
},
{
"epoch": 2.28,
"learning_rate": 1.7724000000000002e-05,
"loss": 1.2326,
"step": 284500
},
{
"epoch": 2.28,
"learning_rate": 1.7720000000000003e-05,
"loss": 1.2256,
"step": 285000
},
{
"epoch": 2.28,
"learning_rate": 1.7716e-05,
"loss": 1.2268,
"step": 285500
},
{
"epoch": 2.29,
"learning_rate": 1.7712000000000003e-05,
"loss": 1.2354,
"step": 286000
},
{
"epoch": 2.29,
"learning_rate": 1.7708e-05,
"loss": 1.2298,
"step": 286500
},
{
"epoch": 2.3,
"learning_rate": 1.7704000000000002e-05,
"loss": 1.2292,
"step": 287000
},
{
"epoch": 2.3,
"learning_rate": 1.77e-05,
"loss": 1.2328,
"step": 287500
},
{
"epoch": 2.3,
"learning_rate": 1.7696e-05,
"loss": 1.2332,
"step": 288000
},
{
"epoch": 2.31,
"learning_rate": 1.7692000000000003e-05,
"loss": 1.2337,
"step": 288500
},
{
"epoch": 2.31,
"learning_rate": 1.7688e-05,
"loss": 1.2337,
"step": 289000
},
{
"epoch": 2.32,
"learning_rate": 1.7684000000000002e-05,
"loss": 1.2338,
"step": 289500
},
{
"epoch": 2.32,
"learning_rate": 1.768e-05,
"loss": 1.2373,
"step": 290000
},
{
"epoch": 2.32,
"learning_rate": 1.7676e-05,
"loss": 1.2335,
"step": 290500
},
{
"epoch": 2.33,
"learning_rate": 1.7672000000000003e-05,
"loss": 1.2338,
"step": 291000
},
{
"epoch": 2.33,
"learning_rate": 1.7668e-05,
"loss": 1.2246,
"step": 291500
},
{
"epoch": 2.34,
"learning_rate": 1.7664000000000002e-05,
"loss": 1.2349,
"step": 292000
},
{
"epoch": 2.34,
"learning_rate": 1.766e-05,
"loss": 1.2303,
"step": 292500
},
{
"epoch": 2.34,
"learning_rate": 1.7656000000000002e-05,
"loss": 1.227,
"step": 293000
},
{
"epoch": 2.35,
"learning_rate": 1.7652000000000003e-05,
"loss": 1.2304,
"step": 293500
},
{
"epoch": 2.35,
"learning_rate": 1.7648e-05,
"loss": 1.2341,
"step": 294000
},
{
"epoch": 2.36,
"learning_rate": 1.7644000000000003e-05,
"loss": 1.2397,
"step": 294500
},
{
"epoch": 2.36,
"learning_rate": 1.764e-05,
"loss": 1.2389,
"step": 295000
},
{
"epoch": 2.36,
"learning_rate": 1.7636000000000002e-05,
"loss": 1.2336,
"step": 295500
},
{
"epoch": 2.37,
"learning_rate": 1.7632000000000003e-05,
"loss": 1.2246,
"step": 296000
},
{
"epoch": 2.37,
"learning_rate": 1.7628e-05,
"loss": 1.2282,
"step": 296500
},
{
"epoch": 2.38,
"learning_rate": 1.7624000000000003e-05,
"loss": 1.222,
"step": 297000
},
{
"epoch": 2.38,
"learning_rate": 1.762e-05,
"loss": 1.2253,
"step": 297500
},
{
"epoch": 2.38,
"learning_rate": 1.7616000000000002e-05,
"loss": 1.234,
"step": 298000
},
{
"epoch": 2.39,
"learning_rate": 1.7612000000000003e-05,
"loss": 1.2259,
"step": 298500
},
{
"epoch": 2.39,
"learning_rate": 1.7608e-05,
"loss": 1.2297,
"step": 299000
},
{
"epoch": 2.4,
"learning_rate": 1.7604e-05,
"loss": 1.226,
"step": 299500
},
{
"epoch": 2.4,
"learning_rate": 1.76e-05,
"loss": 1.2278,
"step": 300000
},
{
"epoch": 2.4,
"learning_rate": 1.7596000000000002e-05,
"loss": 1.2291,
"step": 300500
},
{
"epoch": 2.41,
"learning_rate": 1.7592000000000004e-05,
"loss": 1.2291,
"step": 301000
},
{
"epoch": 2.41,
"learning_rate": 1.7588e-05,
"loss": 1.2228,
"step": 301500
},
{
"epoch": 2.42,
"learning_rate": 1.7584e-05,
"loss": 1.2252,
"step": 302000
},
{
"epoch": 2.42,
"learning_rate": 1.758e-05,
"loss": 1.2322,
"step": 302500
},
{
"epoch": 2.42,
"learning_rate": 1.7576000000000002e-05,
"loss": 1.2248,
"step": 303000
},
{
"epoch": 2.43,
"learning_rate": 1.7572000000000004e-05,
"loss": 1.2278,
"step": 303500
},
{
"epoch": 2.43,
"learning_rate": 1.7568000000000002e-05,
"loss": 1.2142,
"step": 304000
},
{
"epoch": 2.44,
"learning_rate": 1.7564e-05,
"loss": 1.2294,
"step": 304500
},
{
"epoch": 2.44,
"learning_rate": 1.756e-05,
"loss": 1.2247,
"step": 305000
},
{
"epoch": 2.44,
"learning_rate": 1.7556000000000003e-05,
"loss": 1.2266,
"step": 305500
},
{
"epoch": 2.45,
"learning_rate": 1.7552e-05,
"loss": 1.2299,
"step": 306000
},
{
"epoch": 2.45,
"learning_rate": 1.7548000000000002e-05,
"loss": 1.2284,
"step": 306500
},
{
"epoch": 2.46,
"learning_rate": 1.7544e-05,
"loss": 1.2205,
"step": 307000
},
{
"epoch": 2.46,
"learning_rate": 1.754e-05,
"loss": 1.2305,
"step": 307500
},
{
"epoch": 2.46,
"learning_rate": 1.7536000000000003e-05,
"loss": 1.2268,
"step": 308000
},
{
"epoch": 2.47,
"learning_rate": 1.7532e-05,
"loss": 1.226,
"step": 308500
},
{
"epoch": 2.47,
"learning_rate": 1.7528e-05,
"loss": 1.2249,
"step": 309000
},
{
"epoch": 2.48,
"learning_rate": 1.7524e-05,
"loss": 1.2264,
"step": 309500
},
{
"epoch": 2.48,
"learning_rate": 1.752e-05,
"loss": 1.2265,
"step": 310000
},
{
"epoch": 2.48,
"learning_rate": 1.7516000000000003e-05,
"loss": 1.2197,
"step": 310500
},
{
"epoch": 2.49,
"learning_rate": 1.7512e-05,
"loss": 1.2224,
"step": 311000
},
{
"epoch": 2.49,
"learning_rate": 1.7508e-05,
"loss": 1.2167,
"step": 311500
},
{
"epoch": 2.5,
"learning_rate": 1.7504e-05,
"loss": 1.2284,
"step": 312000
},
{
"epoch": 2.5,
"learning_rate": 1.7500000000000002e-05,
"loss": 1.2235,
"step": 312500
},
{
"epoch": 2.5,
"learning_rate": 1.7496000000000003e-05,
"loss": 1.2218,
"step": 313000
},
{
"epoch": 2.51,
"learning_rate": 1.7492e-05,
"loss": 1.215,
"step": 313500
},
{
"epoch": 2.51,
"learning_rate": 1.7488e-05,
"loss": 1.2217,
"step": 314000
},
{
"epoch": 2.52,
"learning_rate": 1.7484e-05,
"loss": 1.2207,
"step": 314500
},
{
"epoch": 2.52,
"learning_rate": 1.7480000000000002e-05,
"loss": 1.219,
"step": 315000
},
{
"epoch": 2.52,
"learning_rate": 1.7476000000000003e-05,
"loss": 1.225,
"step": 315500
},
{
"epoch": 2.53,
"learning_rate": 1.7472e-05,
"loss": 1.2214,
"step": 316000
},
{
"epoch": 2.53,
"learning_rate": 1.7468e-05,
"loss": 1.218,
"step": 316500
},
{
"epoch": 2.54,
"learning_rate": 1.7464e-05,
"loss": 1.2182,
"step": 317000
},
{
"epoch": 2.54,
"learning_rate": 1.7460000000000002e-05,
"loss": 1.2143,
"step": 317500
},
{
"epoch": 2.54,
"learning_rate": 1.7456e-05,
"loss": 1.219,
"step": 318000
},
{
"epoch": 2.55,
"learning_rate": 1.7452e-05,
"loss": 1.2204,
"step": 318500
},
{
"epoch": 2.55,
"learning_rate": 1.7448e-05,
"loss": 1.2143,
"step": 319000
},
{
"epoch": 2.56,
"learning_rate": 1.7444e-05,
"loss": 1.2188,
"step": 319500
},
{
"epoch": 2.56,
"learning_rate": 1.7440000000000002e-05,
"loss": 1.226,
"step": 320000
},
{
"epoch": 2.56,
"learning_rate": 1.7436e-05,
"loss": 1.2146,
"step": 320500
},
{
"epoch": 2.57,
"learning_rate": 1.7432000000000002e-05,
"loss": 1.2195,
"step": 321000
},
{
"epoch": 2.57,
"learning_rate": 1.7428e-05,
"loss": 1.2196,
"step": 321500
},
{
"epoch": 2.58,
"learning_rate": 1.7424e-05,
"loss": 1.2138,
"step": 322000
},
{
"epoch": 2.58,
"learning_rate": 1.7420000000000003e-05,
"loss": 1.2272,
"step": 322500
},
{
"epoch": 2.58,
"learning_rate": 1.7416e-05,
"loss": 1.221,
"step": 323000
},
{
"epoch": 2.59,
"learning_rate": 1.7412000000000002e-05,
"loss": 1.2135,
"step": 323500
},
{
"epoch": 2.59,
"learning_rate": 1.7408e-05,
"loss": 1.2085,
"step": 324000
},
{
"epoch": 2.6,
"learning_rate": 1.7404e-05,
"loss": 1.2114,
"step": 324500
},
{
"epoch": 2.6,
"learning_rate": 1.7400000000000003e-05,
"loss": 1.21,
"step": 325000
},
{
"epoch": 2.6,
"learning_rate": 1.7396e-05,
"loss": 1.213,
"step": 325500
},
{
"epoch": 2.61,
"learning_rate": 1.7392000000000002e-05,
"loss": 1.2166,
"step": 326000
},
{
"epoch": 2.61,
"learning_rate": 1.7388e-05,
"loss": 1.2193,
"step": 326500
},
{
"epoch": 2.62,
"learning_rate": 1.7384e-05,
"loss": 1.2197,
"step": 327000
},
{
"epoch": 2.62,
"learning_rate": 1.7380000000000003e-05,
"loss": 1.2207,
"step": 327500
},
{
"epoch": 2.62,
"learning_rate": 1.7376e-05,
"loss": 1.2155,
"step": 328000
},
{
"epoch": 2.63,
"learning_rate": 1.7372000000000002e-05,
"loss": 1.2246,
"step": 328500
},
{
"epoch": 2.63,
"learning_rate": 1.7368e-05,
"loss": 1.2119,
"step": 329000
},
{
"epoch": 2.64,
"learning_rate": 1.7364000000000002e-05,
"loss": 1.2119,
"step": 329500
},
{
"epoch": 2.64,
"learning_rate": 1.736e-05,
"loss": 1.2175,
"step": 330000
},
{
"epoch": 2.64,
"learning_rate": 1.7356e-05,
"loss": 1.2194,
"step": 330500
},
{
"epoch": 2.65,
"learning_rate": 1.7352000000000003e-05,
"loss": 1.2144,
"step": 331000
},
{
"epoch": 2.65,
"learning_rate": 1.7348e-05,
"loss": 1.2142,
"step": 331500
},
{
"epoch": 2.66,
"learning_rate": 1.7344000000000002e-05,
"loss": 1.2139,
"step": 332000
},
{
"epoch": 2.66,
"learning_rate": 1.734e-05,
"loss": 1.2169,
"step": 332500
},
{
"epoch": 2.66,
"learning_rate": 1.7336e-05,
"loss": 1.2131,
"step": 333000
},
{
"epoch": 2.67,
"learning_rate": 1.7332000000000003e-05,
"loss": 1.2145,
"step": 333500
},
{
"epoch": 2.67,
"learning_rate": 1.7328e-05,
"loss": 1.2125,
"step": 334000
},
{
"epoch": 2.68,
"learning_rate": 1.7324000000000002e-05,
"loss": 1.218,
"step": 334500
},
{
"epoch": 2.68,
"learning_rate": 1.732e-05,
"loss": 1.2163,
"step": 335000
},
{
"epoch": 2.68,
"learning_rate": 1.7316e-05,
"loss": 1.2136,
"step": 335500
},
{
"epoch": 2.69,
"learning_rate": 1.7312000000000003e-05,
"loss": 1.2045,
"step": 336000
},
{
"epoch": 2.69,
"learning_rate": 1.7308e-05,
"loss": 1.2145,
"step": 336500
},
{
"epoch": 2.7,
"learning_rate": 1.7304000000000002e-05,
"loss": 1.2123,
"step": 337000
},
{
"epoch": 2.7,
"learning_rate": 1.73e-05,
"loss": 1.2135,
"step": 337500
},
{
"epoch": 2.7,
"learning_rate": 1.7296000000000002e-05,
"loss": 1.2174,
"step": 338000
},
{
"epoch": 2.71,
"learning_rate": 1.7292000000000003e-05,
"loss": 1.2096,
"step": 338500
},
{
"epoch": 2.71,
"learning_rate": 1.7288e-05,
"loss": 1.2167,
"step": 339000
},
{
"epoch": 2.72,
"learning_rate": 1.7284e-05,
"loss": 1.2181,
"step": 339500
},
{
"epoch": 2.72,
"learning_rate": 1.728e-05,
"loss": 1.2163,
"step": 340000
},
{
"epoch": 2.72,
"learning_rate": 1.7276000000000002e-05,
"loss": 1.2114,
"step": 340500
},
{
"epoch": 2.73,
"learning_rate": 1.7272000000000003e-05,
"loss": 1.2157,
"step": 341000
},
{
"epoch": 2.73,
"learning_rate": 1.7268e-05,
"loss": 1.2043,
"step": 341500
},
{
"epoch": 2.74,
"learning_rate": 1.7264e-05,
"loss": 1.2088,
"step": 342000
},
{
"epoch": 2.74,
"learning_rate": 1.726e-05,
"loss": 1.2137,
"step": 342500
},
{
"epoch": 2.74,
"learning_rate": 1.7256000000000002e-05,
"loss": 1.2033,
"step": 343000
},
{
"epoch": 2.75,
"learning_rate": 1.7252000000000004e-05,
"loss": 1.2114,
"step": 343500
},
{
"epoch": 2.75,
"learning_rate": 1.7248e-05,
"loss": 1.206,
"step": 344000
},
{
"epoch": 2.76,
"learning_rate": 1.7244e-05,
"loss": 1.2084,
"step": 344500
},
{
"epoch": 2.76,
"learning_rate": 1.724e-05,
"loss": 1.2118,
"step": 345000
},
{
"epoch": 2.76,
"learning_rate": 1.7236000000000002e-05,
"loss": 1.2053,
"step": 345500
},
{
"epoch": 2.77,
"learning_rate": 1.7232000000000004e-05,
"loss": 1.2034,
"step": 346000
},
{
"epoch": 2.77,
"learning_rate": 1.7228000000000002e-05,
"loss": 1.2034,
"step": 346500
},
{
"epoch": 2.78,
"learning_rate": 1.7224e-05,
"loss": 1.2055,
"step": 347000
},
{
"epoch": 2.78,
"learning_rate": 1.722e-05,
"loss": 1.2068,
"step": 347500
},
{
"epoch": 2.78,
"learning_rate": 1.7216000000000003e-05,
"loss": 1.2056,
"step": 348000
},
{
"epoch": 2.79,
"learning_rate": 1.7212e-05,
"loss": 1.2027,
"step": 348500
},
{
"epoch": 2.79,
"learning_rate": 1.7208000000000002e-05,
"loss": 1.2045,
"step": 349000
},
{
"epoch": 2.8,
"learning_rate": 1.7204e-05,
"loss": 1.2044,
"step": 349500
},
{
"epoch": 2.8,
"learning_rate": 1.72e-05,
"loss": 1.2073,
"step": 350000
},
{
"epoch": 2.8,
"learning_rate": 1.7196000000000003e-05,
"loss": 1.2113,
"step": 350500
},
{
"epoch": 2.81,
"learning_rate": 1.7192e-05,
"loss": 1.2081,
"step": 351000
},
{
"epoch": 2.81,
"learning_rate": 1.7188000000000002e-05,
"loss": 1.2076,
"step": 351500
},
{
"epoch": 2.82,
"learning_rate": 1.7184e-05,
"loss": 1.2116,
"step": 352000
},
{
"epoch": 2.82,
"learning_rate": 1.718e-05,
"loss": 1.209,
"step": 352500
},
{
"epoch": 2.82,
"learning_rate": 1.7176000000000003e-05,
"loss": 1.2089,
"step": 353000
},
{
"epoch": 2.83,
"learning_rate": 1.7172e-05,
"loss": 1.2073,
"step": 353500
},
{
"epoch": 2.83,
"learning_rate": 1.7168000000000002e-05,
"loss": 1.2057,
"step": 354000
},
{
"epoch": 2.84,
"learning_rate": 1.7164e-05,
"loss": 1.2046,
"step": 354500
},
{
"epoch": 2.84,
"learning_rate": 1.7160000000000002e-05,
"loss": 1.2061,
"step": 355000
},
{
"epoch": 2.84,
"learning_rate": 1.7156000000000003e-05,
"loss": 1.2079,
"step": 355500
},
{
"epoch": 2.85,
"learning_rate": 1.7152e-05,
"loss": 1.2075,
"step": 356000
},
{
"epoch": 2.85,
"learning_rate": 1.7148000000000003e-05,
"loss": 1.2085,
"step": 356500
},
{
"epoch": 2.86,
"learning_rate": 1.7144e-05,
"loss": 1.2058,
"step": 357000
},
{
"epoch": 2.86,
"learning_rate": 1.7140000000000002e-05,
"loss": 1.2032,
"step": 357500
},
{
"epoch": 2.86,
"learning_rate": 1.7136e-05,
"loss": 1.197,
"step": 358000
},
{
"epoch": 2.87,
"learning_rate": 1.7132e-05,
"loss": 1.2035,
"step": 358500
},
{
"epoch": 2.87,
"learning_rate": 1.7128000000000003e-05,
"loss": 1.2008,
"step": 359000
},
{
"epoch": 2.88,
"learning_rate": 1.7124e-05,
"loss": 1.2024,
"step": 359500
},
{
"epoch": 2.88,
"learning_rate": 1.7120000000000002e-05,
"loss": 1.1994,
"step": 360000
},
{
"epoch": 2.88,
"learning_rate": 1.7116e-05,
"loss": 1.2064,
"step": 360500
},
{
"epoch": 2.89,
"learning_rate": 1.7112e-05,
"loss": 1.2014,
"step": 361000
},
{
"epoch": 2.89,
"learning_rate": 1.7108000000000003e-05,
"loss": 1.2051,
"step": 361500
},
{
"epoch": 2.9,
"learning_rate": 1.7104e-05,
"loss": 1.2027,
"step": 362000
},
{
"epoch": 2.9,
"learning_rate": 1.7100000000000002e-05,
"loss": 1.2051,
"step": 362500
},
{
"epoch": 2.9,
"learning_rate": 1.7096e-05,
"loss": 1.2101,
"step": 363000
},
{
"epoch": 2.91,
"learning_rate": 1.7092000000000002e-05,
"loss": 1.2065,
"step": 363500
},
{
"epoch": 2.91,
"learning_rate": 1.7088000000000003e-05,
"loss": 1.2112,
"step": 364000
},
{
"epoch": 2.92,
"learning_rate": 1.7084e-05,
"loss": 1.2,
"step": 364500
},
{
"epoch": 2.92,
"learning_rate": 1.7080000000000002e-05,
"loss": 1.1968,
"step": 365000
},
{
"epoch": 2.92,
"learning_rate": 1.7076e-05,
"loss": 1.2012,
"step": 365500
},
{
"epoch": 2.93,
"learning_rate": 1.7072000000000002e-05,
"loss": 1.2013,
"step": 366000
},
{
"epoch": 2.93,
"learning_rate": 1.7068000000000003e-05,
"loss": 1.2086,
"step": 366500
},
{
"epoch": 2.94,
"learning_rate": 1.7064e-05,
"loss": 1.2017,
"step": 367000
},
{
"epoch": 2.94,
"learning_rate": 1.7060000000000003e-05,
"loss": 1.1961,
"step": 367500
},
{
"epoch": 2.94,
"learning_rate": 1.7056e-05,
"loss": 1.2033,
"step": 368000
},
{
"epoch": 2.95,
"learning_rate": 1.7052000000000002e-05,
"loss": 1.205,
"step": 368500
},
{
"epoch": 2.95,
"learning_rate": 1.7048000000000003e-05,
"loss": 1.1949,
"step": 369000
},
{
"epoch": 2.96,
"learning_rate": 1.7044e-05,
"loss": 1.2033,
"step": 369500
},
{
"epoch": 2.96,
"learning_rate": 1.704e-05,
"loss": 1.1956,
"step": 370000
},
{
"epoch": 2.96,
"learning_rate": 1.7036e-05,
"loss": 1.1983,
"step": 370500
},
{
"epoch": 2.97,
"learning_rate": 1.7032000000000002e-05,
"loss": 1.1971,
"step": 371000
},
{
"epoch": 2.97,
"learning_rate": 1.7028000000000004e-05,
"loss": 1.194,
"step": 371500
},
{
"epoch": 2.98,
"learning_rate": 1.7024e-05,
"loss": 1.2097,
"step": 372000
},
{
"epoch": 2.98,
"learning_rate": 1.702e-05,
"loss": 1.2002,
"step": 372500
},
{
"epoch": 2.98,
"learning_rate": 1.7016e-05,
"loss": 1.1966,
"step": 373000
},
{
"epoch": 2.99,
"learning_rate": 1.7012000000000002e-05,
"loss": 1.1937,
"step": 373500
},
{
"epoch": 2.99,
"learning_rate": 1.7008000000000004e-05,
"loss": 1.1968,
"step": 374000
},
{
"epoch": 3.0,
"learning_rate": 1.7004000000000002e-05,
"loss": 1.195,
"step": 374500
},
{
"epoch": 3.0,
"learning_rate": 1.7e-05,
"loss": 1.1981,
"step": 375000
},
{
"epoch": 3.0,
"eval_loss": 1.144347906112671,
"eval_runtime": 81.5337,
"eval_samples_per_second": 164.509,
"eval_steps_per_second": 2.576,
"step": 375000
},
{
"epoch": 3.0,
"learning_rate": 1.6996e-05,
"loss": 1.1964,
"step": 375500
},
{
"epoch": 3.01,
"learning_rate": 1.6992000000000003e-05,
"loss": 1.1956,
"step": 376000
},
{
"epoch": 3.01,
"learning_rate": 1.6988e-05,
"loss": 1.197,
"step": 376500
},
{
"epoch": 3.02,
"learning_rate": 1.6984000000000002e-05,
"loss": 1.1958,
"step": 377000
},
{
"epoch": 3.02,
"learning_rate": 1.698e-05,
"loss": 1.1972,
"step": 377500
},
{
"epoch": 3.02,
"learning_rate": 1.6976e-05,
"loss": 1.1957,
"step": 378000
},
{
"epoch": 3.03,
"learning_rate": 1.6972000000000003e-05,
"loss": 1.1937,
"step": 378500
},
{
"epoch": 3.03,
"learning_rate": 1.6968e-05,
"loss": 1.1905,
"step": 379000
},
{
"epoch": 3.04,
"learning_rate": 1.6964e-05,
"loss": 1.1894,
"step": 379500
},
{
"epoch": 3.04,
"learning_rate": 1.696e-05,
"loss": 1.2001,
"step": 380000
},
{
"epoch": 3.04,
"learning_rate": 1.6956e-05,
"loss": 1.1898,
"step": 380500
},
{
"epoch": 3.05,
"learning_rate": 1.6952000000000003e-05,
"loss": 1.1942,
"step": 381000
},
{
"epoch": 3.05,
"learning_rate": 1.6948e-05,
"loss": 1.1924,
"step": 381500
},
{
"epoch": 3.06,
"learning_rate": 1.6944e-05,
"loss": 1.1963,
"step": 382000
},
{
"epoch": 3.06,
"learning_rate": 1.694e-05,
"loss": 1.1896,
"step": 382500
},
{
"epoch": 3.06,
"learning_rate": 1.6936000000000002e-05,
"loss": 1.2003,
"step": 383000
},
{
"epoch": 3.07,
"learning_rate": 1.6932000000000003e-05,
"loss": 1.1931,
"step": 383500
},
{
"epoch": 3.07,
"learning_rate": 1.6928e-05,
"loss": 1.1962,
"step": 384000
},
{
"epoch": 3.08,
"learning_rate": 1.6924e-05,
"loss": 1.1963,
"step": 384500
},
{
"epoch": 3.08,
"learning_rate": 1.692e-05,
"loss": 1.1862,
"step": 385000
},
{
"epoch": 3.08,
"learning_rate": 1.6916000000000002e-05,
"loss": 1.1885,
"step": 385500
},
{
"epoch": 3.09,
"learning_rate": 1.6912000000000003e-05,
"loss": 1.1898,
"step": 386000
},
{
"epoch": 3.09,
"learning_rate": 1.6908e-05,
"loss": 1.1947,
"step": 386500
},
{
"epoch": 3.1,
"learning_rate": 1.6904e-05,
"loss": 1.191,
"step": 387000
},
{
"epoch": 3.1,
"learning_rate": 1.69e-05,
"loss": 1.1958,
"step": 387500
},
{
"epoch": 3.1,
"learning_rate": 1.6896000000000002e-05,
"loss": 1.1936,
"step": 388000
},
{
"epoch": 3.11,
"learning_rate": 1.6892e-05,
"loss": 1.1912,
"step": 388500
},
{
"epoch": 3.11,
"learning_rate": 1.6888e-05,
"loss": 1.1888,
"step": 389000
},
{
"epoch": 3.12,
"learning_rate": 1.6884e-05,
"loss": 1.201,
"step": 389500
},
{
"epoch": 3.12,
"learning_rate": 1.688e-05,
"loss": 1.1902,
"step": 390000
},
{
"epoch": 3.12,
"learning_rate": 1.6876000000000002e-05,
"loss": 1.1854,
"step": 390500
},
{
"epoch": 3.13,
"learning_rate": 1.6872e-05,
"loss": 1.1942,
"step": 391000
},
{
"epoch": 3.13,
"learning_rate": 1.6868000000000002e-05,
"loss": 1.1923,
"step": 391500
},
{
"epoch": 3.14,
"learning_rate": 1.6864e-05,
"loss": 1.1982,
"step": 392000
},
{
"epoch": 3.14,
"learning_rate": 1.686e-05,
"loss": 1.1952,
"step": 392500
},
{
"epoch": 3.14,
"learning_rate": 1.6856000000000003e-05,
"loss": 1.1924,
"step": 393000
},
{
"epoch": 3.15,
"learning_rate": 1.6852e-05,
"loss": 1.1905,
"step": 393500
},
{
"epoch": 3.15,
"learning_rate": 1.6848000000000002e-05,
"loss": 1.1935,
"step": 394000
},
{
"epoch": 3.16,
"learning_rate": 1.6844e-05,
"loss": 1.1952,
"step": 394500
},
{
"epoch": 3.16,
"learning_rate": 1.684e-05,
"loss": 1.192,
"step": 395000
},
{
"epoch": 3.16,
"learning_rate": 1.6836000000000003e-05,
"loss": 1.1836,
"step": 395500
},
{
"epoch": 3.17,
"learning_rate": 1.6832e-05,
"loss": 1.1882,
"step": 396000
},
{
"epoch": 3.17,
"learning_rate": 1.6828000000000002e-05,
"loss": 1.1887,
"step": 396500
},
{
"epoch": 3.18,
"learning_rate": 1.6824e-05,
"loss": 1.1919,
"step": 397000
},
{
"epoch": 3.18,
"learning_rate": 1.682e-05,
"loss": 1.187,
"step": 397500
},
{
"epoch": 3.18,
"learning_rate": 1.6816e-05,
"loss": 1.1905,
"step": 398000
},
{
"epoch": 3.19,
"learning_rate": 1.6812e-05,
"loss": 1.1838,
"step": 398500
},
{
"epoch": 3.19,
"learning_rate": 1.6808000000000002e-05,
"loss": 1.1948,
"step": 399000
},
{
"epoch": 3.2,
"learning_rate": 1.6804e-05,
"loss": 1.1851,
"step": 399500
},
{
"epoch": 3.2,
"learning_rate": 1.6800000000000002e-05,
"loss": 1.1852,
"step": 400000
},
{
"epoch": 3.2,
"learning_rate": 1.6796e-05,
"loss": 1.1913,
"step": 400500
},
{
"epoch": 3.21,
"learning_rate": 1.6792e-05,
"loss": 1.1823,
"step": 401000
},
{
"epoch": 3.21,
"learning_rate": 1.6788000000000003e-05,
"loss": 1.1902,
"step": 401500
},
{
"epoch": 3.22,
"learning_rate": 1.6784e-05,
"loss": 1.1918,
"step": 402000
},
{
"epoch": 3.22,
"learning_rate": 1.6780000000000002e-05,
"loss": 1.1869,
"step": 402500
},
{
"epoch": 3.22,
"learning_rate": 1.6776e-05,
"loss": 1.1833,
"step": 403000
},
{
"epoch": 3.23,
"learning_rate": 1.6772e-05,
"loss": 1.1936,
"step": 403500
},
{
"epoch": 3.23,
"learning_rate": 1.6768000000000003e-05,
"loss": 1.1898,
"step": 404000
},
{
"epoch": 3.24,
"learning_rate": 1.6764e-05,
"loss": 1.1863,
"step": 404500
},
{
"epoch": 3.24,
"learning_rate": 1.6760000000000002e-05,
"loss": 1.1815,
"step": 405000
},
{
"epoch": 3.24,
"learning_rate": 1.6756e-05,
"loss": 1.1915,
"step": 405500
},
{
"epoch": 3.25,
"learning_rate": 1.6752e-05,
"loss": 1.1861,
"step": 406000
},
{
"epoch": 3.25,
"learning_rate": 1.6748000000000003e-05,
"loss": 1.187,
"step": 406500
},
{
"epoch": 3.26,
"learning_rate": 1.6744e-05,
"loss": 1.1894,
"step": 407000
},
{
"epoch": 3.26,
"learning_rate": 1.6740000000000002e-05,
"loss": 1.191,
"step": 407500
},
{
"epoch": 3.26,
"learning_rate": 1.6736e-05,
"loss": 1.1859,
"step": 408000
},
{
"epoch": 3.27,
"learning_rate": 1.6732000000000002e-05,
"loss": 1.1908,
"step": 408500
},
{
"epoch": 3.27,
"learning_rate": 1.6728000000000003e-05,
"loss": 1.1944,
"step": 409000
},
{
"epoch": 3.28,
"learning_rate": 1.6724e-05,
"loss": 1.1877,
"step": 409500
},
{
"epoch": 3.28,
"learning_rate": 1.672e-05,
"loss": 1.1828,
"step": 410000
},
{
"epoch": 3.28,
"learning_rate": 1.6716e-05,
"loss": 1.1844,
"step": 410500
},
{
"epoch": 3.29,
"learning_rate": 1.6712000000000002e-05,
"loss": 1.1919,
"step": 411000
},
{
"epoch": 3.29,
"learning_rate": 1.6708000000000003e-05,
"loss": 1.1876,
"step": 411500
},
{
"epoch": 3.3,
"learning_rate": 1.6704e-05,
"loss": 1.1812,
"step": 412000
},
{
"epoch": 3.3,
"learning_rate": 1.67e-05,
"loss": 1.1826,
"step": 412500
},
{
"epoch": 3.3,
"learning_rate": 1.6696e-05,
"loss": 1.1894,
"step": 413000
},
{
"epoch": 3.31,
"learning_rate": 1.6692000000000002e-05,
"loss": 1.1832,
"step": 413500
},
{
"epoch": 3.31,
"learning_rate": 1.6688000000000004e-05,
"loss": 1.1883,
"step": 414000
},
{
"epoch": 3.32,
"learning_rate": 1.6684e-05,
"loss": 1.1826,
"step": 414500
},
{
"epoch": 3.32,
"learning_rate": 1.668e-05,
"loss": 1.1855,
"step": 415000
},
{
"epoch": 3.32,
"learning_rate": 1.6676e-05,
"loss": 1.187,
"step": 415500
},
{
"epoch": 3.33,
"learning_rate": 1.6672000000000002e-05,
"loss": 1.1851,
"step": 416000
},
{
"epoch": 3.33,
"learning_rate": 1.6668e-05,
"loss": 1.1814,
"step": 416500
},
{
"epoch": 3.34,
"learning_rate": 1.6664000000000002e-05,
"loss": 1.1778,
"step": 417000
},
{
"epoch": 3.34,
"learning_rate": 1.666e-05,
"loss": 1.1759,
"step": 417500
},
{
"epoch": 3.34,
"learning_rate": 1.6656e-05,
"loss": 1.1782,
"step": 418000
},
{
"epoch": 3.35,
"learning_rate": 1.6652000000000003e-05,
"loss": 1.1881,
"step": 418500
},
{
"epoch": 3.35,
"learning_rate": 1.6648e-05,
"loss": 1.1909,
"step": 419000
},
{
"epoch": 3.36,
"learning_rate": 1.6644000000000002e-05,
"loss": 1.1796,
"step": 419500
},
{
"epoch": 3.36,
"learning_rate": 1.664e-05,
"loss": 1.1754,
"step": 420000
},
{
"epoch": 3.36,
"learning_rate": 1.6636e-05,
"loss": 1.1875,
"step": 420500
},
{
"epoch": 3.37,
"learning_rate": 1.6632000000000003e-05,
"loss": 1.1817,
"step": 421000
},
{
"epoch": 3.37,
"learning_rate": 1.6628e-05,
"loss": 1.1857,
"step": 421500
},
{
"epoch": 3.38,
"learning_rate": 1.6624000000000002e-05,
"loss": 1.1819,
"step": 422000
},
{
"epoch": 3.38,
"learning_rate": 1.662e-05,
"loss": 1.1793,
"step": 422500
},
{
"epoch": 3.38,
"learning_rate": 1.6616e-05,
"loss": 1.1847,
"step": 423000
},
{
"epoch": 3.39,
"learning_rate": 1.6612000000000003e-05,
"loss": 1.185,
"step": 423500
},
{
"epoch": 3.39,
"learning_rate": 1.6608e-05,
"loss": 1.1784,
"step": 424000
},
{
"epoch": 3.4,
"learning_rate": 1.6604000000000002e-05,
"loss": 1.1818,
"step": 424500
},
{
"epoch": 3.4,
"learning_rate": 1.66e-05,
"loss": 1.1802,
"step": 425000
},
{
"epoch": 3.4,
"learning_rate": 1.6596000000000002e-05,
"loss": 1.177,
"step": 425500
},
{
"epoch": 3.41,
"learning_rate": 1.6592000000000003e-05,
"loss": 1.1916,
"step": 426000
},
{
"epoch": 3.41,
"learning_rate": 1.6588e-05,
"loss": 1.1828,
"step": 426500
},
{
"epoch": 3.42,
"learning_rate": 1.6584000000000002e-05,
"loss": 1.1867,
"step": 427000
},
{
"epoch": 3.42,
"learning_rate": 1.658e-05,
"loss": 1.1824,
"step": 427500
},
{
"epoch": 3.42,
"learning_rate": 1.6576000000000002e-05,
"loss": 1.1842,
"step": 428000
},
{
"epoch": 3.43,
"learning_rate": 1.6572e-05,
"loss": 1.1784,
"step": 428500
},
{
"epoch": 3.43,
"learning_rate": 1.6568e-05,
"loss": 1.1784,
"step": 429000
},
{
"epoch": 3.44,
"learning_rate": 1.6564000000000003e-05,
"loss": 1.1832,
"step": 429500
},
{
"epoch": 3.44,
"learning_rate": 1.656e-05,
"loss": 1.1866,
"step": 430000
},
{
"epoch": 3.44,
"learning_rate": 1.6556000000000002e-05,
"loss": 1.1874,
"step": 430500
},
{
"epoch": 3.45,
"learning_rate": 1.6552e-05,
"loss": 1.1833,
"step": 431000
},
{
"epoch": 3.45,
"learning_rate": 1.6548e-05,
"loss": 1.177,
"step": 431500
},
{
"epoch": 3.46,
"learning_rate": 1.6544000000000003e-05,
"loss": 1.186,
"step": 432000
},
{
"epoch": 3.46,
"learning_rate": 1.654e-05,
"loss": 1.1775,
"step": 432500
},
{
"epoch": 3.46,
"learning_rate": 1.6536000000000002e-05,
"loss": 1.1769,
"step": 433000
},
{
"epoch": 3.47,
"learning_rate": 1.6532e-05,
"loss": 1.1782,
"step": 433500
},
{
"epoch": 3.47,
"learning_rate": 1.6528e-05,
"loss": 1.176,
"step": 434000
},
{
"epoch": 3.48,
"learning_rate": 1.6524000000000003e-05,
"loss": 1.1798,
"step": 434500
},
{
"epoch": 3.48,
"learning_rate": 1.652e-05,
"loss": 1.1817,
"step": 435000
},
{
"epoch": 3.48,
"learning_rate": 1.6516000000000002e-05,
"loss": 1.1744,
"step": 435500
},
{
"epoch": 3.49,
"learning_rate": 1.6512e-05,
"loss": 1.1763,
"step": 436000
},
{
"epoch": 3.49,
"learning_rate": 1.6508000000000002e-05,
"loss": 1.1829,
"step": 436500
},
{
"epoch": 3.5,
"learning_rate": 1.6504000000000003e-05,
"loss": 1.1812,
"step": 437000
},
{
"epoch": 3.5,
"learning_rate": 1.65e-05,
"loss": 1.1802,
"step": 437500
},
{
"epoch": 3.5,
"learning_rate": 1.6496e-05,
"loss": 1.1792,
"step": 438000
},
{
"epoch": 3.51,
"learning_rate": 1.6492e-05,
"loss": 1.1719,
"step": 438500
},
{
"epoch": 3.51,
"learning_rate": 1.6488000000000002e-05,
"loss": 1.1821,
"step": 439000
},
{
"epoch": 3.52,
"learning_rate": 1.6484000000000003e-05,
"loss": 1.1748,
"step": 439500
},
{
"epoch": 3.52,
"learning_rate": 1.648e-05,
"loss": 1.1721,
"step": 440000
},
{
"epoch": 3.52,
"learning_rate": 1.6476e-05,
"loss": 1.1778,
"step": 440500
},
{
"epoch": 3.53,
"learning_rate": 1.6472e-05,
"loss": 1.1827,
"step": 441000
},
{
"epoch": 3.53,
"learning_rate": 1.6468000000000002e-05,
"loss": 1.1802,
"step": 441500
},
{
"epoch": 3.54,
"learning_rate": 1.6464000000000004e-05,
"loss": 1.1715,
"step": 442000
},
{
"epoch": 3.54,
"learning_rate": 1.646e-05,
"loss": 1.1792,
"step": 442500
},
{
"epoch": 3.54,
"learning_rate": 1.6456e-05,
"loss": 1.1726,
"step": 443000
},
{
"epoch": 3.55,
"learning_rate": 1.6452e-05,
"loss": 1.1764,
"step": 443500
},
{
"epoch": 3.55,
"learning_rate": 1.6448000000000002e-05,
"loss": 1.1704,
"step": 444000
},
{
"epoch": 3.56,
"learning_rate": 1.6444000000000004e-05,
"loss": 1.1757,
"step": 444500
},
{
"epoch": 3.56,
"learning_rate": 1.6440000000000002e-05,
"loss": 1.1678,
"step": 445000
},
{
"epoch": 3.56,
"learning_rate": 1.6436e-05,
"loss": 1.1785,
"step": 445500
},
{
"epoch": 3.57,
"learning_rate": 1.6432e-05,
"loss": 1.1728,
"step": 446000
},
{
"epoch": 3.57,
"learning_rate": 1.6428000000000003e-05,
"loss": 1.1744,
"step": 446500
},
{
"epoch": 3.58,
"learning_rate": 1.6424e-05,
"loss": 1.1834,
"step": 447000
},
{
"epoch": 3.58,
"learning_rate": 1.6420000000000002e-05,
"loss": 1.1728,
"step": 447500
},
{
"epoch": 3.58,
"learning_rate": 1.6416e-05,
"loss": 1.1737,
"step": 448000
},
{
"epoch": 3.59,
"learning_rate": 1.6412e-05,
"loss": 1.1758,
"step": 448500
},
{
"epoch": 3.59,
"learning_rate": 1.6408000000000003e-05,
"loss": 1.1765,
"step": 449000
},
{
"epoch": 3.6,
"learning_rate": 1.6404e-05,
"loss": 1.1766,
"step": 449500
},
{
"epoch": 3.6,
"learning_rate": 1.64e-05,
"loss": 1.1749,
"step": 450000
},
{
"epoch": 3.6,
"learning_rate": 1.6396e-05,
"loss": 1.1668,
"step": 450500
},
{
"epoch": 3.61,
"learning_rate": 1.6392e-05,
"loss": 1.1831,
"step": 451000
},
{
"epoch": 3.61,
"learning_rate": 1.6388000000000003e-05,
"loss": 1.1829,
"step": 451500
},
{
"epoch": 3.62,
"learning_rate": 1.6384e-05,
"loss": 1.1731,
"step": 452000
},
{
"epoch": 3.62,
"learning_rate": 1.638e-05,
"loss": 1.1816,
"step": 452500
},
{
"epoch": 3.62,
"learning_rate": 1.6376e-05,
"loss": 1.1754,
"step": 453000
},
{
"epoch": 3.63,
"learning_rate": 1.6372000000000002e-05,
"loss": 1.1797,
"step": 453500
},
{
"epoch": 3.63,
"learning_rate": 1.6368000000000003e-05,
"loss": 1.1721,
"step": 454000
},
{
"epoch": 3.64,
"learning_rate": 1.6364e-05,
"loss": 1.1729,
"step": 454500
},
{
"epoch": 3.64,
"learning_rate": 1.636e-05,
"loss": 1.1752,
"step": 455000
},
{
"epoch": 3.64,
"learning_rate": 1.6356e-05,
"loss": 1.1668,
"step": 455500
},
{
"epoch": 3.65,
"learning_rate": 1.6352000000000002e-05,
"loss": 1.17,
"step": 456000
},
{
"epoch": 3.65,
"learning_rate": 1.6348e-05,
"loss": 1.1812,
"step": 456500
},
{
"epoch": 3.66,
"learning_rate": 1.6344e-05,
"loss": 1.1788,
"step": 457000
},
{
"epoch": 3.66,
"learning_rate": 1.634e-05,
"loss": 1.1743,
"step": 457500
},
{
"epoch": 3.66,
"learning_rate": 1.6336e-05,
"loss": 1.1749,
"step": 458000
},
{
"epoch": 3.67,
"learning_rate": 1.6332000000000002e-05,
"loss": 1.1717,
"step": 458500
},
{
"epoch": 3.67,
"learning_rate": 1.6328e-05,
"loss": 1.1752,
"step": 459000
},
{
"epoch": 3.68,
"learning_rate": 1.6324e-05,
"loss": 1.1732,
"step": 459500
},
{
"epoch": 3.68,
"learning_rate": 1.632e-05,
"loss": 1.1763,
"step": 460000
},
{
"epoch": 3.68,
"learning_rate": 1.6316e-05,
"loss": 1.1745,
"step": 460500
},
{
"epoch": 3.69,
"learning_rate": 1.6312000000000002e-05,
"loss": 1.1701,
"step": 461000
},
{
"epoch": 3.69,
"learning_rate": 1.6308e-05,
"loss": 1.1702,
"step": 461500
},
{
"epoch": 3.7,
"learning_rate": 1.6304000000000002e-05,
"loss": 1.1753,
"step": 462000
},
{
"epoch": 3.7,
"learning_rate": 1.63e-05,
"loss": 1.1708,
"step": 462500
},
{
"epoch": 3.7,
"learning_rate": 1.6296e-05,
"loss": 1.1715,
"step": 463000
},
{
"epoch": 3.71,
"learning_rate": 1.6292000000000003e-05,
"loss": 1.1687,
"step": 463500
},
{
"epoch": 3.71,
"learning_rate": 1.6288e-05,
"loss": 1.1749,
"step": 464000
},
{
"epoch": 3.72,
"learning_rate": 1.6284000000000002e-05,
"loss": 1.1726,
"step": 464500
},
{
"epoch": 3.72,
"learning_rate": 1.628e-05,
"loss": 1.1722,
"step": 465000
},
{
"epoch": 3.72,
"learning_rate": 1.6276e-05,
"loss": 1.1736,
"step": 465500
},
{
"epoch": 3.73,
"learning_rate": 1.6272000000000003e-05,
"loss": 1.17,
"step": 466000
},
{
"epoch": 3.73,
"learning_rate": 1.6268e-05,
"loss": 1.1682,
"step": 466500
},
{
"epoch": 3.74,
"learning_rate": 1.6264000000000002e-05,
"loss": 1.1698,
"step": 467000
},
{
"epoch": 3.74,
"learning_rate": 1.626e-05,
"loss": 1.1782,
"step": 467500
},
{
"epoch": 3.74,
"learning_rate": 1.6256e-05,
"loss": 1.1645,
"step": 468000
},
{
"epoch": 3.75,
"learning_rate": 1.6252e-05,
"loss": 1.1628,
"step": 468500
},
{
"epoch": 3.75,
"learning_rate": 1.6248e-05,
"loss": 1.1681,
"step": 469000
},
{
"epoch": 3.76,
"learning_rate": 1.6244000000000002e-05,
"loss": 1.1764,
"step": 469500
},
{
"epoch": 3.76,
"learning_rate": 1.6240000000000004e-05,
"loss": 1.1692,
"step": 470000
},
{
"epoch": 3.76,
"learning_rate": 1.6236000000000002e-05,
"loss": 1.1682,
"step": 470500
},
{
"epoch": 3.77,
"learning_rate": 1.6232e-05,
"loss": 1.1692,
"step": 471000
},
{
"epoch": 3.77,
"learning_rate": 1.6228e-05,
"loss": 1.1715,
"step": 471500
},
{
"epoch": 3.78,
"learning_rate": 1.6224000000000003e-05,
"loss": 1.1729,
"step": 472000
},
{
"epoch": 3.78,
"learning_rate": 1.6220000000000004e-05,
"loss": 1.1708,
"step": 472500
},
{
"epoch": 3.78,
"learning_rate": 1.6216000000000002e-05,
"loss": 1.1658,
"step": 473000
},
{
"epoch": 3.79,
"learning_rate": 1.6212e-05,
"loss": 1.1732,
"step": 473500
},
{
"epoch": 3.79,
"learning_rate": 1.6208e-05,
"loss": 1.1708,
"step": 474000
},
{
"epoch": 3.8,
"learning_rate": 1.6204000000000003e-05,
"loss": 1.1728,
"step": 474500
},
{
"epoch": 3.8,
"learning_rate": 1.62e-05,
"loss": 1.1706,
"step": 475000
},
{
"epoch": 3.8,
"learning_rate": 1.6196000000000002e-05,
"loss": 1.1685,
"step": 475500
},
{
"epoch": 3.81,
"learning_rate": 1.6192e-05,
"loss": 1.1747,
"step": 476000
},
{
"epoch": 3.81,
"learning_rate": 1.6188e-05,
"loss": 1.1659,
"step": 476500
},
{
"epoch": 3.82,
"learning_rate": 1.6184000000000003e-05,
"loss": 1.1724,
"step": 477000
},
{
"epoch": 3.82,
"learning_rate": 1.618e-05,
"loss": 1.1669,
"step": 477500
},
{
"epoch": 3.82,
"learning_rate": 1.6176e-05,
"loss": 1.1697,
"step": 478000
},
{
"epoch": 3.83,
"learning_rate": 1.6172e-05,
"loss": 1.1685,
"step": 478500
},
{
"epoch": 3.83,
"learning_rate": 1.6168000000000002e-05,
"loss": 1.1733,
"step": 479000
},
{
"epoch": 3.84,
"learning_rate": 1.6164000000000003e-05,
"loss": 1.1688,
"step": 479500
},
{
"epoch": 3.84,
"learning_rate": 1.616e-05,
"loss": 1.1703,
"step": 480000
},
{
"epoch": 3.84,
"learning_rate": 1.6156e-05,
"loss": 1.1612,
"step": 480500
},
{
"epoch": 3.85,
"learning_rate": 1.6152e-05,
"loss": 1.1667,
"step": 481000
},
{
"epoch": 3.85,
"learning_rate": 1.6148000000000002e-05,
"loss": 1.1614,
"step": 481500
},
{
"epoch": 3.86,
"learning_rate": 1.6144000000000003e-05,
"loss": 1.1668,
"step": 482000
},
{
"epoch": 3.86,
"learning_rate": 1.614e-05,
"loss": 1.1684,
"step": 482500
},
{
"epoch": 3.86,
"learning_rate": 1.6136e-05,
"loss": 1.1666,
"step": 483000
},
{
"epoch": 3.87,
"learning_rate": 1.6132e-05,
"loss": 1.1691,
"step": 483500
},
{
"epoch": 3.87,
"learning_rate": 1.6128000000000002e-05,
"loss": 1.1692,
"step": 484000
},
{
"epoch": 3.88,
"learning_rate": 1.6124000000000004e-05,
"loss": 1.1654,
"step": 484500
},
{
"epoch": 3.88,
"learning_rate": 1.612e-05,
"loss": 1.1644,
"step": 485000
},
{
"epoch": 3.88,
"learning_rate": 1.6116e-05,
"loss": 1.1645,
"step": 485500
},
{
"epoch": 3.89,
"learning_rate": 1.6112e-05,
"loss": 1.1735,
"step": 486000
},
{
"epoch": 3.89,
"learning_rate": 1.6108000000000002e-05,
"loss": 1.1704,
"step": 486500
},
{
"epoch": 3.9,
"learning_rate": 1.6104e-05,
"loss": 1.164,
"step": 487000
},
{
"epoch": 3.9,
"learning_rate": 1.6100000000000002e-05,
"loss": 1.1728,
"step": 487500
},
{
"epoch": 3.9,
"learning_rate": 1.6096e-05,
"loss": 1.1637,
"step": 488000
},
{
"epoch": 3.91,
"learning_rate": 1.6092e-05,
"loss": 1.1682,
"step": 488500
},
{
"epoch": 3.91,
"learning_rate": 1.6088000000000002e-05,
"loss": 1.167,
"step": 489000
},
{
"epoch": 3.92,
"learning_rate": 1.6084e-05,
"loss": 1.167,
"step": 489500
},
{
"epoch": 3.92,
"learning_rate": 1.6080000000000002e-05,
"loss": 1.1655,
"step": 490000
},
{
"epoch": 3.92,
"learning_rate": 1.6076e-05,
"loss": 1.1643,
"step": 490500
},
{
"epoch": 3.93,
"learning_rate": 1.6072e-05,
"loss": 1.1687,
"step": 491000
},
{
"epoch": 3.93,
"learning_rate": 1.6068000000000003e-05,
"loss": 1.1636,
"step": 491500
},
{
"epoch": 3.94,
"learning_rate": 1.6064e-05,
"loss": 1.1609,
"step": 492000
},
{
"epoch": 3.94,
"learning_rate": 1.6060000000000002e-05,
"loss": 1.1678,
"step": 492500
},
{
"epoch": 3.94,
"learning_rate": 1.6056e-05,
"loss": 1.1704,
"step": 493000
},
{
"epoch": 3.95,
"learning_rate": 1.6052e-05,
"loss": 1.1666,
"step": 493500
},
{
"epoch": 3.95,
"learning_rate": 1.6048000000000003e-05,
"loss": 1.1636,
"step": 494000
},
{
"epoch": 3.96,
"learning_rate": 1.6044e-05,
"loss": 1.1686,
"step": 494500
},
{
"epoch": 3.96,
"learning_rate": 1.6040000000000002e-05,
"loss": 1.1635,
"step": 495000
},
{
"epoch": 3.96,
"learning_rate": 1.6036e-05,
"loss": 1.1607,
"step": 495500
},
{
"epoch": 3.97,
"learning_rate": 1.6032e-05,
"loss": 1.1592,
"step": 496000
},
{
"epoch": 3.97,
"learning_rate": 1.6028e-05,
"loss": 1.1602,
"step": 496500
},
{
"epoch": 3.98,
"learning_rate": 1.6024e-05,
"loss": 1.1608,
"step": 497000
},
{
"epoch": 3.98,
"learning_rate": 1.6020000000000002e-05,
"loss": 1.1578,
"step": 497500
},
{
"epoch": 3.98,
"learning_rate": 1.6016e-05,
"loss": 1.1653,
"step": 498000
},
{
"epoch": 3.99,
"learning_rate": 1.6012000000000002e-05,
"loss": 1.1625,
"step": 498500
},
{
"epoch": 3.99,
"learning_rate": 1.6008e-05,
"loss": 1.1701,
"step": 499000
},
{
"epoch": 4.0,
"learning_rate": 1.6004e-05,
"loss": 1.1644,
"step": 499500
},
{
"epoch": 4.0,
"learning_rate": 1.6000000000000003e-05,
"loss": 1.1652,
"step": 500000
},
{
"epoch": 4.0,
"eval_loss": 1.1185648441314697,
"eval_runtime": 90.4279,
"eval_samples_per_second": 148.328,
"eval_steps_per_second": 2.322,
"step": 500000
},
{
"epoch": 4.0,
"learning_rate": 1.5996e-05,
"loss": 1.1599,
"step": 500500
},
{
"epoch": 4.01,
"learning_rate": 1.5992000000000002e-05,
"loss": 1.1548,
"step": 501000
},
{
"epoch": 4.01,
"learning_rate": 1.5988e-05,
"loss": 1.1586,
"step": 501500
},
{
"epoch": 4.02,
"learning_rate": 1.5984e-05,
"loss": 1.1593,
"step": 502000
},
{
"epoch": 4.02,
"learning_rate": 1.5980000000000003e-05,
"loss": 1.1563,
"step": 502500
},
{
"epoch": 4.02,
"learning_rate": 1.5976e-05,
"loss": 1.1611,
"step": 503000
},
{
"epoch": 4.03,
"learning_rate": 1.5972000000000002e-05,
"loss": 1.1596,
"step": 503500
},
{
"epoch": 4.03,
"learning_rate": 1.5968e-05,
"loss": 1.167,
"step": 504000
},
{
"epoch": 4.04,
"learning_rate": 1.5964e-05,
"loss": 1.1596,
"step": 504500
},
{
"epoch": 4.04,
"learning_rate": 1.5960000000000003e-05,
"loss": 1.1587,
"step": 505000
},
{
"epoch": 4.04,
"learning_rate": 1.5956e-05,
"loss": 1.1529,
"step": 505500
},
{
"epoch": 4.05,
"learning_rate": 1.5952000000000002e-05,
"loss": 1.1622,
"step": 506000
},
{
"epoch": 4.05,
"learning_rate": 1.5948e-05,
"loss": 1.1545,
"step": 506500
},
{
"epoch": 4.06,
"learning_rate": 1.5944000000000002e-05,
"loss": 1.1604,
"step": 507000
},
{
"epoch": 4.06,
"learning_rate": 1.5940000000000003e-05,
"loss": 1.1553,
"step": 507500
},
{
"epoch": 4.06,
"learning_rate": 1.5936e-05,
"loss": 1.1561,
"step": 508000
},
{
"epoch": 4.07,
"learning_rate": 1.5932e-05,
"loss": 1.161,
"step": 508500
},
{
"epoch": 4.07,
"learning_rate": 1.5928e-05,
"loss": 1.1563,
"step": 509000
},
{
"epoch": 4.08,
"learning_rate": 1.5924000000000002e-05,
"loss": 1.1626,
"step": 509500
},
{
"epoch": 4.08,
"learning_rate": 1.5920000000000003e-05,
"loss": 1.1587,
"step": 510000
},
{
"epoch": 4.08,
"learning_rate": 1.5916e-05,
"loss": 1.1594,
"step": 510500
},
{
"epoch": 4.09,
"learning_rate": 1.5912e-05,
"loss": 1.1605,
"step": 511000
},
{
"epoch": 4.09,
"learning_rate": 1.5908e-05,
"loss": 1.1605,
"step": 511500
},
{
"epoch": 4.1,
"learning_rate": 1.5904000000000002e-05,
"loss": 1.1573,
"step": 512000
},
{
"epoch": 4.1,
"learning_rate": 1.5900000000000004e-05,
"loss": 1.1549,
"step": 512500
},
{
"epoch": 4.1,
"learning_rate": 1.5896e-05,
"loss": 1.162,
"step": 513000
},
{
"epoch": 4.11,
"learning_rate": 1.5892e-05,
"loss": 1.1561,
"step": 513500
},
{
"epoch": 4.11,
"learning_rate": 1.5888e-05,
"loss": 1.1526,
"step": 514000
},
{
"epoch": 4.12,
"learning_rate": 1.5884000000000002e-05,
"loss": 1.1637,
"step": 514500
},
{
"epoch": 4.12,
"learning_rate": 1.588e-05,
"loss": 1.1591,
"step": 515000
},
{
"epoch": 4.12,
"learning_rate": 1.5876000000000002e-05,
"loss": 1.155,
"step": 515500
},
{
"epoch": 4.13,
"learning_rate": 1.5872e-05,
"loss": 1.1536,
"step": 516000
},
{
"epoch": 4.13,
"learning_rate": 1.5868e-05,
"loss": 1.1491,
"step": 516500
},
{
"epoch": 4.14,
"learning_rate": 1.5864000000000003e-05,
"loss": 1.1498,
"step": 517000
},
{
"epoch": 4.14,
"learning_rate": 1.586e-05,
"loss": 1.1559,
"step": 517500
},
{
"epoch": 4.14,
"learning_rate": 1.5856e-05,
"loss": 1.159,
"step": 518000
},
{
"epoch": 4.15,
"learning_rate": 1.5852e-05,
"loss": 1.1526,
"step": 518500
},
{
"epoch": 4.15,
"learning_rate": 1.5848e-05,
"loss": 1.1584,
"step": 519000
},
{
"epoch": 4.16,
"learning_rate": 1.5844000000000003e-05,
"loss": 1.1558,
"step": 519500
},
{
"epoch": 4.16,
"learning_rate": 1.584e-05,
"loss": 1.1523,
"step": 520000
},
{
"epoch": 4.16,
"learning_rate": 1.5836e-05,
"loss": 1.1543,
"step": 520500
},
{
"epoch": 4.17,
"learning_rate": 1.5832e-05,
"loss": 1.1553,
"step": 521000
},
{
"epoch": 4.17,
"learning_rate": 1.5828e-05,
"loss": 1.1503,
"step": 521500
},
{
"epoch": 4.18,
"learning_rate": 1.5824000000000003e-05,
"loss": 1.1507,
"step": 522000
},
{
"epoch": 4.18,
"learning_rate": 1.582e-05,
"loss": 1.1615,
"step": 522500
},
{
"epoch": 4.18,
"learning_rate": 1.5816e-05,
"loss": 1.1574,
"step": 523000
},
{
"epoch": 4.19,
"learning_rate": 1.5812e-05,
"loss": 1.1551,
"step": 523500
},
{
"epoch": 4.19,
"learning_rate": 1.5808000000000002e-05,
"loss": 1.1556,
"step": 524000
},
{
"epoch": 4.2,
"learning_rate": 1.5804000000000003e-05,
"loss": 1.1576,
"step": 524500
},
{
"epoch": 4.2,
"learning_rate": 1.58e-05,
"loss": 1.1539,
"step": 525000
},
{
"epoch": 4.2,
"learning_rate": 1.5796e-05,
"loss": 1.1565,
"step": 525500
},
{
"epoch": 4.21,
"learning_rate": 1.5792e-05,
"loss": 1.1587,
"step": 526000
},
{
"epoch": 4.21,
"learning_rate": 1.5788000000000002e-05,
"loss": 1.1537,
"step": 526500
},
{
"epoch": 4.22,
"learning_rate": 1.5784e-05,
"loss": 1.1569,
"step": 527000
},
{
"epoch": 4.22,
"learning_rate": 1.578e-05,
"loss": 1.1536,
"step": 527500
},
{
"epoch": 4.22,
"learning_rate": 1.5776e-05,
"loss": 1.1576,
"step": 528000
},
{
"epoch": 4.23,
"learning_rate": 1.5772e-05,
"loss": 1.1559,
"step": 528500
},
{
"epoch": 4.23,
"learning_rate": 1.5768000000000002e-05,
"loss": 1.1601,
"step": 529000
},
{
"epoch": 4.24,
"learning_rate": 1.5764e-05,
"loss": 1.1544,
"step": 529500
},
{
"epoch": 4.24,
"learning_rate": 1.576e-05,
"loss": 1.1554,
"step": 530000
},
{
"epoch": 4.24,
"learning_rate": 1.5756e-05,
"loss": 1.155,
"step": 530500
},
{
"epoch": 4.25,
"learning_rate": 1.5752e-05,
"loss": 1.159,
"step": 531000
},
{
"epoch": 4.25,
"learning_rate": 1.5748000000000002e-05,
"loss": 1.1549,
"step": 531500
},
{
"epoch": 4.26,
"learning_rate": 1.5744e-05,
"loss": 1.1496,
"step": 532000
},
{
"epoch": 4.26,
"learning_rate": 1.5740000000000002e-05,
"loss": 1.1525,
"step": 532500
},
{
"epoch": 4.26,
"learning_rate": 1.5736000000000003e-05,
"loss": 1.154,
"step": 533000
},
{
"epoch": 4.27,
"learning_rate": 1.5732e-05,
"loss": 1.157,
"step": 533500
},
{
"epoch": 4.27,
"learning_rate": 1.5728000000000003e-05,
"loss": 1.1542,
"step": 534000
},
{
"epoch": 4.28,
"learning_rate": 1.5724e-05,
"loss": 1.1529,
"step": 534500
},
{
"epoch": 4.28,
"learning_rate": 1.5720000000000002e-05,
"loss": 1.1508,
"step": 535000
},
{
"epoch": 4.28,
"learning_rate": 1.5716000000000003e-05,
"loss": 1.1584,
"step": 535500
},
{
"epoch": 4.29,
"learning_rate": 1.5712e-05,
"loss": 1.1536,
"step": 536000
},
{
"epoch": 4.29,
"learning_rate": 1.5708e-05,
"loss": 1.1577,
"step": 536500
},
{
"epoch": 4.3,
"learning_rate": 1.5704e-05,
"loss": 1.1519,
"step": 537000
},
{
"epoch": 4.3,
"learning_rate": 1.5700000000000002e-05,
"loss": 1.1511,
"step": 537500
},
{
"epoch": 4.3,
"learning_rate": 1.5696000000000004e-05,
"loss": 1.1562,
"step": 538000
},
{
"epoch": 4.31,
"learning_rate": 1.5692e-05,
"loss": 1.1563,
"step": 538500
},
{
"epoch": 4.31,
"learning_rate": 1.5688e-05,
"loss": 1.1557,
"step": 539000
},
{
"epoch": 4.32,
"learning_rate": 1.5684e-05,
"loss": 1.1491,
"step": 539500
},
{
"epoch": 4.32,
"learning_rate": 1.5680000000000002e-05,
"loss": 1.1512,
"step": 540000
},
{
"epoch": 4.32,
"learning_rate": 1.5676000000000004e-05,
"loss": 1.1544,
"step": 540500
},
{
"epoch": 4.33,
"learning_rate": 1.5672000000000002e-05,
"loss": 1.1547,
"step": 541000
},
{
"epoch": 4.33,
"learning_rate": 1.5668e-05,
"loss": 1.1499,
"step": 541500
},
{
"epoch": 4.34,
"learning_rate": 1.5664e-05,
"loss": 1.1503,
"step": 542000
},
{
"epoch": 4.34,
"learning_rate": 1.5660000000000003e-05,
"loss": 1.1546,
"step": 542500
},
{
"epoch": 4.34,
"learning_rate": 1.5656000000000004e-05,
"loss": 1.1511,
"step": 543000
},
{
"epoch": 4.35,
"learning_rate": 1.5652000000000002e-05,
"loss": 1.1508,
"step": 543500
},
{
"epoch": 4.35,
"learning_rate": 1.5648e-05,
"loss": 1.15,
"step": 544000
},
{
"epoch": 4.36,
"learning_rate": 1.5644e-05,
"loss": 1.15,
"step": 544500
},
{
"epoch": 4.36,
"learning_rate": 1.5640000000000003e-05,
"loss": 1.1526,
"step": 545000
},
{
"epoch": 4.36,
"learning_rate": 1.5636e-05,
"loss": 1.1565,
"step": 545500
},
{
"epoch": 4.37,
"learning_rate": 1.5632000000000002e-05,
"loss": 1.149,
"step": 546000
},
{
"epoch": 4.37,
"learning_rate": 1.5628e-05,
"loss": 1.1605,
"step": 546500
},
{
"epoch": 4.38,
"learning_rate": 1.5624e-05,
"loss": 1.1538,
"step": 547000
},
{
"epoch": 4.38,
"learning_rate": 1.5620000000000003e-05,
"loss": 1.148,
"step": 547500
},
{
"epoch": 4.38,
"learning_rate": 1.5616e-05,
"loss": 1.1501,
"step": 548000
},
{
"epoch": 4.39,
"learning_rate": 1.5612e-05,
"loss": 1.1526,
"step": 548500
},
{
"epoch": 4.39,
"learning_rate": 1.5608e-05,
"loss": 1.155,
"step": 549000
},
{
"epoch": 4.4,
"learning_rate": 1.5604000000000002e-05,
"loss": 1.1529,
"step": 549500
},
{
"epoch": 4.4,
"learning_rate": 1.5600000000000003e-05,
"loss": 1.1506,
"step": 550000
},
{
"epoch": 4.4,
"learning_rate": 1.5596e-05,
"loss": 1.1557,
"step": 550500
},
{
"epoch": 4.41,
"learning_rate": 1.5592e-05,
"loss": 1.1498,
"step": 551000
},
{
"epoch": 4.41,
"learning_rate": 1.5588e-05,
"loss": 1.153,
"step": 551500
},
{
"epoch": 4.42,
"learning_rate": 1.5584000000000002e-05,
"loss": 1.1534,
"step": 552000
},
{
"epoch": 4.42,
"learning_rate": 1.5580000000000003e-05,
"loss": 1.1564,
"step": 552500
},
{
"epoch": 4.42,
"learning_rate": 1.5576e-05,
"loss": 1.1463,
"step": 553000
},
{
"epoch": 4.43,
"learning_rate": 1.5572e-05,
"loss": 1.1479,
"step": 553500
},
{
"epoch": 4.43,
"learning_rate": 1.5568e-05,
"loss": 1.15,
"step": 554000
},
{
"epoch": 4.44,
"learning_rate": 1.5564000000000002e-05,
"loss": 1.1564,
"step": 554500
},
{
"epoch": 4.44,
"learning_rate": 1.556e-05,
"loss": 1.146,
"step": 555000
},
{
"epoch": 4.44,
"learning_rate": 1.5556e-05,
"loss": 1.1495,
"step": 555500
},
{
"epoch": 4.45,
"learning_rate": 1.5552e-05,
"loss": 1.1479,
"step": 556000
},
{
"epoch": 4.45,
"learning_rate": 1.5548e-05,
"loss": 1.1542,
"step": 556500
},
{
"epoch": 4.46,
"learning_rate": 1.5544000000000002e-05,
"loss": 1.1566,
"step": 557000
},
{
"epoch": 4.46,
"learning_rate": 1.554e-05,
"loss": 1.1505,
"step": 557500
},
{
"epoch": 4.46,
"learning_rate": 1.5536e-05,
"loss": 1.149,
"step": 558000
},
{
"epoch": 4.47,
"learning_rate": 1.5532e-05,
"loss": 1.1447,
"step": 558500
},
{
"epoch": 4.47,
"learning_rate": 1.5528e-05,
"loss": 1.1472,
"step": 559000
},
{
"epoch": 4.48,
"learning_rate": 1.5524000000000002e-05,
"loss": 1.1476,
"step": 559500
},
{
"epoch": 4.48,
"learning_rate": 1.552e-05,
"loss": 1.1506,
"step": 560000
},
{
"epoch": 4.48,
"learning_rate": 1.5516000000000002e-05,
"loss": 1.1446,
"step": 560500
},
{
"epoch": 4.49,
"learning_rate": 1.5512e-05,
"loss": 1.1546,
"step": 561000
},
{
"epoch": 4.49,
"learning_rate": 1.5508e-05,
"loss": 1.1502,
"step": 561500
},
{
"epoch": 4.5,
"learning_rate": 1.5504000000000003e-05,
"loss": 1.148,
"step": 562000
},
{
"epoch": 4.5,
"learning_rate": 1.55e-05,
"loss": 1.1537,
"step": 562500
},
{
"epoch": 4.5,
"learning_rate": 1.5496000000000002e-05,
"loss": 1.1527,
"step": 563000
},
{
"epoch": 4.51,
"learning_rate": 1.5492e-05,
"loss": 1.1474,
"step": 563500
},
{
"epoch": 4.51,
"learning_rate": 1.5488e-05,
"loss": 1.1475,
"step": 564000
},
{
"epoch": 4.52,
"learning_rate": 1.5484000000000003e-05,
"loss": 1.1504,
"step": 564500
},
{
"epoch": 4.52,
"learning_rate": 1.548e-05,
"loss": 1.1429,
"step": 565000
},
{
"epoch": 4.52,
"learning_rate": 1.5476000000000002e-05,
"loss": 1.1501,
"step": 565500
},
{
"epoch": 4.53,
"learning_rate": 1.5472e-05,
"loss": 1.1445,
"step": 566000
},
{
"epoch": 4.53,
"learning_rate": 1.5468e-05,
"loss": 1.1389,
"step": 566500
},
{
"epoch": 4.54,
"learning_rate": 1.5464e-05,
"loss": 1.1516,
"step": 567000
},
{
"epoch": 4.54,
"learning_rate": 1.546e-05,
"loss": 1.1445,
"step": 567500
},
{
"epoch": 4.54,
"learning_rate": 1.5456000000000002e-05,
"loss": 1.1443,
"step": 568000
},
{
"epoch": 4.55,
"learning_rate": 1.5452e-05,
"loss": 1.1487,
"step": 568500
},
{
"epoch": 4.55,
"learning_rate": 1.5448000000000002e-05,
"loss": 1.1455,
"step": 569000
},
{
"epoch": 4.56,
"learning_rate": 1.5444e-05,
"loss": 1.1459,
"step": 569500
},
{
"epoch": 4.56,
"learning_rate": 1.544e-05,
"loss": 1.1431,
"step": 570000
},
{
"epoch": 4.56,
"learning_rate": 1.5436000000000003e-05,
"loss": 1.1438,
"step": 570500
},
{
"epoch": 4.57,
"learning_rate": 1.5432e-05,
"loss": 1.1398,
"step": 571000
},
{
"epoch": 4.57,
"learning_rate": 1.5428000000000002e-05,
"loss": 1.1496,
"step": 571500
},
{
"epoch": 4.58,
"learning_rate": 1.5424e-05,
"loss": 1.1434,
"step": 572000
},
{
"epoch": 4.58,
"learning_rate": 1.542e-05,
"loss": 1.1453,
"step": 572500
},
{
"epoch": 4.58,
"learning_rate": 1.5416000000000003e-05,
"loss": 1.1447,
"step": 573000
},
{
"epoch": 4.59,
"learning_rate": 1.5412e-05,
"loss": 1.1413,
"step": 573500
},
{
"epoch": 4.59,
"learning_rate": 1.5408000000000002e-05,
"loss": 1.1439,
"step": 574000
},
{
"epoch": 4.6,
"learning_rate": 1.5404e-05,
"loss": 1.142,
"step": 574500
},
{
"epoch": 4.6,
"learning_rate": 1.54e-05,
"loss": 1.1379,
"step": 575000
},
{
"epoch": 4.6,
"learning_rate": 1.5396000000000003e-05,
"loss": 1.1464,
"step": 575500
},
{
"epoch": 4.61,
"learning_rate": 1.5392e-05,
"loss": 1.1433,
"step": 576000
},
{
"epoch": 4.61,
"learning_rate": 1.5388e-05,
"loss": 1.1473,
"step": 576500
},
{
"epoch": 4.62,
"learning_rate": 1.5384e-05,
"loss": 1.1487,
"step": 577000
},
{
"epoch": 4.62,
"learning_rate": 1.5380000000000002e-05,
"loss": 1.1462,
"step": 577500
},
{
"epoch": 4.62,
"learning_rate": 1.5376000000000003e-05,
"loss": 1.1419,
"step": 578000
},
{
"epoch": 4.63,
"learning_rate": 1.5372e-05,
"loss": 1.143,
"step": 578500
},
{
"epoch": 4.63,
"learning_rate": 1.5368e-05,
"loss": 1.1429,
"step": 579000
},
{
"epoch": 4.64,
"learning_rate": 1.5364e-05,
"loss": 1.1469,
"step": 579500
},
{
"epoch": 4.64,
"learning_rate": 1.5360000000000002e-05,
"loss": 1.1418,
"step": 580000
},
{
"epoch": 4.64,
"learning_rate": 1.5356000000000003e-05,
"loss": 1.1427,
"step": 580500
},
{
"epoch": 4.65,
"learning_rate": 1.5352e-05,
"loss": 1.1446,
"step": 581000
},
{
"epoch": 4.65,
"learning_rate": 1.5348e-05,
"loss": 1.1447,
"step": 581500
},
{
"epoch": 4.66,
"learning_rate": 1.5344e-05,
"loss": 1.1427,
"step": 582000
},
{
"epoch": 4.66,
"learning_rate": 1.5340000000000002e-05,
"loss": 1.1451,
"step": 582500
},
{
"epoch": 4.66,
"learning_rate": 1.5336000000000004e-05,
"loss": 1.1531,
"step": 583000
},
{
"epoch": 4.67,
"learning_rate": 1.5332e-05,
"loss": 1.1482,
"step": 583500
},
{
"epoch": 4.67,
"learning_rate": 1.5328e-05,
"loss": 1.1402,
"step": 584000
},
{
"epoch": 4.68,
"learning_rate": 1.5324e-05,
"loss": 1.1497,
"step": 584500
},
{
"epoch": 4.68,
"learning_rate": 1.5320000000000002e-05,
"loss": 1.1405,
"step": 585000
},
{
"epoch": 4.68,
"learning_rate": 1.5316e-05,
"loss": 1.1484,
"step": 585500
},
{
"epoch": 4.69,
"learning_rate": 1.5312000000000002e-05,
"loss": 1.1422,
"step": 586000
},
{
"epoch": 4.69,
"learning_rate": 1.5308e-05,
"loss": 1.1462,
"step": 586500
},
{
"epoch": 4.7,
"learning_rate": 1.5304e-05,
"loss": 1.1398,
"step": 587000
},
{
"epoch": 4.7,
"learning_rate": 1.5300000000000003e-05,
"loss": 1.1404,
"step": 587500
},
{
"epoch": 4.7,
"learning_rate": 1.5296e-05,
"loss": 1.1431,
"step": 588000
},
{
"epoch": 4.71,
"learning_rate": 1.5292e-05,
"loss": 1.1457,
"step": 588500
},
{
"epoch": 4.71,
"learning_rate": 1.5288e-05,
"loss": 1.1436,
"step": 589000
},
{
"epoch": 4.72,
"learning_rate": 1.5284e-05,
"loss": 1.1406,
"step": 589500
},
{
"epoch": 4.72,
"learning_rate": 1.5280000000000003e-05,
"loss": 1.1449,
"step": 590000
},
{
"epoch": 4.72,
"learning_rate": 1.5276e-05,
"loss": 1.1451,
"step": 590500
},
{
"epoch": 4.73,
"learning_rate": 1.5272e-05,
"loss": 1.1442,
"step": 591000
},
{
"epoch": 4.73,
"learning_rate": 1.5268e-05,
"loss": 1.1447,
"step": 591500
},
{
"epoch": 4.74,
"learning_rate": 1.5264e-05,
"loss": 1.1363,
"step": 592000
},
{
"epoch": 4.74,
"learning_rate": 1.5260000000000003e-05,
"loss": 1.1394,
"step": 592500
},
{
"epoch": 4.74,
"learning_rate": 1.5256000000000003e-05,
"loss": 1.1427,
"step": 593000
},
{
"epoch": 4.75,
"learning_rate": 1.5252e-05,
"loss": 1.1433,
"step": 593500
},
{
"epoch": 4.75,
"learning_rate": 1.5248e-05,
"loss": 1.1434,
"step": 594000
},
{
"epoch": 4.76,
"learning_rate": 1.5244000000000002e-05,
"loss": 1.1395,
"step": 594500
},
{
"epoch": 4.76,
"learning_rate": 1.5240000000000001e-05,
"loss": 1.1394,
"step": 595000
},
{
"epoch": 4.76,
"learning_rate": 1.5236000000000001e-05,
"loss": 1.1442,
"step": 595500
},
{
"epoch": 4.77,
"learning_rate": 1.5232000000000003e-05,
"loss": 1.1438,
"step": 596000
},
{
"epoch": 4.77,
"learning_rate": 1.5228e-05,
"loss": 1.1381,
"step": 596500
},
{
"epoch": 4.78,
"learning_rate": 1.5224e-05,
"loss": 1.1445,
"step": 597000
},
{
"epoch": 4.78,
"learning_rate": 1.5220000000000002e-05,
"loss": 1.1399,
"step": 597500
},
{
"epoch": 4.78,
"learning_rate": 1.5216000000000001e-05,
"loss": 1.1416,
"step": 598000
},
{
"epoch": 4.79,
"learning_rate": 1.5212000000000003e-05,
"loss": 1.1322,
"step": 598500
},
{
"epoch": 4.79,
"learning_rate": 1.5208e-05,
"loss": 1.1481,
"step": 599000
},
{
"epoch": 4.8,
"learning_rate": 1.5204e-05,
"loss": 1.1438,
"step": 599500
},
{
"epoch": 4.8,
"learning_rate": 1.5200000000000002e-05,
"loss": 1.1459,
"step": 600000
},
{
"epoch": 4.8,
"learning_rate": 1.5196000000000002e-05,
"loss": 1.1441,
"step": 600500
},
{
"epoch": 4.81,
"learning_rate": 1.5192000000000003e-05,
"loss": 1.1437,
"step": 601000
},
{
"epoch": 4.81,
"learning_rate": 1.5188000000000001e-05,
"loss": 1.1386,
"step": 601500
},
{
"epoch": 4.82,
"learning_rate": 1.5184e-05,
"loss": 1.1394,
"step": 602000
},
{
"epoch": 4.82,
"learning_rate": 1.5180000000000002e-05,
"loss": 1.1445,
"step": 602500
},
{
"epoch": 4.82,
"learning_rate": 1.5176000000000002e-05,
"loss": 1.1377,
"step": 603000
},
{
"epoch": 4.83,
"learning_rate": 1.5172000000000001e-05,
"loss": 1.1405,
"step": 603500
},
{
"epoch": 4.83,
"learning_rate": 1.5168000000000001e-05,
"loss": 1.1404,
"step": 604000
},
{
"epoch": 4.84,
"learning_rate": 1.5164e-05,
"loss": 1.1472,
"step": 604500
},
{
"epoch": 4.84,
"learning_rate": 1.516e-05,
"loss": 1.1384,
"step": 605000
},
{
"epoch": 4.84,
"learning_rate": 1.5156000000000002e-05,
"loss": 1.1396,
"step": 605500
},
{
"epoch": 4.85,
"learning_rate": 1.5152000000000002e-05,
"loss": 1.1337,
"step": 606000
},
{
"epoch": 4.85,
"learning_rate": 1.5148e-05,
"loss": 1.1399,
"step": 606500
},
{
"epoch": 4.86,
"learning_rate": 1.5144000000000001e-05,
"loss": 1.1407,
"step": 607000
},
{
"epoch": 4.86,
"learning_rate": 1.514e-05,
"loss": 1.1419,
"step": 607500
},
{
"epoch": 4.86,
"learning_rate": 1.5136000000000002e-05,
"loss": 1.1408,
"step": 608000
},
{
"epoch": 4.87,
"learning_rate": 1.5132000000000002e-05,
"loss": 1.1376,
"step": 608500
},
{
"epoch": 4.87,
"learning_rate": 1.5128e-05,
"loss": 1.1439,
"step": 609000
},
{
"epoch": 4.88,
"learning_rate": 1.5124000000000001e-05,
"loss": 1.1368,
"step": 609500
},
{
"epoch": 4.88,
"learning_rate": 1.5120000000000001e-05,
"loss": 1.1404,
"step": 610000
},
{
"epoch": 4.88,
"learning_rate": 1.5116000000000002e-05,
"loss": 1.138,
"step": 610500
},
{
"epoch": 4.89,
"learning_rate": 1.5112000000000002e-05,
"loss": 1.1406,
"step": 611000
},
{
"epoch": 4.89,
"learning_rate": 1.5108e-05,
"loss": 1.1373,
"step": 611500
},
{
"epoch": 4.9,
"learning_rate": 1.5104000000000001e-05,
"loss": 1.1421,
"step": 612000
},
{
"epoch": 4.9,
"learning_rate": 1.5100000000000001e-05,
"loss": 1.1362,
"step": 612500
},
{
"epoch": 4.9,
"learning_rate": 1.5096000000000003e-05,
"loss": 1.1476,
"step": 613000
},
{
"epoch": 4.91,
"learning_rate": 1.5092000000000002e-05,
"loss": 1.1333,
"step": 613500
},
{
"epoch": 4.91,
"learning_rate": 1.5088e-05,
"loss": 1.1306,
"step": 614000
},
{
"epoch": 4.92,
"learning_rate": 1.5084000000000002e-05,
"loss": 1.1361,
"step": 614500
},
{
"epoch": 4.92,
"learning_rate": 1.5080000000000001e-05,
"loss": 1.1351,
"step": 615000
},
{
"epoch": 4.92,
"learning_rate": 1.5076000000000001e-05,
"loss": 1.1363,
"step": 615500
},
{
"epoch": 4.93,
"learning_rate": 1.5072000000000002e-05,
"loss": 1.1414,
"step": 616000
},
{
"epoch": 4.93,
"learning_rate": 1.5068e-05,
"loss": 1.1449,
"step": 616500
},
{
"epoch": 4.94,
"learning_rate": 1.5064e-05,
"loss": 1.137,
"step": 617000
},
{
"epoch": 4.94,
"learning_rate": 1.5060000000000001e-05,
"loss": 1.1387,
"step": 617500
},
{
"epoch": 4.94,
"learning_rate": 1.5056000000000001e-05,
"loss": 1.1336,
"step": 618000
},
{
"epoch": 4.95,
"learning_rate": 1.5052000000000003e-05,
"loss": 1.1365,
"step": 618500
},
{
"epoch": 4.95,
"learning_rate": 1.5048e-05,
"loss": 1.1387,
"step": 619000
},
{
"epoch": 4.96,
"learning_rate": 1.5044e-05,
"loss": 1.1352,
"step": 619500
},
{
"epoch": 4.96,
"learning_rate": 1.5040000000000002e-05,
"loss": 1.1321,
"step": 620000
},
{
"epoch": 4.96,
"learning_rate": 1.5036000000000001e-05,
"loss": 1.1341,
"step": 620500
},
{
"epoch": 4.97,
"learning_rate": 1.5032000000000003e-05,
"loss": 1.1283,
"step": 621000
},
{
"epoch": 4.97,
"learning_rate": 1.5028e-05,
"loss": 1.135,
"step": 621500
},
{
"epoch": 4.98,
"learning_rate": 1.5024e-05,
"loss": 1.1357,
"step": 622000
},
{
"epoch": 4.98,
"learning_rate": 1.5020000000000002e-05,
"loss": 1.1351,
"step": 622500
},
{
"epoch": 4.98,
"learning_rate": 1.5016000000000002e-05,
"loss": 1.1347,
"step": 623000
},
{
"epoch": 4.99,
"learning_rate": 1.5012000000000001e-05,
"loss": 1.1358,
"step": 623500
},
{
"epoch": 4.99,
"learning_rate": 1.5008000000000001e-05,
"loss": 1.1409,
"step": 624000
},
{
"epoch": 5.0,
"learning_rate": 1.5004e-05,
"loss": 1.138,
"step": 624500
},
{
"epoch": 5.0,
"learning_rate": 1.5000000000000002e-05,
"loss": 1.135,
"step": 625000
},
{
"epoch": 5.0,
"eval_loss": 1.0943782329559326,
"eval_runtime": 105.3282,
"eval_samples_per_second": 127.345,
"eval_steps_per_second": 1.994,
"step": 625000
}
],
"max_steps": 2500000,
"num_train_epochs": 20,
"total_flos": 7.883799881847431e+18,
"trial_name": null,
"trial_params": null
}