|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 143758, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.993043865384883e-05, |
|
"loss": 3.1775, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.986087730769766e-05, |
|
"loss": 2.9882, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.979131596154649e-05, |
|
"loss": 2.932, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9721754615395325e-05, |
|
"loss": 2.8774, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.965219326924415e-05, |
|
"loss": 2.9227, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9582631923092976e-05, |
|
"loss": 2.9011, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9513070576941804e-05, |
|
"loss": 2.8233, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.944350923079063e-05, |
|
"loss": 2.859, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.937394788463946e-05, |
|
"loss": 2.8269, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.93043865384883e-05, |
|
"loss": 2.82, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9234825192337127e-05, |
|
"loss": 2.7556, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9165263846185955e-05, |
|
"loss": 2.7388, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9095702500034784e-05, |
|
"loss": 2.8441, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.902614115388361e-05, |
|
"loss": 2.7828, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.895657980773244e-05, |
|
"loss": 2.7528, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.888701846158127e-05, |
|
"loss": 2.7744, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.88174571154301e-05, |
|
"loss": 2.7752, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.874789576927893e-05, |
|
"loss": 2.7288, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.867833442312776e-05, |
|
"loss": 2.7043, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8608773076976586e-05, |
|
"loss": 2.7296, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8539211730825415e-05, |
|
"loss": 2.6831, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.846965038467425e-05, |
|
"loss": 2.7519, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.840008903852308e-05, |
|
"loss": 2.6923, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.833052769237191e-05, |
|
"loss": 2.6681, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.826096634622073e-05, |
|
"loss": 2.6918, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.819140500006956e-05, |
|
"loss": 2.6827, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.812184365391839e-05, |
|
"loss": 2.6477, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8052282307767224e-05, |
|
"loss": 2.6638, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.798272096161605e-05, |
|
"loss": 2.7062, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.791315961546488e-05, |
|
"loss": 2.6954, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.784359826931371e-05, |
|
"loss": 2.7133, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.777403692316254e-05, |
|
"loss": 2.693, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.770447557701137e-05, |
|
"loss": 2.632, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.76349142308602e-05, |
|
"loss": 2.6715, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7565352884709026e-05, |
|
"loss": 2.6947, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7495791538557855e-05, |
|
"loss": 2.6396, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7426230192406684e-05, |
|
"loss": 2.6506, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.735666884625551e-05, |
|
"loss": 2.6204, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.728710750010434e-05, |
|
"loss": 2.6298, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.721754615395318e-05, |
|
"loss": 2.5967, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.7147984807802006e-05, |
|
"loss": 2.6396, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7078423461650835e-05, |
|
"loss": 2.601, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7008862115499664e-05, |
|
"loss": 2.6289, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.693930076934849e-05, |
|
"loss": 2.6143, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.6869739423197315e-05, |
|
"loss": 2.6672, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.680017807704615e-05, |
|
"loss": 2.6145, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.673061673089498e-05, |
|
"loss": 2.626, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.666105538474381e-05, |
|
"loss": 2.6622, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.659149403859264e-05, |
|
"loss": 2.578, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.6521932692441466e-05, |
|
"loss": 2.6476, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.6452371346290294e-05, |
|
"loss": 2.6051, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.638281000013912e-05, |
|
"loss": 2.5568, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.631324865398796e-05, |
|
"loss": 2.5498, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.624368730783679e-05, |
|
"loss": 2.5283, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.617412596168561e-05, |
|
"loss": 2.615, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.610456461553444e-05, |
|
"loss": 2.6121, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.603500326938327e-05, |
|
"loss": 2.535, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.59654419232321e-05, |
|
"loss": 2.584, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.589588057708093e-05, |
|
"loss": 2.5619, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.582631923092976e-05, |
|
"loss": 2.581, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.575675788477859e-05, |
|
"loss": 2.5513, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.568719653862742e-05, |
|
"loss": 2.5844, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.561763519247625e-05, |
|
"loss": 2.6033, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5548073846325076e-05, |
|
"loss": 2.4988, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5478512500173905e-05, |
|
"loss": 2.5467, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5408951154022734e-05, |
|
"loss": 2.5825, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.533938980787156e-05, |
|
"loss": 2.5732, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.526982846172039e-05, |
|
"loss": 2.572, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.520026711556922e-05, |
|
"loss": 2.4804, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.513070576941805e-05, |
|
"loss": 2.5576, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.5061144423266885e-05, |
|
"loss": 2.5474, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4991583077115714e-05, |
|
"loss": 2.5262, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.492202173096454e-05, |
|
"loss": 2.5679, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.485246038481337e-05, |
|
"loss": 2.5541, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.4782899038662194e-05, |
|
"loss": 2.4937, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.471333769251102e-05, |
|
"loss": 2.5643, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.464377634635986e-05, |
|
"loss": 2.5978, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.457421500020869e-05, |
|
"loss": 2.4727, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.4504653654057516e-05, |
|
"loss": 2.613, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.4435092307906345e-05, |
|
"loss": 2.5453, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.4365530961755174e-05, |
|
"loss": 2.5362, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.4295969615604e-05, |
|
"loss": 2.5296, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.422640826945283e-05, |
|
"loss": 2.5282, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.415684692330167e-05, |
|
"loss": 2.4973, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.408728557715049e-05, |
|
"loss": 2.5563, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.401772423099932e-05, |
|
"loss": 2.5514, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.394816288484815e-05, |
|
"loss": 2.5327, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.3878601538696976e-05, |
|
"loss": 2.5251, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.380904019254581e-05, |
|
"loss": 2.5695, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.373947884639464e-05, |
|
"loss": 2.4936, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.366991750024347e-05, |
|
"loss": 2.516, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.36003561540923e-05, |
|
"loss": 2.5568, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.353079480794113e-05, |
|
"loss": 2.5253, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.346123346178995e-05, |
|
"loss": 2.4991, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.3391672115638785e-05, |
|
"loss": 2.5029, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.332211076948761e-05, |
|
"loss": 2.5178, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.325254942333644e-05, |
|
"loss": 2.4964, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.318298807718527e-05, |
|
"loss": 2.4339, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.31134267310341e-05, |
|
"loss": 2.4874, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.304386538488293e-05, |
|
"loss": 2.4894, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.297430403873176e-05, |
|
"loss": 2.5352, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.290474269258059e-05, |
|
"loss": 2.5251, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.283518134642942e-05, |
|
"loss": 2.4545, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.2765620000278244e-05, |
|
"loss": 2.4831, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.269605865412707e-05, |
|
"loss": 2.5145, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.26264973079759e-05, |
|
"loss": 2.4543, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.255693596182474e-05, |
|
"loss": 2.5056, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.2487374615673566e-05, |
|
"loss": 2.4933, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.2417813269522395e-05, |
|
"loss": 2.4786, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.2348251923371224e-05, |
|
"loss": 2.3971, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.227869057722005e-05, |
|
"loss": 2.4759, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.220912923106888e-05, |
|
"loss": 2.4637, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.213956788491771e-05, |
|
"loss": 2.5033, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.207000653876654e-05, |
|
"loss": 2.5021, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.200044519261537e-05, |
|
"loss": 2.4136, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.19308838464642e-05, |
|
"loss": 2.5604, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.1861322500313026e-05, |
|
"loss": 2.5054, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.1791761154161855e-05, |
|
"loss": 2.4761, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.1722199808010684e-05, |
|
"loss": 2.4802, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.165263846185952e-05, |
|
"loss": 2.5241, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.158307711570835e-05, |
|
"loss": 2.4519, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.151351576955718e-05, |
|
"loss": 2.487, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1443954423406006e-05, |
|
"loss": 2.4719, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.137439307725483e-05, |
|
"loss": 2.5132, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.130483173110366e-05, |
|
"loss": 2.4432, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.123527038495249e-05, |
|
"loss": 2.448, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.116570903880132e-05, |
|
"loss": 2.4659, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.109614769265015e-05, |
|
"loss": 2.4596, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.102658634649898e-05, |
|
"loss": 2.473, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.095702500034781e-05, |
|
"loss": 2.4812, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.088746365419664e-05, |
|
"loss": 2.43, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.081790230804547e-05, |
|
"loss": 2.4743, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.07483409618943e-05, |
|
"loss": 2.4693, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.0678779615743124e-05, |
|
"loss": 2.4932, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.060921826959195e-05, |
|
"loss": 2.4459, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.053965692344078e-05, |
|
"loss": 2.4123, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.047009557728961e-05, |
|
"loss": 2.4364, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.0400534231138446e-05, |
|
"loss": 2.4428, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.0330972884987275e-05, |
|
"loss": 2.4157, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.0261411538836103e-05, |
|
"loss": 2.4145, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.019185019268493e-05, |
|
"loss": 2.5131, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.012228884653376e-05, |
|
"loss": 2.4509, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.005272750038259e-05, |
|
"loss": 2.4781, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.998316615423142e-05, |
|
"loss": 2.428, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.991360480808025e-05, |
|
"loss": 2.4402, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.9844043461929077e-05, |
|
"loss": 2.3886, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.9774482115777905e-05, |
|
"loss": 2.4329, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.9704920769626734e-05, |
|
"loss": 2.4038, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.963535942347556e-05, |
|
"loss": 2.3857, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.956579807732439e-05, |
|
"loss": 2.3806, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.949623673117323e-05, |
|
"loss": 2.4243, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.9426675385022057e-05, |
|
"loss": 2.4545, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.9357114038870885e-05, |
|
"loss": 2.5031, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.928755269271971e-05, |
|
"loss": 2.4046, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.9217991346568536e-05, |
|
"loss": 2.4326, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.914843000041737e-05, |
|
"loss": 2.4118, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.90788686542662e-05, |
|
"loss": 2.4556, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.900930730811503e-05, |
|
"loss": 2.4579, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.893974596196386e-05, |
|
"loss": 2.3932, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.887018461581269e-05, |
|
"loss": 2.4281, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.8800623269661516e-05, |
|
"loss": 2.4362, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.8731061923510345e-05, |
|
"loss": 2.4217, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.866150057735918e-05, |
|
"loss": 2.4305, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.8591939231208e-05, |
|
"loss": 2.441, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.852237788505683e-05, |
|
"loss": 2.422, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.845281653890566e-05, |
|
"loss": 2.3997, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.838325519275449e-05, |
|
"loss": 2.3749, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.831369384660332e-05, |
|
"loss": 2.4073, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.8244132500452154e-05, |
|
"loss": 2.4585, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.817457115430098e-05, |
|
"loss": 2.4215, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.810500980814981e-05, |
|
"loss": 2.414, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.803544846199864e-05, |
|
"loss": 2.4223, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.796588711584746e-05, |
|
"loss": 2.4012, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.78963257696963e-05, |
|
"loss": 2.422, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.782676442354513e-05, |
|
"loss": 2.3821, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.7757203077393956e-05, |
|
"loss": 2.4588, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.7687641731242785e-05, |
|
"loss": 2.4303, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.7618080385091614e-05, |
|
"loss": 2.4043, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.754851903894044e-05, |
|
"loss": 2.3885, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.747895769278927e-05, |
|
"loss": 2.452, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.740939634663811e-05, |
|
"loss": 2.4303, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.7339835000486936e-05, |
|
"loss": 2.4219, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.727027365433576e-05, |
|
"loss": 2.4514, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.720071230818459e-05, |
|
"loss": 2.3961, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.7131150962033416e-05, |
|
"loss": 2.3741, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.7061589615882244e-05, |
|
"loss": 2.4553, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.699202826973108e-05, |
|
"loss": 2.3806, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.692246692357991e-05, |
|
"loss": 2.416, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.685290557742874e-05, |
|
"loss": 2.3745, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.678334423127757e-05, |
|
"loss": 2.4195, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.6713782885126395e-05, |
|
"loss": 2.4216, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.6644221538975224e-05, |
|
"loss": 2.3769, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.657466019282405e-05, |
|
"loss": 2.3851, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.650509884667288e-05, |
|
"loss": 2.4112, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.643553750052171e-05, |
|
"loss": 2.4096, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.636597615437054e-05, |
|
"loss": 2.4354, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.629641480821937e-05, |
|
"loss": 2.422, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.62268534620682e-05, |
|
"loss": 2.4195, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.6157292115917026e-05, |
|
"loss": 2.4506, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.608773076976586e-05, |
|
"loss": 2.4016, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.601816942361469e-05, |
|
"loss": 2.3905, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.594860807746352e-05, |
|
"loss": 2.4293, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.587904673131234e-05, |
|
"loss": 2.4237, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.580948538516117e-05, |
|
"loss": 2.3867, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.5739924039010006e-05, |
|
"loss": 2.3885, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.5670362692858835e-05, |
|
"loss": 2.4321, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.5600801346707664e-05, |
|
"loss": 2.3701, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.553124000055649e-05, |
|
"loss": 2.4581, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.546167865440532e-05, |
|
"loss": 2.398, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.539211730825415e-05, |
|
"loss": 2.4155, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.532255596210298e-05, |
|
"loss": 2.4122, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.5252994615951815e-05, |
|
"loss": 2.3833, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.518343326980064e-05, |
|
"loss": 2.4614, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.5113871923649466e-05, |
|
"loss": 2.3859, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.5044310577498295e-05, |
|
"loss": 2.3674, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.4974749231347124e-05, |
|
"loss": 2.4227, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.490518788519595e-05, |
|
"loss": 2.4031, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.483562653904479e-05, |
|
"loss": 2.4271, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.476606519289362e-05, |
|
"loss": 2.4176, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4696503846742446e-05, |
|
"loss": 2.4076, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4626942500591275e-05, |
|
"loss": 2.3618, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.4557381154440104e-05, |
|
"loss": 2.3998, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.448781980828893e-05, |
|
"loss": 2.4145, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.441825846213776e-05, |
|
"loss": 2.3621, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.434869711598659e-05, |
|
"loss": 2.4247, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.427913576983542e-05, |
|
"loss": 2.3934, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.420957442368425e-05, |
|
"loss": 2.379, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.414001307753308e-05, |
|
"loss": 2.3932, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.4070451731381906e-05, |
|
"loss": 2.3922, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.400089038523074e-05, |
|
"loss": 2.4221, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.393132903907957e-05, |
|
"loss": 2.3921, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.38617676929284e-05, |
|
"loss": 2.3484, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.379220634677722e-05, |
|
"loss": 2.3812, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.372264500062605e-05, |
|
"loss": 2.3616, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.365308365447488e-05, |
|
"loss": 2.388, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.3583522308323714e-05, |
|
"loss": 2.3102, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.351396096217254e-05, |
|
"loss": 2.3746, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.344439961602137e-05, |
|
"loss": 2.4212, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.33748382698702e-05, |
|
"loss": 2.3558, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.330527692371903e-05, |
|
"loss": 2.393, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.323571557756786e-05, |
|
"loss": 2.3798, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.316615423141669e-05, |
|
"loss": 2.4259, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.3096592885265516e-05, |
|
"loss": 2.3816, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.3027031539114345e-05, |
|
"loss": 2.3699, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.2957470192963174e-05, |
|
"loss": 2.4018, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.2887908846812e-05, |
|
"loss": 2.3356, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.281834750066083e-05, |
|
"loss": 2.347, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.274878615450967e-05, |
|
"loss": 2.3598, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.2679224808358496e-05, |
|
"loss": 2.3773, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.2609663462207325e-05, |
|
"loss": 2.3704, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.2540102116056154e-05, |
|
"loss": 2.3317, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.247054076990498e-05, |
|
"loss": 2.3878, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.2400979423753805e-05, |
|
"loss": 2.3198, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.233141807760264e-05, |
|
"loss": 2.4416, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.226185673145147e-05, |
|
"loss": 2.3275, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.21922953853003e-05, |
|
"loss": 2.3858, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.212273403914913e-05, |
|
"loss": 2.3448, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.2053172692997956e-05, |
|
"loss": 2.3493, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.1983611346846785e-05, |
|
"loss": 2.3644, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.1914050000695614e-05, |
|
"loss": 2.339, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.184448865454445e-05, |
|
"loss": 2.3393, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.177492730839327e-05, |
|
"loss": 2.3754, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.17053659622421e-05, |
|
"loss": 2.3251, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.163580461609093e-05, |
|
"loss": 2.3407, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.156624326993976e-05, |
|
"loss": 2.3092, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.149668192378859e-05, |
|
"loss": 2.3626, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.142712057763742e-05, |
|
"loss": 2.3639, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.135755923148625e-05, |
|
"loss": 2.3443, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.128799788533508e-05, |
|
"loss": 2.3274, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.121843653918391e-05, |
|
"loss": 2.3728, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.114887519303274e-05, |
|
"loss": 2.3047, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.107931384688157e-05, |
|
"loss": 2.316, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.1009752500730396e-05, |
|
"loss": 2.3326, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.0940191154579225e-05, |
|
"loss": 2.357, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.0870629808428053e-05, |
|
"loss": 2.367, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.080106846227688e-05, |
|
"loss": 2.3529, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.073150711612571e-05, |
|
"loss": 2.3748, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.066194576997454e-05, |
|
"loss": 2.3208, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.0592384423823376e-05, |
|
"loss": 2.3327, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.0522823077672204e-05, |
|
"loss": 2.3327, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.0453261731521033e-05, |
|
"loss": 2.3588, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.0383700385369855e-05, |
|
"loss": 2.3343, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.0314139039218688e-05, |
|
"loss": 2.3095, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.0244577693067517e-05, |
|
"loss": 2.3547, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.0175016346916345e-05, |
|
"loss": 2.3346, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.0105455000765174e-05, |
|
"loss": 2.289, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.0035893654614006e-05, |
|
"loss": 2.3577, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.9966332308462835e-05, |
|
"loss": 2.3592, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.9896770962311664e-05, |
|
"loss": 2.3258, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.9827209616160496e-05, |
|
"loss": 2.336, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.9757648270009325e-05, |
|
"loss": 2.3663, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.968808692385815e-05, |
|
"loss": 2.3952, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.961852557770698e-05, |
|
"loss": 2.3366, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.954896423155581e-05, |
|
"loss": 2.316, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.9479402885404637e-05, |
|
"loss": 2.3167, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.940984153925347e-05, |
|
"loss": 2.3853, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.93402801931023e-05, |
|
"loss": 2.3824, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.9270718846951127e-05, |
|
"loss": 2.385, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.920115750079996e-05, |
|
"loss": 2.3916, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.913159615464879e-05, |
|
"loss": 2.3757, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.9062034808497617e-05, |
|
"loss": 2.3031, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.8992473462346443e-05, |
|
"loss": 2.3391, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.892291211619527e-05, |
|
"loss": 2.3327, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.88533507700441e-05, |
|
"loss": 2.3553, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.8783789423892933e-05, |
|
"loss": 2.4071, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.871422807774176e-05, |
|
"loss": 2.2922, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.864466673159059e-05, |
|
"loss": 2.2956, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8575105385439423e-05, |
|
"loss": 2.3518, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.850554403928825e-05, |
|
"loss": 2.3378, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.843598269313708e-05, |
|
"loss": 2.3567, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.836642134698591e-05, |
|
"loss": 2.3835, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.8296860000834735e-05, |
|
"loss": 2.3527, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.8227298654683564e-05, |
|
"loss": 2.3609, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.8157737308532396e-05, |
|
"loss": 2.3263, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.8088175962381225e-05, |
|
"loss": 2.3604, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.8018614616230054e-05, |
|
"loss": 2.371, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.7949053270078886e-05, |
|
"loss": 2.3555, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.7879491923927715e-05, |
|
"loss": 2.3573, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.7809930577776543e-05, |
|
"loss": 2.3151, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.7740369231625372e-05, |
|
"loss": 2.342, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.7670807885474205e-05, |
|
"loss": 2.3264, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7601246539323027e-05, |
|
"loss": 2.3124, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.753168519317186e-05, |
|
"loss": 2.3672, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7462123847020688e-05, |
|
"loss": 2.3758, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7392562500869517e-05, |
|
"loss": 2.3426, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.732300115471835e-05, |
|
"loss": 2.3127, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7253439808567178e-05, |
|
"loss": 2.351, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7183878462416007e-05, |
|
"loss": 2.3214, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.7114317116264835e-05, |
|
"loss": 2.3141, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.7044755770113668e-05, |
|
"loss": 2.3095, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.6975194423962497e-05, |
|
"loss": 2.3303, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.6905633077811322e-05, |
|
"loss": 2.3111, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.683607173166015e-05, |
|
"loss": 2.2752, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.676651038550898e-05, |
|
"loss": 2.3245, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.6696949039357812e-05, |
|
"loss": 2.3506, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.662738769320664e-05, |
|
"loss": 2.2878, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.655782634705547e-05, |
|
"loss": 2.2858, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.64882650009043e-05, |
|
"loss": 2.3803, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.641870365475313e-05, |
|
"loss": 2.334, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.634914230860196e-05, |
|
"loss": 2.3088, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.627958096245079e-05, |
|
"loss": 2.3356, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6210019616299614e-05, |
|
"loss": 2.321, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6140458270148443e-05, |
|
"loss": 2.3396, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.607089692399727e-05, |
|
"loss": 2.2976, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6001335577846104e-05, |
|
"loss": 2.2884, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.5931774231694933e-05, |
|
"loss": 2.3099, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.586221288554376e-05, |
|
"loss": 2.3217, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5792651539392594e-05, |
|
"loss": 2.2856, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5723090193241423e-05, |
|
"loss": 2.3615, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.565352884709025e-05, |
|
"loss": 2.2836, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5583967500939077e-05, |
|
"loss": 2.3666, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5514406154787906e-05, |
|
"loss": 2.3123, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5444844808636735e-05, |
|
"loss": 2.3703, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5375283462485567e-05, |
|
"loss": 2.3, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5305722116334396e-05, |
|
"loss": 2.3252, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5236160770183225e-05, |
|
"loss": 2.2764, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5166599424032057e-05, |
|
"loss": 2.3298, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5097038077880886e-05, |
|
"loss": 2.2929, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5027476731729715e-05, |
|
"loss": 2.2647, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.4957915385578544e-05, |
|
"loss": 2.2967, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.4888354039427372e-05, |
|
"loss": 2.2854, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.48187926932762e-05, |
|
"loss": 2.2653, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.4749231347125034e-05, |
|
"loss": 2.2476, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.467967000097386e-05, |
|
"loss": 2.2801, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4610108654822688e-05, |
|
"loss": 2.2836, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.454054730867152e-05, |
|
"loss": 2.2847, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.447098596252035e-05, |
|
"loss": 2.3052, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4401424616369178e-05, |
|
"loss": 2.285, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.4331863270218007e-05, |
|
"loss": 2.22, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.4262301924066836e-05, |
|
"loss": 2.2589, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.4192740577915664e-05, |
|
"loss": 2.2705, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.4123179231764497e-05, |
|
"loss": 2.304, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.4053617885613325e-05, |
|
"loss": 2.2499, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.398405653946215e-05, |
|
"loss": 2.3109, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.3914495193310983e-05, |
|
"loss": 2.2772, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.3844933847159812e-05, |
|
"loss": 2.2835, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.377537250100864e-05, |
|
"loss": 2.298, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.370581115485747e-05, |
|
"loss": 2.2486, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.36362498087063e-05, |
|
"loss": 2.304, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.3566688462555127e-05, |
|
"loss": 2.2604, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.3497127116403956e-05, |
|
"loss": 2.2368, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.342756577025279e-05, |
|
"loss": 2.2736, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.3358004424101617e-05, |
|
"loss": 2.3323, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.3288443077950446e-05, |
|
"loss": 2.2373, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.3218881731799275e-05, |
|
"loss": 2.3072, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.3149320385648104e-05, |
|
"loss": 2.2919, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.3079759039496933e-05, |
|
"loss": 2.2874, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.3010197693345762e-05, |
|
"loss": 2.2723, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.294063634719459e-05, |
|
"loss": 2.3047, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.287107500104342e-05, |
|
"loss": 2.2542, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.2801513654892252e-05, |
|
"loss": 2.2426, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.273195230874108e-05, |
|
"loss": 2.2554, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.266239096258991e-05, |
|
"loss": 2.3179, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.2592829616438738e-05, |
|
"loss": 2.2934, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.2523268270287567e-05, |
|
"loss": 2.2707, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.2453706924136396e-05, |
|
"loss": 2.311, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.2384145577985228e-05, |
|
"loss": 2.3381, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2314584231834054e-05, |
|
"loss": 2.247, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2245022885682883e-05, |
|
"loss": 2.3296, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.2175461539531715e-05, |
|
"loss": 2.2792, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2105900193380544e-05, |
|
"loss": 2.3004, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2036338847229373e-05, |
|
"loss": 2.2845, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.19667775010782e-05, |
|
"loss": 2.2623, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.189721615492703e-05, |
|
"loss": 2.2849, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.182765480877586e-05, |
|
"loss": 2.231, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.175809346262469e-05, |
|
"loss": 2.2894, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.168853211647352e-05, |
|
"loss": 2.2979, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1618970770322346e-05, |
|
"loss": 2.2961, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1549409424171178e-05, |
|
"loss": 2.2625, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1479848078020007e-05, |
|
"loss": 2.2742, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1410286731868836e-05, |
|
"loss": 2.2718, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1340725385717668e-05, |
|
"loss": 2.2341, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1271164039566493e-05, |
|
"loss": 2.2424, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1201602693415322e-05, |
|
"loss": 2.2147, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1132041347264154e-05, |
|
"loss": 2.3273, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.1062480001112983e-05, |
|
"loss": 2.2852, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.0992918654961812e-05, |
|
"loss": 2.301, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.092335730881064e-05, |
|
"loss": 2.2577, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.085379596265947e-05, |
|
"loss": 2.3224, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.07842346165083e-05, |
|
"loss": 2.2808, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.071467327035713e-05, |
|
"loss": 2.369, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.064511192420596e-05, |
|
"loss": 2.2545, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0575550578054785e-05, |
|
"loss": 2.2866, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0505989231903618e-05, |
|
"loss": 2.2575, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.0436427885752446e-05, |
|
"loss": 2.2734, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0366866539601275e-05, |
|
"loss": 2.3246, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0297305193450104e-05, |
|
"loss": 2.2856, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0227743847298933e-05, |
|
"loss": 2.2153, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0158182501147762e-05, |
|
"loss": 2.3101, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0088621154996594e-05, |
|
"loss": 2.256, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0019059808845423e-05, |
|
"loss": 2.2453, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.9949498462694252e-05, |
|
"loss": 2.2607, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.987993711654308e-05, |
|
"loss": 2.2636, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.981037577039191e-05, |
|
"loss": 2.2273, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.974081442424074e-05, |
|
"loss": 2.2384, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.9671253078089567e-05, |
|
"loss": 2.234, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.96016917319384e-05, |
|
"loss": 2.2819, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.9532130385787225e-05, |
|
"loss": 2.2695, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.9462569039636054e-05, |
|
"loss": 2.2836, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.9393007693484886e-05, |
|
"loss": 2.2486, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.9323446347333715e-05, |
|
"loss": 2.2448, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.9253885001182544e-05, |
|
"loss": 2.3028, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.9184323655031373e-05, |
|
"loss": 2.2855, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.91147623088802e-05, |
|
"loss": 2.2171, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.904520096272903e-05, |
|
"loss": 2.2834, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.8975639616577863e-05, |
|
"loss": 2.2743, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.890607827042669e-05, |
|
"loss": 2.2437, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.8836516924275517e-05, |
|
"loss": 2.2791, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.876695557812435e-05, |
|
"loss": 2.2572, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.8697394231973178e-05, |
|
"loss": 2.3147, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.8627832885822007e-05, |
|
"loss": 2.2677, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.855827153967084e-05, |
|
"loss": 2.2691, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8488710193519665e-05, |
|
"loss": 2.2629, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8419148847368493e-05, |
|
"loss": 2.2621, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.8349587501217326e-05, |
|
"loss": 2.2666, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.8280026155066155e-05, |
|
"loss": 2.2626, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.8210464808914983e-05, |
|
"loss": 2.2453, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.8140903462763812e-05, |
|
"loss": 2.2943, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.807134211661264e-05, |
|
"loss": 2.2731, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.800178077046147e-05, |
|
"loss": 2.2229, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7932219424310302e-05, |
|
"loss": 2.2683, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.786265807815913e-05, |
|
"loss": 2.2642, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.7793096732007957e-05, |
|
"loss": 2.2761, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.772353538585679e-05, |
|
"loss": 2.2218, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.7653974039705618e-05, |
|
"loss": 2.2866, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7584412693554447e-05, |
|
"loss": 2.267, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.751485134740328e-05, |
|
"loss": 2.2521, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7445290001252104e-05, |
|
"loss": 2.2212, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7375728655100933e-05, |
|
"loss": 2.2599, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.7306167308949765e-05, |
|
"loss": 2.2767, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.7236605962798594e-05, |
|
"loss": 2.2473, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.716704461664742e-05, |
|
"loss": 2.2549, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.7097483270496252e-05, |
|
"loss": 2.2694, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.702792192434508e-05, |
|
"loss": 2.2805, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.695836057819391e-05, |
|
"loss": 2.2728, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.688879923204274e-05, |
|
"loss": 2.2649, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.6819237885891567e-05, |
|
"loss": 2.2667, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.6749676539740396e-05, |
|
"loss": 2.2807, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.668011519358923e-05, |
|
"loss": 2.2694, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6610553847438057e-05, |
|
"loss": 2.2407, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6540992501286886e-05, |
|
"loss": 2.2147, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6471431155135715e-05, |
|
"loss": 2.266, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6401869808984544e-05, |
|
"loss": 2.2406, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.6332308462833373e-05, |
|
"loss": 2.2371, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.62627471166822e-05, |
|
"loss": 2.2532, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.6193185770531034e-05, |
|
"loss": 2.2243, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.612362442437986e-05, |
|
"loss": 2.2837, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.605406307822869e-05, |
|
"loss": 2.2991, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.598450173207752e-05, |
|
"loss": 2.2166, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.591494038592635e-05, |
|
"loss": 2.2738, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5845379039775178e-05, |
|
"loss": 2.2654, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5775817693624007e-05, |
|
"loss": 2.2906, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5706256347472836e-05, |
|
"loss": 2.2701, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5636695001321665e-05, |
|
"loss": 2.2599, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.5567133655170497e-05, |
|
"loss": 2.2211, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.5497572309019326e-05, |
|
"loss": 2.2448, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.542801096286815e-05, |
|
"loss": 2.2691, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5358449616716984e-05, |
|
"loss": 2.2314, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5288888270565812e-05, |
|
"loss": 2.2875, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5219326924414643e-05, |
|
"loss": 2.2662, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5149765578263472e-05, |
|
"loss": 2.2468, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.5080204232112299e-05, |
|
"loss": 2.2398, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.501064288596113e-05, |
|
"loss": 2.2485, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.4941081539809958e-05, |
|
"loss": 2.235, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4871520193658789e-05, |
|
"loss": 2.2683, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.480195884750762e-05, |
|
"loss": 2.2748, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4732397501356447e-05, |
|
"loss": 2.2722, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4662836155205275e-05, |
|
"loss": 2.256, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.4593274809054106e-05, |
|
"loss": 2.2949, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.4523713462902935e-05, |
|
"loss": 2.2466, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.4454152116751765e-05, |
|
"loss": 2.2626, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.4384590770600593e-05, |
|
"loss": 2.2383, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4315029424449421e-05, |
|
"loss": 2.2195, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4245468078298252e-05, |
|
"loss": 2.2826, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4175906732147083e-05, |
|
"loss": 2.2879, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4106345385995911e-05, |
|
"loss": 2.2546, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4036784039844739e-05, |
|
"loss": 2.3098, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.3967222693693569e-05, |
|
"loss": 2.2196, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.3897661347542398e-05, |
|
"loss": 2.2716, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.3828100001391229e-05, |
|
"loss": 2.283, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.3758538655240057e-05, |
|
"loss": 2.2332, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.3688977309088885e-05, |
|
"loss": 2.2702, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.3619415962937715e-05, |
|
"loss": 2.2292, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.3549854616786544e-05, |
|
"loss": 2.2587, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.3480293270635375e-05, |
|
"loss": 2.2908, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.3410731924484205e-05, |
|
"loss": 2.2374, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.3341170578333032e-05, |
|
"loss": 2.202, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.3271609232181861e-05, |
|
"loss": 2.2496, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.3202047886030692e-05, |
|
"loss": 2.2488, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.313248653987952e-05, |
|
"loss": 2.2002, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.3062925193728351e-05, |
|
"loss": 2.2189, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.2993363847577178e-05, |
|
"loss": 2.2373, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.2923802501426007e-05, |
|
"loss": 2.2146, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.2854241155274838e-05, |
|
"loss": 2.2442, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.2784679809123668e-05, |
|
"loss": 2.2113, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.2715118462972497e-05, |
|
"loss": 2.2127, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.2645557116821324e-05, |
|
"loss": 2.2608, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.2575995770670155e-05, |
|
"loss": 2.2913, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.2506434424518984e-05, |
|
"loss": 2.2128, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.2436873078367812e-05, |
|
"loss": 2.2995, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.2367311732216643e-05, |
|
"loss": 2.1982, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.2297750386065472e-05, |
|
"loss": 2.2296, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.22281890399143e-05, |
|
"loss": 2.2605, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.2158627693763131e-05, |
|
"loss": 2.2234, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.2089066347611958e-05, |
|
"loss": 2.2545, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.2019505001460789e-05, |
|
"loss": 2.2522, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.1949943655309618e-05, |
|
"loss": 2.2098, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.1880382309158447e-05, |
|
"loss": 2.2733, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.1810820963007277e-05, |
|
"loss": 2.2238, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.1741259616856106e-05, |
|
"loss": 2.2313, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.1671698270704935e-05, |
|
"loss": 2.2784, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.1602136924553766e-05, |
|
"loss": 2.2708, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.1532575578402593e-05, |
|
"loss": 2.2571, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.1463014232251423e-05, |
|
"loss": 2.2817, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.1393452886100252e-05, |
|
"loss": 2.2155, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.1323891539949081e-05, |
|
"loss": 2.2027, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.1254330193797912e-05, |
|
"loss": 2.2216, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.118476884764674e-05, |
|
"loss": 2.2222, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.111520750149557e-05, |
|
"loss": 2.2636, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.1045646155344398e-05, |
|
"loss": 2.2968, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.0976084809193229e-05, |
|
"loss": 2.3037, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.0906523463042057e-05, |
|
"loss": 2.2548, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.0836962116890886e-05, |
|
"loss": 2.2708, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.0767400770739717e-05, |
|
"loss": 2.2342, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.0697839424588544e-05, |
|
"loss": 2.2659, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.0628278078437375e-05, |
|
"loss": 2.2394, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.0558716732286205e-05, |
|
"loss": 2.29, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.0489155386135032e-05, |
|
"loss": 2.2658, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.0419594039983863e-05, |
|
"loss": 2.2412, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.0350032693832692e-05, |
|
"loss": 2.2438, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.028047134768152e-05, |
|
"loss": 2.2179, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.0210910001530351e-05, |
|
"loss": 2.2669, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.014134865537918e-05, |
|
"loss": 2.1845, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.0071787309228009e-05, |
|
"loss": 2.239, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.0002225963076838e-05, |
|
"loss": 2.2727, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.932664616925667e-06, |
|
"loss": 2.2217, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.863103270774495e-06, |
|
"loss": 2.2662, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.793541924623326e-06, |
|
"loss": 2.2163, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.723980578472155e-06, |
|
"loss": 2.242, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.654419232320984e-06, |
|
"loss": 2.2967, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.584857886169814e-06, |
|
"loss": 2.2005, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.515296540018641e-06, |
|
"loss": 2.244, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.445735193867472e-06, |
|
"loss": 2.2641, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.376173847716303e-06, |
|
"loss": 2.2491, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.30661250156513e-06, |
|
"loss": 2.1801, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.23705115541396e-06, |
|
"loss": 2.2559, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.167489809262789e-06, |
|
"loss": 2.2147, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.097928463111618e-06, |
|
"loss": 2.2392, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.028367116960449e-06, |
|
"loss": 2.2059, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 8.958805770809277e-06, |
|
"loss": 2.2391, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 8.889244424658106e-06, |
|
"loss": 2.2566, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.819683078506935e-06, |
|
"loss": 2.1766, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.750121732355766e-06, |
|
"loss": 2.2256, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.680560386204594e-06, |
|
"loss": 2.2554, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.610999040053423e-06, |
|
"loss": 2.2409, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.541437693902254e-06, |
|
"loss": 2.2004, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.471876347751081e-06, |
|
"loss": 2.2624, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.402315001599912e-06, |
|
"loss": 2.2336, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.33275365544874e-06, |
|
"loss": 2.2002, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.26319230929757e-06, |
|
"loss": 2.2472, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.1936309631464e-06, |
|
"loss": 2.2573, |
|
"step": 120200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.124069616995229e-06, |
|
"loss": 2.2176, |
|
"step": 120400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.054508270844058e-06, |
|
"loss": 2.2385, |
|
"step": 120600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 7.984946924692888e-06, |
|
"loss": 2.2575, |
|
"step": 120800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 7.915385578541715e-06, |
|
"loss": 2.2158, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.845824232390546e-06, |
|
"loss": 2.2307, |
|
"step": 121200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.776262886239375e-06, |
|
"loss": 2.2312, |
|
"step": 121400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.706701540088204e-06, |
|
"loss": 2.251, |
|
"step": 121600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.637140193937034e-06, |
|
"loss": 2.2864, |
|
"step": 121800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.567578847785862e-06, |
|
"loss": 2.2243, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.498017501634693e-06, |
|
"loss": 2.252, |
|
"step": 122200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.428456155483521e-06, |
|
"loss": 2.2009, |
|
"step": 122400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.35889480933235e-06, |
|
"loss": 2.2145, |
|
"step": 122600 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.28933346318118e-06, |
|
"loss": 2.2335, |
|
"step": 122800 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.219772117030009e-06, |
|
"loss": 2.2516, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.150210770878839e-06, |
|
"loss": 2.2146, |
|
"step": 123200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.0806494247276675e-06, |
|
"loss": 2.2304, |
|
"step": 123400 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.011088078576497e-06, |
|
"loss": 2.1978, |
|
"step": 123600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 6.941526732425327e-06, |
|
"loss": 2.2488, |
|
"step": 123800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.871965386274155e-06, |
|
"loss": 2.2347, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.8024040401229855e-06, |
|
"loss": 2.2496, |
|
"step": 124200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.7328426939718135e-06, |
|
"loss": 2.2366, |
|
"step": 124400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.663281347820643e-06, |
|
"loss": 2.1691, |
|
"step": 124600 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.593720001669472e-06, |
|
"loss": 2.205, |
|
"step": 124800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.524158655518302e-06, |
|
"loss": 2.2107, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.4545973093671315e-06, |
|
"loss": 2.2188, |
|
"step": 125200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.38503596321596e-06, |
|
"loss": 2.2381, |
|
"step": 125400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.31547461706479e-06, |
|
"loss": 2.2638, |
|
"step": 125600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.245913270913619e-06, |
|
"loss": 2.2688, |
|
"step": 125800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.176351924762449e-06, |
|
"loss": 2.1799, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.1067905786112775e-06, |
|
"loss": 2.2184, |
|
"step": 126200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.037229232460106e-06, |
|
"loss": 2.2297, |
|
"step": 126400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5.967667886308936e-06, |
|
"loss": 2.2882, |
|
"step": 126600 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5.898106540157766e-06, |
|
"loss": 2.2388, |
|
"step": 126800 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5.828545194006595e-06, |
|
"loss": 2.2356, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5.7589838478554234e-06, |
|
"loss": 2.2427, |
|
"step": 127200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5.689422501704253e-06, |
|
"loss": 2.2371, |
|
"step": 127400 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5.619861155553083e-06, |
|
"loss": 2.2639, |
|
"step": 127600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5.550299809401912e-06, |
|
"loss": 2.2532, |
|
"step": 127800 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5.480738463250741e-06, |
|
"loss": 2.2085, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5.41117711709957e-06, |
|
"loss": 2.263, |
|
"step": 128200 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.341615770948399e-06, |
|
"loss": 2.1695, |
|
"step": 128400 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.272054424797229e-06, |
|
"loss": 2.2055, |
|
"step": 128600 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.2024930786460585e-06, |
|
"loss": 2.2349, |
|
"step": 128800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.132931732494887e-06, |
|
"loss": 2.2024, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.063370386343716e-06, |
|
"loss": 2.2263, |
|
"step": 129200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.993809040192546e-06, |
|
"loss": 2.2084, |
|
"step": 129400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.924247694041376e-06, |
|
"loss": 2.2397, |
|
"step": 129600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.8546863478902045e-06, |
|
"loss": 2.2455, |
|
"step": 129800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.785125001739034e-06, |
|
"loss": 2.2159, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.715563655587863e-06, |
|
"loss": 2.1978, |
|
"step": 130200 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.646002309436692e-06, |
|
"loss": 2.2635, |
|
"step": 130400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.576440963285522e-06, |
|
"loss": 2.2005, |
|
"step": 130600 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.506879617134351e-06, |
|
"loss": 2.2388, |
|
"step": 130800 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.43731827098318e-06, |
|
"loss": 2.2142, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.36775692483201e-06, |
|
"loss": 2.2437, |
|
"step": 131200 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.298195578680839e-06, |
|
"loss": 2.2269, |
|
"step": 131400 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.228634232529668e-06, |
|
"loss": 2.1651, |
|
"step": 131600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.159072886378497e-06, |
|
"loss": 2.2552, |
|
"step": 131800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.089511540227327e-06, |
|
"loss": 2.2328, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.019950194076156e-06, |
|
"loss": 2.2267, |
|
"step": 132200 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.950388847924985e-06, |
|
"loss": 2.138, |
|
"step": 132400 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.8808275017738145e-06, |
|
"loss": 2.1773, |
|
"step": 132600 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.811266155622644e-06, |
|
"loss": 2.1719, |
|
"step": 132800 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.741704809471473e-06, |
|
"loss": 2.2124, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.6721434633203023e-06, |
|
"loss": 2.2293, |
|
"step": 133200 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.6025821171691316e-06, |
|
"loss": 2.1802, |
|
"step": 133400 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.533020771017961e-06, |
|
"loss": 2.1823, |
|
"step": 133600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.4634594248667906e-06, |
|
"loss": 2.2114, |
|
"step": 133800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.39389807871562e-06, |
|
"loss": 2.2102, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.3243367325644487e-06, |
|
"loss": 2.2313, |
|
"step": 134200 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.254775386413278e-06, |
|
"loss": 2.2214, |
|
"step": 134400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.1852140402621073e-06, |
|
"loss": 2.2491, |
|
"step": 134600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.1156526941109365e-06, |
|
"loss": 2.1957, |
|
"step": 134800 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.046091347959766e-06, |
|
"loss": 2.251, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.976530001808595e-06, |
|
"loss": 2.1941, |
|
"step": 135200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.9069686556574244e-06, |
|
"loss": 2.231, |
|
"step": 135400 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.8374073095062537e-06, |
|
"loss": 2.1893, |
|
"step": 135600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.767845963355083e-06, |
|
"loss": 2.2114, |
|
"step": 135800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.6982846172039122e-06, |
|
"loss": 2.2375, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.6287232710527415e-06, |
|
"loss": 2.1917, |
|
"step": 136200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.559161924901571e-06, |
|
"loss": 2.2227, |
|
"step": 136400 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.4896005787504e-06, |
|
"loss": 2.2534, |
|
"step": 136600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.4200392325992294e-06, |
|
"loss": 2.2529, |
|
"step": 136800 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.3504778864480586e-06, |
|
"loss": 2.2046, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.280916540296888e-06, |
|
"loss": 2.262, |
|
"step": 137200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.211355194145717e-06, |
|
"loss": 2.1998, |
|
"step": 137400 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.1417938479945465e-06, |
|
"loss": 2.2093, |
|
"step": 137600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0722325018433758e-06, |
|
"loss": 2.204, |
|
"step": 137800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.002671155692205e-06, |
|
"loss": 2.234, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9331098095410343e-06, |
|
"loss": 2.2064, |
|
"step": 138200 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8635484633898636e-06, |
|
"loss": 2.2288, |
|
"step": 138400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7939871172386929e-06, |
|
"loss": 2.1994, |
|
"step": 138600 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7244257710875224e-06, |
|
"loss": 2.2235, |
|
"step": 138800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.6548644249363515e-06, |
|
"loss": 2.2251, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.5853030787851805e-06, |
|
"loss": 2.2068, |
|
"step": 139200 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.51574173263401e-06, |
|
"loss": 2.1716, |
|
"step": 139400 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.4461803864828393e-06, |
|
"loss": 2.229, |
|
"step": 139600 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.3766190403316686e-06, |
|
"loss": 2.2169, |
|
"step": 139800 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.3070576941804979e-06, |
|
"loss": 2.1852, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.2374963480293271e-06, |
|
"loss": 2.2213, |
|
"step": 140200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.1679350018781564e-06, |
|
"loss": 2.2633, |
|
"step": 140400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0983736557269857e-06, |
|
"loss": 2.1825, |
|
"step": 140600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.028812309575815e-06, |
|
"loss": 2.2269, |
|
"step": 140800 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.592509634246443e-07, |
|
"loss": 2.2141, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 8.896896172734734e-07, |
|
"loss": 2.2351, |
|
"step": 141200 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.201282711223028e-07, |
|
"loss": 2.2102, |
|
"step": 141400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.505669249711321e-07, |
|
"loss": 2.1856, |
|
"step": 141600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.810055788199613e-07, |
|
"loss": 2.2066, |
|
"step": 141800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.114442326687907e-07, |
|
"loss": 2.2295, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.418828865176198e-07, |
|
"loss": 2.1887, |
|
"step": 142200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.7232154036644923e-07, |
|
"loss": 2.1985, |
|
"step": 142400 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.0276019421527846e-07, |
|
"loss": 2.2791, |
|
"step": 142600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.3319884806410774e-07, |
|
"loss": 2.2265, |
|
"step": 142800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.63637501912937e-07, |
|
"loss": 2.2137, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.940761557617663e-07, |
|
"loss": 2.253, |
|
"step": 143200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.2451480961059558e-07, |
|
"loss": 2.2167, |
|
"step": 143400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.495346345942487e-08, |
|
"loss": 2.2104, |
|
"step": 143600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 143758, |
|
"total_flos": 1.502524253059154e+17, |
|
"train_loss": 2.352252098808129, |
|
"train_runtime": 164976.46, |
|
"train_samples_per_second": 3.486, |
|
"train_steps_per_second": 0.871 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 143758, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 1.502524253059154e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|