|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 200, |
|
"global_step": 978, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002044989775051125, |
|
"grad_norm": 2.8995674216310645, |
|
"learning_rate": 9.999974203447434e-06, |
|
"loss": 0.097, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00408997955010225, |
|
"grad_norm": 2.0878590818900724, |
|
"learning_rate": 9.999896814055916e-06, |
|
"loss": 0.0793, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006134969325153374, |
|
"grad_norm": 3.252004392065858, |
|
"learning_rate": 9.999767832624e-06, |
|
"loss": 0.1446, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0081799591002045, |
|
"grad_norm": 2.1719657405930333, |
|
"learning_rate": 9.999587260482597e-06, |
|
"loss": 0.0606, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010224948875255624, |
|
"grad_norm": 1.5951295112804458, |
|
"learning_rate": 9.999355099494961e-06, |
|
"loss": 0.0543, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012269938650306749, |
|
"grad_norm": 2.0082268910826957, |
|
"learning_rate": 9.999071352056676e-06, |
|
"loss": 0.0752, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014314928425357873, |
|
"grad_norm": 1.9536326911273243, |
|
"learning_rate": 9.998736021095621e-06, |
|
"loss": 0.0453, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.016359918200409, |
|
"grad_norm": 2.13634714300749, |
|
"learning_rate": 9.99834911007195e-06, |
|
"loss": 0.0732, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.018404907975460124, |
|
"grad_norm": 1.920732150945499, |
|
"learning_rate": 9.99791062297805e-06, |
|
"loss": 0.0541, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02044989775051125, |
|
"grad_norm": 2.1324187216203034, |
|
"learning_rate": 9.99742056433851e-06, |
|
"loss": 0.0549, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022494887525562373, |
|
"grad_norm": 2.919114524687416, |
|
"learning_rate": 9.99687893921005e-06, |
|
"loss": 0.0895, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.024539877300613498, |
|
"grad_norm": 1.899625115074746, |
|
"learning_rate": 9.996285753181499e-06, |
|
"loss": 0.0589, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.026584867075664622, |
|
"grad_norm": 2.5554509832362973, |
|
"learning_rate": 9.99564101237372e-06, |
|
"loss": 0.0785, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.028629856850715747, |
|
"grad_norm": 2.4318482065803666, |
|
"learning_rate": 9.994944723439546e-06, |
|
"loss": 0.0784, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03067484662576687, |
|
"grad_norm": 3.583468004202154, |
|
"learning_rate": 9.994196893563722e-06, |
|
"loss": 0.1125, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.032719836400818, |
|
"grad_norm": 1.4181812641718199, |
|
"learning_rate": 9.993397530462818e-06, |
|
"loss": 0.0397, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.034764826175869123, |
|
"grad_norm": 1.8010048779280416, |
|
"learning_rate": 9.99254664238516e-06, |
|
"loss": 0.0575, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03680981595092025, |
|
"grad_norm": 2.1503927037059385, |
|
"learning_rate": 9.991644238110741e-06, |
|
"loss": 0.0665, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03885480572597137, |
|
"grad_norm": 1.8100049883218121, |
|
"learning_rate": 9.990690326951126e-06, |
|
"loss": 0.0682, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0408997955010225, |
|
"grad_norm": 2.3966939056398266, |
|
"learning_rate": 9.989684918749365e-06, |
|
"loss": 0.0846, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04294478527607362, |
|
"grad_norm": 1.918166279143656, |
|
"learning_rate": 9.988628023879883e-06, |
|
"loss": 0.0668, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.044989775051124746, |
|
"grad_norm": 1.7912977784419148, |
|
"learning_rate": 9.98751965324838e-06, |
|
"loss": 0.0729, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04703476482617587, |
|
"grad_norm": 1.9098490695074073, |
|
"learning_rate": 9.986359818291706e-06, |
|
"loss": 0.0733, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.049079754601226995, |
|
"grad_norm": 2.2200718894862805, |
|
"learning_rate": 9.985148530977767e-06, |
|
"loss": 0.0723, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05112474437627812, |
|
"grad_norm": 1.8085849304791404, |
|
"learning_rate": 9.983885803805373e-06, |
|
"loss": 0.0713, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.053169734151329244, |
|
"grad_norm": 2.5900909947296507, |
|
"learning_rate": 9.982571649804126e-06, |
|
"loss": 0.0805, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05521472392638037, |
|
"grad_norm": 2.557173352737123, |
|
"learning_rate": 9.981206082534287e-06, |
|
"loss": 0.0849, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.05725971370143149, |
|
"grad_norm": 2.3095562915819503, |
|
"learning_rate": 9.979789116086625e-06, |
|
"loss": 0.0848, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.05930470347648262, |
|
"grad_norm": 1.652313462404793, |
|
"learning_rate": 9.97832076508228e-06, |
|
"loss": 0.057, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06134969325153374, |
|
"grad_norm": 3.3750373556197752, |
|
"learning_rate": 9.976801044672608e-06, |
|
"loss": 0.1154, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06339468302658487, |
|
"grad_norm": 2.7053744260152803, |
|
"learning_rate": 9.97522997053903e-06, |
|
"loss": 0.0841, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.065439672801636, |
|
"grad_norm": 2.1510005490299497, |
|
"learning_rate": 9.973607558892864e-06, |
|
"loss": 0.0732, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06748466257668712, |
|
"grad_norm": 2.1823073488659324, |
|
"learning_rate": 9.971933826475162e-06, |
|
"loss": 0.0776, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.06952965235173825, |
|
"grad_norm": 2.0539979554320817, |
|
"learning_rate": 9.970208790556531e-06, |
|
"loss": 0.0688, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07157464212678936, |
|
"grad_norm": 1.6876619685011311, |
|
"learning_rate": 9.968432468936967e-06, |
|
"loss": 0.0608, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0736196319018405, |
|
"grad_norm": 3.0575087238752805, |
|
"learning_rate": 9.966604879945659e-06, |
|
"loss": 0.12, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07566462167689161, |
|
"grad_norm": 2.414478148852492, |
|
"learning_rate": 9.964726042440802e-06, |
|
"loss": 0.0958, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07770961145194274, |
|
"grad_norm": 2.173225061106067, |
|
"learning_rate": 9.962795975809411e-06, |
|
"loss": 0.084, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07975460122699386, |
|
"grad_norm": 2.040698856807742, |
|
"learning_rate": 9.960814699967112e-06, |
|
"loss": 0.0794, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.081799591002045, |
|
"grad_norm": 2.249606373953477, |
|
"learning_rate": 9.958782235357938e-06, |
|
"loss": 0.0951, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08384458077709611, |
|
"grad_norm": 2.5979000902419895, |
|
"learning_rate": 9.956698602954124e-06, |
|
"loss": 0.1029, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.08588957055214724, |
|
"grad_norm": 2.1602269446719644, |
|
"learning_rate": 9.954563824255879e-06, |
|
"loss": 0.0901, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.08793456032719836, |
|
"grad_norm": 1.8153325069101112, |
|
"learning_rate": 9.952377921291179e-06, |
|
"loss": 0.0623, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08997955010224949, |
|
"grad_norm": 2.7967114830172615, |
|
"learning_rate": 9.950140916615526e-06, |
|
"loss": 0.1192, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09202453987730061, |
|
"grad_norm": 2.0707153248622827, |
|
"learning_rate": 9.947852833311725e-06, |
|
"loss": 0.0846, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09406952965235174, |
|
"grad_norm": 2.1452757583479474, |
|
"learning_rate": 9.94551369498964e-06, |
|
"loss": 0.0875, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09611451942740286, |
|
"grad_norm": 2.3194318990073923, |
|
"learning_rate": 9.943123525785952e-06, |
|
"loss": 0.0921, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.09815950920245399, |
|
"grad_norm": 1.798820349857878, |
|
"learning_rate": 9.940682350363913e-06, |
|
"loss": 0.0592, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.10020449897750511, |
|
"grad_norm": 1.8591670519797276, |
|
"learning_rate": 9.938190193913084e-06, |
|
"loss": 0.0757, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10224948875255624, |
|
"grad_norm": 1.8617586001231685, |
|
"learning_rate": 9.935647082149088e-06, |
|
"loss": 0.0677, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10429447852760736, |
|
"grad_norm": 2.402863095839252, |
|
"learning_rate": 9.933053041313325e-06, |
|
"loss": 0.0873, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.10633946830265849, |
|
"grad_norm": 2.2249519855906756, |
|
"learning_rate": 9.930408098172725e-06, |
|
"loss": 0.0912, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1083844580777096, |
|
"grad_norm": 2.1251826013803323, |
|
"learning_rate": 9.92771228001945e-06, |
|
"loss": 0.076, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.11042944785276074, |
|
"grad_norm": 1.9764253903583366, |
|
"learning_rate": 9.924965614670629e-06, |
|
"loss": 0.0784, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11247443762781185, |
|
"grad_norm": 1.8078917942604569, |
|
"learning_rate": 9.92216813046806e-06, |
|
"loss": 0.0667, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11451942740286299, |
|
"grad_norm": 2.5631523625105372, |
|
"learning_rate": 9.919319856277921e-06, |
|
"loss": 0.1003, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1165644171779141, |
|
"grad_norm": 2.066653670325792, |
|
"learning_rate": 9.916420821490474e-06, |
|
"loss": 0.0756, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.11860940695296524, |
|
"grad_norm": 2.5780966602305693, |
|
"learning_rate": 9.91347105601976e-06, |
|
"loss": 0.0984, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.12065439672801637, |
|
"grad_norm": 2.219344023968354, |
|
"learning_rate": 9.910470590303294e-06, |
|
"loss": 0.0789, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.12269938650306748, |
|
"grad_norm": 2.642779106386566, |
|
"learning_rate": 9.90741945530174e-06, |
|
"loss": 0.078, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12474437627811862, |
|
"grad_norm": 1.8439341873720778, |
|
"learning_rate": 9.904317682498609e-06, |
|
"loss": 0.0725, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.12678936605316973, |
|
"grad_norm": 2.1976218170570783, |
|
"learning_rate": 9.901165303899916e-06, |
|
"loss": 0.1094, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.12883435582822086, |
|
"grad_norm": 2.4577264483674166, |
|
"learning_rate": 9.89796235203386e-06, |
|
"loss": 0.0922, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.130879345603272, |
|
"grad_norm": 3.012519841445848, |
|
"learning_rate": 9.89470885995049e-06, |
|
"loss": 0.1109, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1329243353783231, |
|
"grad_norm": 2.248540711936193, |
|
"learning_rate": 9.891404861221356e-06, |
|
"loss": 0.0892, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13496932515337423, |
|
"grad_norm": 2.3347058109208825, |
|
"learning_rate": 9.888050389939172e-06, |
|
"loss": 0.0851, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.13701431492842536, |
|
"grad_norm": 2.460632130242845, |
|
"learning_rate": 9.884645480717452e-06, |
|
"loss": 0.0967, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1390593047034765, |
|
"grad_norm": 1.8587061916271175, |
|
"learning_rate": 9.881190168690164e-06, |
|
"loss": 0.0661, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1411042944785276, |
|
"grad_norm": 2.813362612221172, |
|
"learning_rate": 9.877684489511367e-06, |
|
"loss": 0.1079, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14314928425357873, |
|
"grad_norm": 2.7724880857855085, |
|
"learning_rate": 9.874128479354833e-06, |
|
"loss": 0.0865, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14519427402862986, |
|
"grad_norm": 2.0084192749000223, |
|
"learning_rate": 9.870522174913683e-06, |
|
"loss": 0.0811, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.147239263803681, |
|
"grad_norm": 1.901062419637755, |
|
"learning_rate": 9.866865613400008e-06, |
|
"loss": 0.0834, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1492842535787321, |
|
"grad_norm": 2.143697771944517, |
|
"learning_rate": 9.863158832544477e-06, |
|
"loss": 0.0967, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.15132924335378323, |
|
"grad_norm": 1.8252931029432322, |
|
"learning_rate": 9.859401870595959e-06, |
|
"loss": 0.0725, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.15337423312883436, |
|
"grad_norm": 1.9307281956151774, |
|
"learning_rate": 9.855594766321122e-06, |
|
"loss": 0.077, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1554192229038855, |
|
"grad_norm": 2.2429643925966993, |
|
"learning_rate": 9.85173755900403e-06, |
|
"loss": 0.0891, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1574642126789366, |
|
"grad_norm": 1.8200761545917128, |
|
"learning_rate": 9.847830288445745e-06, |
|
"loss": 0.0785, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.15950920245398773, |
|
"grad_norm": 1.8916674815016423, |
|
"learning_rate": 9.843872994963912e-06, |
|
"loss": 0.0755, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.16155419222903886, |
|
"grad_norm": 2.0741375008009655, |
|
"learning_rate": 9.83986571939234e-06, |
|
"loss": 0.0744, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.16359918200409, |
|
"grad_norm": 1.7919605782077757, |
|
"learning_rate": 9.835808503080586e-06, |
|
"loss": 0.0757, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1656441717791411, |
|
"grad_norm": 1.950729934719885, |
|
"learning_rate": 9.831701387893533e-06, |
|
"loss": 0.0815, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.16768916155419222, |
|
"grad_norm": 2.124785118083205, |
|
"learning_rate": 9.82754441621094e-06, |
|
"loss": 0.0807, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.16973415132924335, |
|
"grad_norm": 2.053195322602257, |
|
"learning_rate": 9.823337630927027e-06, |
|
"loss": 0.0902, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.17177914110429449, |
|
"grad_norm": 2.5090758861647826, |
|
"learning_rate": 9.819081075450014e-06, |
|
"loss": 0.0873, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1738241308793456, |
|
"grad_norm": 2.137957503401185, |
|
"learning_rate": 9.814774793701686e-06, |
|
"loss": 0.092, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.17586912065439672, |
|
"grad_norm": 2.230490758825473, |
|
"learning_rate": 9.810418830116933e-06, |
|
"loss": 0.0833, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.17791411042944785, |
|
"grad_norm": 2.012709266353046, |
|
"learning_rate": 9.80601322964329e-06, |
|
"loss": 0.0877, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.17995910020449898, |
|
"grad_norm": 2.572752374501912, |
|
"learning_rate": 9.80155803774048e-06, |
|
"loss": 0.1141, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.18200408997955012, |
|
"grad_norm": 1.5628909847161165, |
|
"learning_rate": 9.797053300379938e-06, |
|
"loss": 0.0672, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.18404907975460122, |
|
"grad_norm": 1.8013050781356985, |
|
"learning_rate": 9.792499064044343e-06, |
|
"loss": 0.0804, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18609406952965235, |
|
"grad_norm": 2.128417350277261, |
|
"learning_rate": 9.787895375727137e-06, |
|
"loss": 0.0903, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.18813905930470348, |
|
"grad_norm": 2.6231742831814255, |
|
"learning_rate": 9.783242282932028e-06, |
|
"loss": 0.0991, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1901840490797546, |
|
"grad_norm": 2.14671431766684, |
|
"learning_rate": 9.778539833672525e-06, |
|
"loss": 0.0844, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.19222903885480572, |
|
"grad_norm": 1.668300942440577, |
|
"learning_rate": 9.773788076471415e-06, |
|
"loss": 0.0677, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.19427402862985685, |
|
"grad_norm": 1.6611049562639426, |
|
"learning_rate": 9.76898706036028e-06, |
|
"loss": 0.0815, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19631901840490798, |
|
"grad_norm": 1.7467281372812702, |
|
"learning_rate": 9.764136834878987e-06, |
|
"loss": 0.0802, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.1983640081799591, |
|
"grad_norm": 2.0082876640493525, |
|
"learning_rate": 9.759237450075174e-06, |
|
"loss": 0.0845, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.20040899795501022, |
|
"grad_norm": 1.6218133242260213, |
|
"learning_rate": 9.754288956503737e-06, |
|
"loss": 0.0792, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.20245398773006135, |
|
"grad_norm": 1.8693374042253028, |
|
"learning_rate": 9.749291405226304e-06, |
|
"loss": 0.089, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.20449897750511248, |
|
"grad_norm": 2.3402858038101337, |
|
"learning_rate": 9.744244847810716e-06, |
|
"loss": 0.0945, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2065439672801636, |
|
"grad_norm": 2.400216651654056, |
|
"learning_rate": 9.739149336330482e-06, |
|
"loss": 0.0994, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2085889570552147, |
|
"grad_norm": 1.9932426008301034, |
|
"learning_rate": 9.734004923364258e-06, |
|
"loss": 0.0813, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.21063394683026584, |
|
"grad_norm": 1.8232352554241547, |
|
"learning_rate": 9.728811661995287e-06, |
|
"loss": 0.0833, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.21267893660531698, |
|
"grad_norm": 1.774918510432305, |
|
"learning_rate": 9.72356960581087e-06, |
|
"loss": 0.0853, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2147239263803681, |
|
"grad_norm": 2.987329389159815, |
|
"learning_rate": 9.718278808901797e-06, |
|
"loss": 0.1114, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2167689161554192, |
|
"grad_norm": 2.248351378515216, |
|
"learning_rate": 9.712939325861794e-06, |
|
"loss": 0.0826, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.21881390593047034, |
|
"grad_norm": 2.218767795388457, |
|
"learning_rate": 9.707551211786966e-06, |
|
"loss": 0.088, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.22085889570552147, |
|
"grad_norm": 2.3431433008509917, |
|
"learning_rate": 9.702114522275216e-06, |
|
"loss": 0.0897, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2229038854805726, |
|
"grad_norm": 1.9166897788167856, |
|
"learning_rate": 9.696629313425688e-06, |
|
"loss": 0.088, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.2249488752556237, |
|
"grad_norm": 1.9440115291462636, |
|
"learning_rate": 9.691095641838168e-06, |
|
"loss": 0.0836, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22699386503067484, |
|
"grad_norm": 1.813961610317634, |
|
"learning_rate": 9.685513564612521e-06, |
|
"loss": 0.078, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.22903885480572597, |
|
"grad_norm": 1.8809059426216883, |
|
"learning_rate": 9.679883139348082e-06, |
|
"loss": 0.0821, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2310838445807771, |
|
"grad_norm": 2.2311254705001233, |
|
"learning_rate": 9.674204424143079e-06, |
|
"loss": 0.0883, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2331288343558282, |
|
"grad_norm": 1.9295136215801372, |
|
"learning_rate": 9.668477477594021e-06, |
|
"loss": 0.0833, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.23517382413087934, |
|
"grad_norm": 1.8615614639144564, |
|
"learning_rate": 9.662702358795098e-06, |
|
"loss": 0.0822, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23721881390593047, |
|
"grad_norm": 1.8761973618596817, |
|
"learning_rate": 9.656879127337571e-06, |
|
"loss": 0.0785, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.2392638036809816, |
|
"grad_norm": 2.017270471451727, |
|
"learning_rate": 9.651007843309164e-06, |
|
"loss": 0.0878, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.24130879345603273, |
|
"grad_norm": 2.1414773647169936, |
|
"learning_rate": 9.645088567293426e-06, |
|
"loss": 0.0932, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.24335378323108384, |
|
"grad_norm": 1.7284124634354323, |
|
"learning_rate": 9.639121360369127e-06, |
|
"loss": 0.0683, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.24539877300613497, |
|
"grad_norm": 2.3422614186852577, |
|
"learning_rate": 9.633106284109612e-06, |
|
"loss": 0.1061, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2474437627811861, |
|
"grad_norm": 1.9680728218006462, |
|
"learning_rate": 9.627043400582173e-06, |
|
"loss": 0.0832, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.24948875255623723, |
|
"grad_norm": 1.744621659832594, |
|
"learning_rate": 9.620932772347408e-06, |
|
"loss": 0.0716, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.25153374233128833, |
|
"grad_norm": 2.003659281799268, |
|
"learning_rate": 9.614774462458573e-06, |
|
"loss": 0.0943, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.25357873210633947, |
|
"grad_norm": 1.9112829391643362, |
|
"learning_rate": 9.608568534460938e-06, |
|
"loss": 0.0791, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2556237218813906, |
|
"grad_norm": 1.6018069748701698, |
|
"learning_rate": 9.602315052391116e-06, |
|
"loss": 0.0699, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.25766871165644173, |
|
"grad_norm": 1.9898564316497316, |
|
"learning_rate": 9.596014080776424e-06, |
|
"loss": 0.0868, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.25971370143149286, |
|
"grad_norm": 1.9062653706577775, |
|
"learning_rate": 9.589665684634197e-06, |
|
"loss": 0.0797, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.261758691206544, |
|
"grad_norm": 2.105685404483493, |
|
"learning_rate": 9.583269929471129e-06, |
|
"loss": 0.0802, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.26380368098159507, |
|
"grad_norm": 1.8889444529306618, |
|
"learning_rate": 9.576826881282595e-06, |
|
"loss": 0.0773, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2658486707566462, |
|
"grad_norm": 1.89509366954467, |
|
"learning_rate": 9.570336606551966e-06, |
|
"loss": 0.0845, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.26789366053169733, |
|
"grad_norm": 2.5730619597875792, |
|
"learning_rate": 9.56379917224993e-06, |
|
"loss": 0.1218, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.26993865030674846, |
|
"grad_norm": 3.174335117295452, |
|
"learning_rate": 9.557214645833792e-06, |
|
"loss": 0.1396, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2719836400817996, |
|
"grad_norm": 1.506901278245754, |
|
"learning_rate": 9.550583095246786e-06, |
|
"loss": 0.0631, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.2740286298568507, |
|
"grad_norm": 2.3300783174234887, |
|
"learning_rate": 9.543904588917366e-06, |
|
"loss": 0.109, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.27607361963190186, |
|
"grad_norm": 1.8554323699407922, |
|
"learning_rate": 9.537179195758513e-06, |
|
"loss": 0.0746, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.278118609406953, |
|
"grad_norm": 1.4907022435447066, |
|
"learning_rate": 9.530406985167005e-06, |
|
"loss": 0.0712, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.28016359918200406, |
|
"grad_norm": 1.7196544870819945, |
|
"learning_rate": 9.523588027022721e-06, |
|
"loss": 0.075, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2822085889570552, |
|
"grad_norm": 1.7344914939658451, |
|
"learning_rate": 9.516722391687903e-06, |
|
"loss": 0.0856, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.2842535787321063, |
|
"grad_norm": 2.1773597101038087, |
|
"learning_rate": 9.50981015000644e-06, |
|
"loss": 0.0929, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.28629856850715746, |
|
"grad_norm": 2.0166181602910376, |
|
"learning_rate": 9.502851373303137e-06, |
|
"loss": 0.0892, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2883435582822086, |
|
"grad_norm": 2.0996295005016483, |
|
"learning_rate": 9.495846133382973e-06, |
|
"loss": 0.085, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2903885480572597, |
|
"grad_norm": 2.09058564013836, |
|
"learning_rate": 9.488794502530361e-06, |
|
"loss": 0.0872, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.29243353783231085, |
|
"grad_norm": 1.8321276625056864, |
|
"learning_rate": 9.481696553508411e-06, |
|
"loss": 0.0927, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.294478527607362, |
|
"grad_norm": 1.918438250366742, |
|
"learning_rate": 9.474552359558167e-06, |
|
"loss": 0.0744, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2965235173824131, |
|
"grad_norm": 2.327981634380635, |
|
"learning_rate": 9.46736199439786e-06, |
|
"loss": 0.1025, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2985685071574642, |
|
"grad_norm": 2.2135170524903995, |
|
"learning_rate": 9.460125532222142e-06, |
|
"loss": 0.09, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3006134969325153, |
|
"grad_norm": 2.2539230814408073, |
|
"learning_rate": 9.452843047701324e-06, |
|
"loss": 0.1023, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.30265848670756645, |
|
"grad_norm": 2.104687258049424, |
|
"learning_rate": 9.445514615980604e-06, |
|
"loss": 0.0905, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3047034764826176, |
|
"grad_norm": 1.7372025147408934, |
|
"learning_rate": 9.438140312679292e-06, |
|
"loss": 0.0849, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3067484662576687, |
|
"grad_norm": 2.0671665965859662, |
|
"learning_rate": 9.43072021389003e-06, |
|
"loss": 0.0924, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.30879345603271985, |
|
"grad_norm": 1.6350351491282862, |
|
"learning_rate": 9.423254396178003e-06, |
|
"loss": 0.0769, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.310838445807771, |
|
"grad_norm": 2.878396608282762, |
|
"learning_rate": 9.415742936580156e-06, |
|
"loss": 0.1538, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3128834355828221, |
|
"grad_norm": 1.4213578692087034, |
|
"learning_rate": 9.408185912604395e-06, |
|
"loss": 0.065, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.3149284253578732, |
|
"grad_norm": 2.0855996921354, |
|
"learning_rate": 9.400583402228785e-06, |
|
"loss": 0.0844, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3169734151329243, |
|
"grad_norm": 1.7352864078553754, |
|
"learning_rate": 9.39293548390075e-06, |
|
"loss": 0.0853, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.31901840490797545, |
|
"grad_norm": 1.334038745461943, |
|
"learning_rate": 9.385242236536259e-06, |
|
"loss": 0.0656, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3210633946830266, |
|
"grad_norm": 2.174575475791565, |
|
"learning_rate": 9.377503739519019e-06, |
|
"loss": 0.0991, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3231083844580777, |
|
"grad_norm": 1.6357643314755432, |
|
"learning_rate": 9.369720072699648e-06, |
|
"loss": 0.0792, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.32515337423312884, |
|
"grad_norm": 2.316934261247635, |
|
"learning_rate": 9.36189131639485e-06, |
|
"loss": 0.1112, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.32719836400818, |
|
"grad_norm": 1.9234234290855614, |
|
"learning_rate": 9.354017551386599e-06, |
|
"loss": 0.0851, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3292433537832311, |
|
"grad_norm": 2.475496525507223, |
|
"learning_rate": 9.346098858921292e-06, |
|
"loss": 0.1062, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.3312883435582822, |
|
"grad_norm": 2.3268380138649487, |
|
"learning_rate": 9.338135320708912e-06, |
|
"loss": 0.1035, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 1.5336893905703746, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 0.0702, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.33537832310838445, |
|
"grad_norm": 2.8082604544179035, |
|
"learning_rate": 9.32207403619577e-06, |
|
"loss": 0.1209, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.3374233128834356, |
|
"grad_norm": 1.5750634984249117, |
|
"learning_rate": 9.313976455625316e-06, |
|
"loss": 0.0713, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3394683026584867, |
|
"grad_norm": 2.2373522766525262, |
|
"learning_rate": 9.305834360766695e-06, |
|
"loss": 0.0969, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.34151329243353784, |
|
"grad_norm": 2.342451381996767, |
|
"learning_rate": 9.297647835635102e-06, |
|
"loss": 0.0934, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.34355828220858897, |
|
"grad_norm": 1.936610520437153, |
|
"learning_rate": 9.289416964704186e-06, |
|
"loss": 0.0883, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3456032719836401, |
|
"grad_norm": 1.8338353993342575, |
|
"learning_rate": 9.281141832905185e-06, |
|
"loss": 0.0778, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3476482617586912, |
|
"grad_norm": 1.9110066741814127, |
|
"learning_rate": 9.272822525626047e-06, |
|
"loss": 0.0735, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3496932515337423, |
|
"grad_norm": 2.179479069452803, |
|
"learning_rate": 9.26445912871055e-06, |
|
"loss": 0.0843, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.35173824130879344, |
|
"grad_norm": 1.9177594380676963, |
|
"learning_rate": 9.25605172845742e-06, |
|
"loss": 0.0805, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3537832310838446, |
|
"grad_norm": 2.1882619443952684, |
|
"learning_rate": 9.247600411619434e-06, |
|
"loss": 0.0965, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3558282208588957, |
|
"grad_norm": 2.2176075779513824, |
|
"learning_rate": 9.239105265402525e-06, |
|
"loss": 0.0974, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.35787321063394684, |
|
"grad_norm": 1.5074567124767815, |
|
"learning_rate": 9.23056637746489e-06, |
|
"loss": 0.0735, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.35991820040899797, |
|
"grad_norm": 2.060069998365139, |
|
"learning_rate": 9.221983835916074e-06, |
|
"loss": 0.1022, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.3619631901840491, |
|
"grad_norm": 2.1165212064315235, |
|
"learning_rate": 9.213357729316077e-06, |
|
"loss": 0.0995, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.36400817995910023, |
|
"grad_norm": 2.1868849806726787, |
|
"learning_rate": 9.204688146674418e-06, |
|
"loss": 0.0939, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3660531697341513, |
|
"grad_norm": 1.7544924490641574, |
|
"learning_rate": 9.195975177449238e-06, |
|
"loss": 0.0873, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.36809815950920244, |
|
"grad_norm": 1.838768964795654, |
|
"learning_rate": 9.187218911546363e-06, |
|
"loss": 0.0864, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.37014314928425357, |
|
"grad_norm": 1.9536263850909072, |
|
"learning_rate": 9.178419439318382e-06, |
|
"loss": 0.0828, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3721881390593047, |
|
"grad_norm": 1.8125655303827894, |
|
"learning_rate": 9.169576851563715e-06, |
|
"loss": 0.0707, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.37423312883435583, |
|
"grad_norm": 1.5346489369821823, |
|
"learning_rate": 9.160691239525675e-06, |
|
"loss": 0.0707, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.37627811860940696, |
|
"grad_norm": 2.0774049712635745, |
|
"learning_rate": 9.151762694891522e-06, |
|
"loss": 0.0892, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.3783231083844581, |
|
"grad_norm": 1.6068313703103427, |
|
"learning_rate": 9.142791309791528e-06, |
|
"loss": 0.0737, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3803680981595092, |
|
"grad_norm": 2.491559077597992, |
|
"learning_rate": 9.133777176798013e-06, |
|
"loss": 0.1063, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3824130879345603, |
|
"grad_norm": 1.936364688582553, |
|
"learning_rate": 9.124720388924403e-06, |
|
"loss": 0.0879, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.38445807770961143, |
|
"grad_norm": 1.7501246261711056, |
|
"learning_rate": 9.115621039624256e-06, |
|
"loss": 0.0831, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.38650306748466257, |
|
"grad_norm": 1.9375047463204769, |
|
"learning_rate": 9.106479222790312e-06, |
|
"loss": 0.0798, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.3885480572597137, |
|
"grad_norm": 1.9799704235731947, |
|
"learning_rate": 9.09729503275351e-06, |
|
"loss": 0.0818, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.39059304703476483, |
|
"grad_norm": 2.1027233151637046, |
|
"learning_rate": 9.08806856428203e-06, |
|
"loss": 0.0737, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.39263803680981596, |
|
"grad_norm": 2.2130274217863377, |
|
"learning_rate": 9.078799912580305e-06, |
|
"loss": 0.1049, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3946830265848671, |
|
"grad_norm": 1.8596492941083875, |
|
"learning_rate": 9.069489173288037e-06, |
|
"loss": 0.0788, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.3967280163599182, |
|
"grad_norm": 1.8220962906735956, |
|
"learning_rate": 9.060136442479215e-06, |
|
"loss": 0.0789, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3987730061349693, |
|
"grad_norm": 2.1684932411419773, |
|
"learning_rate": 9.050741816661128e-06, |
|
"loss": 0.1101, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.40081799591002043, |
|
"grad_norm": 2.2585167924890674, |
|
"learning_rate": 9.041305392773355e-06, |
|
"loss": 0.0899, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.40286298568507156, |
|
"grad_norm": 2.2529963379779514, |
|
"learning_rate": 9.03182726818678e-06, |
|
"loss": 0.1001, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.4049079754601227, |
|
"grad_norm": 2.019146584665829, |
|
"learning_rate": 9.022307540702576e-06, |
|
"loss": 0.0889, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.4069529652351738, |
|
"grad_norm": 2.0147227938530214, |
|
"learning_rate": 9.012746308551208e-06, |
|
"loss": 0.0779, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.40899795501022496, |
|
"grad_norm": 1.6785890661043144, |
|
"learning_rate": 9.003143670391403e-06, |
|
"loss": 0.0714, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.40899795501022496, |
|
"eval_loss": 0.09443490207195282, |
|
"eval_runtime": 1.6107, |
|
"eval_samples_per_second": 24.835, |
|
"eval_steps_per_second": 6.209, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4110429447852761, |
|
"grad_norm": 1.7907653453087733, |
|
"learning_rate": 8.993499725309148e-06, |
|
"loss": 0.0644, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4130879345603272, |
|
"grad_norm": 2.0499291659974572, |
|
"learning_rate": 8.983814572816656e-06, |
|
"loss": 0.0764, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.41513292433537835, |
|
"grad_norm": 2.027050105104232, |
|
"learning_rate": 8.974088312851346e-06, |
|
"loss": 0.0896, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.4171779141104294, |
|
"grad_norm": 1.8185300386254655, |
|
"learning_rate": 8.964321045774808e-06, |
|
"loss": 0.0904, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.41922290388548056, |
|
"grad_norm": 1.8351321980331647, |
|
"learning_rate": 8.954512872371768e-06, |
|
"loss": 0.0798, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4212678936605317, |
|
"grad_norm": 2.2777878812250734, |
|
"learning_rate": 8.944663893849053e-06, |
|
"loss": 0.094, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4233128834355828, |
|
"grad_norm": 2.078616561352449, |
|
"learning_rate": 8.934774211834538e-06, |
|
"loss": 0.097, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.42535787321063395, |
|
"grad_norm": 1.5026879665719408, |
|
"learning_rate": 8.924843928376105e-06, |
|
"loss": 0.0667, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4274028629856851, |
|
"grad_norm": 2.031373760012224, |
|
"learning_rate": 8.914873145940585e-06, |
|
"loss": 0.0983, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.4294478527607362, |
|
"grad_norm": 1.7750919975425428, |
|
"learning_rate": 8.904861967412702e-06, |
|
"loss": 0.0832, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.43149284253578735, |
|
"grad_norm": 1.6859653025880537, |
|
"learning_rate": 8.894810496094016e-06, |
|
"loss": 0.0739, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4335378323108384, |
|
"grad_norm": 2.4773597386512374, |
|
"learning_rate": 8.88471883570185e-06, |
|
"loss": 0.104, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.43558282208588955, |
|
"grad_norm": 1.7481062215506529, |
|
"learning_rate": 8.874587090368221e-06, |
|
"loss": 0.0685, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.4376278118609407, |
|
"grad_norm": 1.8687306127676215, |
|
"learning_rate": 8.86441536463877e-06, |
|
"loss": 0.0812, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.4396728016359918, |
|
"grad_norm": 2.7660751966702515, |
|
"learning_rate": 8.85420376347168e-06, |
|
"loss": 0.1228, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.44171779141104295, |
|
"grad_norm": 2.008073359861921, |
|
"learning_rate": 8.843952392236595e-06, |
|
"loss": 0.092, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.4437627811860941, |
|
"grad_norm": 1.9689667185293374, |
|
"learning_rate": 8.833661356713528e-06, |
|
"loss": 0.0918, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4458077709611452, |
|
"grad_norm": 2.0550779883515844, |
|
"learning_rate": 8.823330763091775e-06, |
|
"loss": 0.0842, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.44785276073619634, |
|
"grad_norm": 2.1458614538975316, |
|
"learning_rate": 8.81296071796882e-06, |
|
"loss": 0.0955, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4498977505112474, |
|
"grad_norm": 2.0801721508502173, |
|
"learning_rate": 8.802551328349222e-06, |
|
"loss": 0.0696, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.45194274028629855, |
|
"grad_norm": 1.6170897770649597, |
|
"learning_rate": 8.792102701643532e-06, |
|
"loss": 0.074, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.4539877300613497, |
|
"grad_norm": 1.6010742203809665, |
|
"learning_rate": 8.78161494566717e-06, |
|
"loss": 0.068, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.4560327198364008, |
|
"grad_norm": 1.8263013055696211, |
|
"learning_rate": 8.771088168639312e-06, |
|
"loss": 0.0785, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.45807770961145194, |
|
"grad_norm": 1.8074234496570727, |
|
"learning_rate": 8.760522479181784e-06, |
|
"loss": 0.0843, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.4601226993865031, |
|
"grad_norm": 1.9423241552319763, |
|
"learning_rate": 8.74991798631793e-06, |
|
"loss": 0.0902, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4621676891615542, |
|
"grad_norm": 2.426636585412464, |
|
"learning_rate": 8.739274799471492e-06, |
|
"loss": 0.1147, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.46421267893660534, |
|
"grad_norm": 1.8764452830009553, |
|
"learning_rate": 8.728593028465481e-06, |
|
"loss": 0.088, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.4662576687116564, |
|
"grad_norm": 1.8742190983636138, |
|
"learning_rate": 8.717872783521048e-06, |
|
"loss": 0.0919, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.46830265848670755, |
|
"grad_norm": 1.9812429967202114, |
|
"learning_rate": 8.707114175256335e-06, |
|
"loss": 0.1032, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4703476482617587, |
|
"grad_norm": 1.5710292326402762, |
|
"learning_rate": 8.696317314685342e-06, |
|
"loss": 0.0735, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4723926380368098, |
|
"grad_norm": 2.135568048299338, |
|
"learning_rate": 8.685482313216784e-06, |
|
"loss": 0.1003, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.47443762781186094, |
|
"grad_norm": 1.8410190133874755, |
|
"learning_rate": 8.674609282652936e-06, |
|
"loss": 0.0805, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.47648261758691207, |
|
"grad_norm": 1.95093910503971, |
|
"learning_rate": 8.663698335188477e-06, |
|
"loss": 0.0799, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.4785276073619632, |
|
"grad_norm": 2.0656801774088582, |
|
"learning_rate": 8.65274958340934e-06, |
|
"loss": 0.0953, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.48057259713701433, |
|
"grad_norm": 1.7872037593524146, |
|
"learning_rate": 8.641763140291546e-06, |
|
"loss": 0.0702, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.48261758691206547, |
|
"grad_norm": 2.0351005102773634, |
|
"learning_rate": 8.630739119200035e-06, |
|
"loss": 0.0828, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.48466257668711654, |
|
"grad_norm": 1.966029733326491, |
|
"learning_rate": 8.61967763388751e-06, |
|
"loss": 0.0887, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4867075664621677, |
|
"grad_norm": 2.2496225787645714, |
|
"learning_rate": 8.608578798493237e-06, |
|
"loss": 0.0921, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.4887525562372188, |
|
"grad_norm": 2.3703828414232935, |
|
"learning_rate": 8.597442727541898e-06, |
|
"loss": 0.1055, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.49079754601226994, |
|
"grad_norm": 2.072283129147399, |
|
"learning_rate": 8.586269535942386e-06, |
|
"loss": 0.096, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.49284253578732107, |
|
"grad_norm": 1.763736942283961, |
|
"learning_rate": 8.575059338986632e-06, |
|
"loss": 0.0851, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4948875255623722, |
|
"grad_norm": 1.9418651840022931, |
|
"learning_rate": 8.563812252348412e-06, |
|
"loss": 0.0817, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.49693251533742333, |
|
"grad_norm": 1.4038177877319757, |
|
"learning_rate": 8.552528392082147e-06, |
|
"loss": 0.0692, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.49897750511247446, |
|
"grad_norm": 2.2775569689795225, |
|
"learning_rate": 8.541207874621718e-06, |
|
"loss": 0.1092, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5010224948875256, |
|
"grad_norm": 2.5534087713100955, |
|
"learning_rate": 8.529850816779252e-06, |
|
"loss": 0.1033, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5030674846625767, |
|
"grad_norm": 1.531811934175557, |
|
"learning_rate": 8.518457335743927e-06, |
|
"loss": 0.0761, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5051124744376279, |
|
"grad_norm": 2.3960006081387974, |
|
"learning_rate": 8.507027549080753e-06, |
|
"loss": 0.0941, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5071574642126789, |
|
"grad_norm": 2.245296156491926, |
|
"learning_rate": 8.49556157472937e-06, |
|
"loss": 0.0992, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.50920245398773, |
|
"grad_norm": 2.1662992544835467, |
|
"learning_rate": 8.484059531002822e-06, |
|
"loss": 0.1096, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5112474437627812, |
|
"grad_norm": 1.9378805133589119, |
|
"learning_rate": 8.472521536586336e-06, |
|
"loss": 0.0884, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5132924335378323, |
|
"grad_norm": 1.7472804645413123, |
|
"learning_rate": 8.460947710536108e-06, |
|
"loss": 0.0881, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5153374233128835, |
|
"grad_norm": 1.8567960096830705, |
|
"learning_rate": 8.44933817227806e-06, |
|
"loss": 0.1041, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5173824130879345, |
|
"grad_norm": 1.6639705835205088, |
|
"learning_rate": 8.437693041606619e-06, |
|
"loss": 0.0767, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5194274028629857, |
|
"grad_norm": 1.7811045494491748, |
|
"learning_rate": 8.426012438683472e-06, |
|
"loss": 0.0795, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5214723926380368, |
|
"grad_norm": 2.601937087112271, |
|
"learning_rate": 8.41429648403634e-06, |
|
"loss": 0.1157, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.523517382413088, |
|
"grad_norm": 2.2629417508652896, |
|
"learning_rate": 8.402545298557712e-06, |
|
"loss": 0.0965, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5255623721881391, |
|
"grad_norm": 1.6219382198043681, |
|
"learning_rate": 8.390759003503624e-06, |
|
"loss": 0.0804, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5276073619631901, |
|
"grad_norm": 1.6735037903910355, |
|
"learning_rate": 8.378937720492384e-06, |
|
"loss": 0.0708, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5296523517382413, |
|
"grad_norm": 1.6949968905732045, |
|
"learning_rate": 8.367081571503332e-06, |
|
"loss": 0.0796, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5316973415132924, |
|
"grad_norm": 1.5829034537038222, |
|
"learning_rate": 8.355190678875577e-06, |
|
"loss": 0.0685, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5337423312883436, |
|
"grad_norm": 2.1474520860458814, |
|
"learning_rate": 8.343265165306736e-06, |
|
"loss": 0.0966, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5357873210633947, |
|
"grad_norm": 2.685259620414307, |
|
"learning_rate": 8.331305153851659e-06, |
|
"loss": 0.1199, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5378323108384458, |
|
"grad_norm": 1.5378328527936944, |
|
"learning_rate": 8.319310767921174e-06, |
|
"loss": 0.0746, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5398773006134969, |
|
"grad_norm": 1.5728870201255574, |
|
"learning_rate": 8.307282131280805e-06, |
|
"loss": 0.0794, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5419222903885481, |
|
"grad_norm": 1.9037474406992847, |
|
"learning_rate": 8.295219368049494e-06, |
|
"loss": 0.0831, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5439672801635992, |
|
"grad_norm": 1.8713169547943331, |
|
"learning_rate": 8.283122602698324e-06, |
|
"loss": 0.0866, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5460122699386503, |
|
"grad_norm": 2.0187272804624032, |
|
"learning_rate": 8.270991960049231e-06, |
|
"loss": 0.0953, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5480572597137015, |
|
"grad_norm": 2.3890714658865857, |
|
"learning_rate": 8.258827565273717e-06, |
|
"loss": 0.0993, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5501022494887525, |
|
"grad_norm": 1.4224265522394863, |
|
"learning_rate": 8.24662954389157e-06, |
|
"loss": 0.0685, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.5521472392638037, |
|
"grad_norm": 1.8253908241082366, |
|
"learning_rate": 8.234398021769541e-06, |
|
"loss": 0.0859, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5541922290388548, |
|
"grad_norm": 1.8297687093456312, |
|
"learning_rate": 8.222133125120076e-06, |
|
"loss": 0.0842, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.556237218813906, |
|
"grad_norm": 1.7325614091536314, |
|
"learning_rate": 8.209834980499995e-06, |
|
"loss": 0.0664, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.558282208588957, |
|
"grad_norm": 1.8426658391443724, |
|
"learning_rate": 8.19750371480919e-06, |
|
"loss": 0.0823, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.5603271983640081, |
|
"grad_norm": 2.335513659237072, |
|
"learning_rate": 8.185139455289322e-06, |
|
"loss": 0.1004, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.5623721881390593, |
|
"grad_norm": 2.281382949923011, |
|
"learning_rate": 8.172742329522493e-06, |
|
"loss": 0.0923, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5644171779141104, |
|
"grad_norm": 2.0875496660986586, |
|
"learning_rate": 8.160312465429952e-06, |
|
"loss": 0.1007, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5664621676891616, |
|
"grad_norm": 1.6706016356250908, |
|
"learning_rate": 8.147849991270753e-06, |
|
"loss": 0.0749, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5685071574642127, |
|
"grad_norm": 2.3348044470325586, |
|
"learning_rate": 8.135355035640445e-06, |
|
"loss": 0.1075, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5705521472392638, |
|
"grad_norm": 1.9325325555725485, |
|
"learning_rate": 8.122827727469737e-06, |
|
"loss": 0.0847, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.5725971370143149, |
|
"grad_norm": 2.06473154517661, |
|
"learning_rate": 8.110268196023179e-06, |
|
"loss": 0.0923, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5746421267893661, |
|
"grad_norm": 1.7347784233467545, |
|
"learning_rate": 8.097676570897814e-06, |
|
"loss": 0.0767, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.5766871165644172, |
|
"grad_norm": 1.7284531347044014, |
|
"learning_rate": 8.085052982021849e-06, |
|
"loss": 0.0822, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.5787321063394683, |
|
"grad_norm": 2.0234039627173863, |
|
"learning_rate": 8.072397559653314e-06, |
|
"loss": 0.0903, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.5807770961145194, |
|
"grad_norm": 1.8567076129812703, |
|
"learning_rate": 8.059710434378717e-06, |
|
"loss": 0.0829, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5828220858895705, |
|
"grad_norm": 1.8280706428554012, |
|
"learning_rate": 8.046991737111696e-06, |
|
"loss": 0.0846, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5848670756646217, |
|
"grad_norm": 1.6827693552674245, |
|
"learning_rate": 8.034241599091666e-06, |
|
"loss": 0.0744, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5869120654396728, |
|
"grad_norm": 1.4276933688240632, |
|
"learning_rate": 8.021460151882472e-06, |
|
"loss": 0.0644, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.588957055214724, |
|
"grad_norm": 1.7054089254136917, |
|
"learning_rate": 8.008647527371022e-06, |
|
"loss": 0.0691, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.591002044989775, |
|
"grad_norm": 2.3943112344962616, |
|
"learning_rate": 7.995803857765934e-06, |
|
"loss": 0.1105, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5930470347648262, |
|
"grad_norm": 2.025612566291375, |
|
"learning_rate": 7.982929275596164e-06, |
|
"loss": 0.0936, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5950920245398773, |
|
"grad_norm": 2.0696844237753984, |
|
"learning_rate": 7.970023913709652e-06, |
|
"loss": 0.0916, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.5971370143149284, |
|
"grad_norm": 2.1125496705836184, |
|
"learning_rate": 7.957087905271934e-06, |
|
"loss": 0.0812, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5991820040899796, |
|
"grad_norm": 1.9111826855162881, |
|
"learning_rate": 7.944121383764775e-06, |
|
"loss": 0.0878, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6012269938650306, |
|
"grad_norm": 2.0166887475359507, |
|
"learning_rate": 7.931124482984802e-06, |
|
"loss": 0.088, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.6032719836400818, |
|
"grad_norm": 2.4597183492348145, |
|
"learning_rate": 7.918097337042106e-06, |
|
"loss": 0.1066, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6053169734151329, |
|
"grad_norm": 1.7705184105320022, |
|
"learning_rate": 7.905040080358869e-06, |
|
"loss": 0.0784, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6073619631901841, |
|
"grad_norm": 1.7246778829446732, |
|
"learning_rate": 7.891952847667973e-06, |
|
"loss": 0.0777, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6094069529652352, |
|
"grad_norm": 2.1760471200028593, |
|
"learning_rate": 7.878835774011615e-06, |
|
"loss": 0.0983, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6114519427402862, |
|
"grad_norm": 2.1592710885226327, |
|
"learning_rate": 7.865688994739907e-06, |
|
"loss": 0.0996, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6134969325153374, |
|
"grad_norm": 1.7446253812062307, |
|
"learning_rate": 7.85251264550948e-06, |
|
"loss": 0.0767, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6155419222903885, |
|
"grad_norm": 2.784714583612841, |
|
"learning_rate": 7.83930686228209e-06, |
|
"loss": 0.0871, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6175869120654397, |
|
"grad_norm": 1.923087819950953, |
|
"learning_rate": 7.826071781323208e-06, |
|
"loss": 0.076, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6196319018404908, |
|
"grad_norm": 1.78632914754461, |
|
"learning_rate": 7.812807539200622e-06, |
|
"loss": 0.0778, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.621676891615542, |
|
"grad_norm": 1.9376192118205642, |
|
"learning_rate": 7.799514272783014e-06, |
|
"loss": 0.0817, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.623721881390593, |
|
"grad_norm": 2.550158615394769, |
|
"learning_rate": 7.786192119238568e-06, |
|
"loss": 0.1057, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6257668711656442, |
|
"grad_norm": 1.9711665467023245, |
|
"learning_rate": 7.772841216033534e-06, |
|
"loss": 0.0764, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6278118609406953, |
|
"grad_norm": 1.5340501908307014, |
|
"learning_rate": 7.759461700930824e-06, |
|
"loss": 0.0637, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6298568507157464, |
|
"grad_norm": 2.2338456267605005, |
|
"learning_rate": 7.746053711988584e-06, |
|
"loss": 0.1059, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6319018404907976, |
|
"grad_norm": 1.7891397758115173, |
|
"learning_rate": 7.732617387558769e-06, |
|
"loss": 0.0824, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6339468302658486, |
|
"grad_norm": 2.1234757737848287, |
|
"learning_rate": 7.719152866285722e-06, |
|
"loss": 0.0885, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6359918200408998, |
|
"grad_norm": 2.4102510823654457, |
|
"learning_rate": 7.70566028710473e-06, |
|
"loss": 0.0996, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6380368098159509, |
|
"grad_norm": 1.9375735772859437, |
|
"learning_rate": 7.692139789240611e-06, |
|
"loss": 0.091, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6400817995910021, |
|
"grad_norm": 2.0158092912142824, |
|
"learning_rate": 7.678591512206254e-06, |
|
"loss": 0.088, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6421267893660532, |
|
"grad_norm": 1.6480327933319945, |
|
"learning_rate": 7.665015595801198e-06, |
|
"loss": 0.0791, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.6441717791411042, |
|
"grad_norm": 1.8510030476483572, |
|
"learning_rate": 7.651412180110176e-06, |
|
"loss": 0.085, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6462167689161554, |
|
"grad_norm": 1.592679706462086, |
|
"learning_rate": 7.637781405501682e-06, |
|
"loss": 0.0719, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.6482617586912065, |
|
"grad_norm": 1.871195454539005, |
|
"learning_rate": 7.6241234126265115e-06, |
|
"loss": 0.0935, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6503067484662577, |
|
"grad_norm": 2.1635066751175978, |
|
"learning_rate": 7.61043834241632e-06, |
|
"loss": 0.0887, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.6523517382413088, |
|
"grad_norm": 1.7458256267250807, |
|
"learning_rate": 7.596726336082158e-06, |
|
"loss": 0.0784, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.65439672801636, |
|
"grad_norm": 1.9970410164681027, |
|
"learning_rate": 7.5829875351130224e-06, |
|
"loss": 0.0825, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.656441717791411, |
|
"grad_norm": 1.8581711995026613, |
|
"learning_rate": 7.569222081274396e-06, |
|
"loss": 0.074, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.6584867075664622, |
|
"grad_norm": 1.5023298192040886, |
|
"learning_rate": 7.555430116606778e-06, |
|
"loss": 0.0707, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6605316973415133, |
|
"grad_norm": 1.9742828072984793, |
|
"learning_rate": 7.5416117834242254e-06, |
|
"loss": 0.0839, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6625766871165644, |
|
"grad_norm": 1.7579407302668417, |
|
"learning_rate": 7.527767224312883e-06, |
|
"loss": 0.0802, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.6646216768916156, |
|
"grad_norm": 1.7128227508559022, |
|
"learning_rate": 7.513896582129507e-06, |
|
"loss": 0.0745, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 1.9293198934120017, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0856, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.6687116564417178, |
|
"grad_norm": 2.0925311155648703, |
|
"learning_rate": 7.4860776213179264e-06, |
|
"loss": 0.0839, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.6707566462167689, |
|
"grad_norm": 2.082947312061181, |
|
"learning_rate": 7.472129589743034e-06, |
|
"loss": 0.0844, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.6728016359918201, |
|
"grad_norm": 2.0524639760050127, |
|
"learning_rate": 7.458156049199775e-06, |
|
"loss": 0.1008, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.6748466257668712, |
|
"grad_norm": 1.8254793507601215, |
|
"learning_rate": 7.44415714387582e-06, |
|
"loss": 0.0692, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6768916155419223, |
|
"grad_norm": 1.9185120612100472, |
|
"learning_rate": 7.430133018220567e-06, |
|
"loss": 0.0902, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.6789366053169734, |
|
"grad_norm": 1.5528728788442376, |
|
"learning_rate": 7.416083816943653e-06, |
|
"loss": 0.0681, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.6809815950920245, |
|
"grad_norm": 1.8960655345457742, |
|
"learning_rate": 7.4020096850134635e-06, |
|
"loss": 0.0862, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.6830265848670757, |
|
"grad_norm": 1.8164525363712967, |
|
"learning_rate": 7.38791076765563e-06, |
|
"loss": 0.08, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.6850715746421268, |
|
"grad_norm": 1.8489841001332317, |
|
"learning_rate": 7.37378721035154e-06, |
|
"loss": 0.0863, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6871165644171779, |
|
"grad_norm": 1.9227410779505356, |
|
"learning_rate": 7.359639158836828e-06, |
|
"loss": 0.0797, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.689161554192229, |
|
"grad_norm": 2.1782307041733855, |
|
"learning_rate": 7.345466759099875e-06, |
|
"loss": 0.0946, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6912065439672802, |
|
"grad_norm": 2.1346962188887626, |
|
"learning_rate": 7.331270157380304e-06, |
|
"loss": 0.0953, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.6932515337423313, |
|
"grad_norm": 1.759960430802437, |
|
"learning_rate": 7.317049500167466e-06, |
|
"loss": 0.0969, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.6952965235173824, |
|
"grad_norm": 2.0404870097493646, |
|
"learning_rate": 7.302804934198937e-06, |
|
"loss": 0.0852, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6973415132924335, |
|
"grad_norm": 2.3585223108650037, |
|
"learning_rate": 7.28853660645899e-06, |
|
"loss": 0.1054, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.6993865030674846, |
|
"grad_norm": 1.8518134360019116, |
|
"learning_rate": 7.2742446641770985e-06, |
|
"loss": 0.0942, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7014314928425358, |
|
"grad_norm": 1.6802043170675642, |
|
"learning_rate": 7.259929254826393e-06, |
|
"loss": 0.0703, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7034764826175869, |
|
"grad_norm": 2.3222003347544233, |
|
"learning_rate": 7.2455905261221585e-06, |
|
"loss": 0.0981, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7055214723926381, |
|
"grad_norm": 1.7096656290299208, |
|
"learning_rate": 7.231228626020303e-06, |
|
"loss": 0.0686, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7075664621676891, |
|
"grad_norm": 2.301527792978425, |
|
"learning_rate": 7.216843702715831e-06, |
|
"loss": 0.0806, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.7096114519427403, |
|
"grad_norm": 1.7573853731950437, |
|
"learning_rate": 7.202435904641316e-06, |
|
"loss": 0.0766, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7116564417177914, |
|
"grad_norm": 1.882419627052227, |
|
"learning_rate": 7.188005380465365e-06, |
|
"loss": 0.0733, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7137014314928425, |
|
"grad_norm": 2.470103268920824, |
|
"learning_rate": 7.173552279091087e-06, |
|
"loss": 0.1016, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.7157464212678937, |
|
"grad_norm": 1.4869158817717396, |
|
"learning_rate": 7.159076749654559e-06, |
|
"loss": 0.0624, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7177914110429447, |
|
"grad_norm": 1.5968050085844632, |
|
"learning_rate": 7.144578941523283e-06, |
|
"loss": 0.0707, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7198364008179959, |
|
"grad_norm": 1.6356481647041587, |
|
"learning_rate": 7.130059004294647e-06, |
|
"loss": 0.066, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.721881390593047, |
|
"grad_norm": 2.9392656768707504, |
|
"learning_rate": 7.115517087794381e-06, |
|
"loss": 0.1009, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.7239263803680982, |
|
"grad_norm": 2.2918804151158065, |
|
"learning_rate": 7.10095334207501e-06, |
|
"loss": 0.0962, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.7259713701431493, |
|
"grad_norm": 1.8475331071622312, |
|
"learning_rate": 7.086367917414307e-06, |
|
"loss": 0.082, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7280163599182005, |
|
"grad_norm": 1.9726367085045817, |
|
"learning_rate": 7.071760964313739e-06, |
|
"loss": 0.0732, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.7300613496932515, |
|
"grad_norm": 2.1502810171764244, |
|
"learning_rate": 7.057132633496924e-06, |
|
"loss": 0.1049, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.7321063394683026, |
|
"grad_norm": 1.8592273232420053, |
|
"learning_rate": 7.042483075908062e-06, |
|
"loss": 0.0862, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7341513292433538, |
|
"grad_norm": 2.355170511162385, |
|
"learning_rate": 7.027812442710385e-06, |
|
"loss": 0.0937, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.7361963190184049, |
|
"grad_norm": 1.6779561691380307, |
|
"learning_rate": 7.013120885284599e-06, |
|
"loss": 0.0675, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7382413087934561, |
|
"grad_norm": 2.3918539767349762, |
|
"learning_rate": 6.9984085552273136e-06, |
|
"loss": 0.0964, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.7402862985685071, |
|
"grad_norm": 2.0029627191660087, |
|
"learning_rate": 6.983675604349492e-06, |
|
"loss": 0.0808, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7423312883435583, |
|
"grad_norm": 2.361971189154723, |
|
"learning_rate": 6.968922184674868e-06, |
|
"loss": 0.0902, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7443762781186094, |
|
"grad_norm": 1.7941380948957237, |
|
"learning_rate": 6.954148448438389e-06, |
|
"loss": 0.093, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7464212678936605, |
|
"grad_norm": 1.8475776231124883, |
|
"learning_rate": 6.9393545480846405e-06, |
|
"loss": 0.0803, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7484662576687117, |
|
"grad_norm": 1.391463463223123, |
|
"learning_rate": 6.924540636266272e-06, |
|
"loss": 0.0604, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.7505112474437627, |
|
"grad_norm": 1.4587955996368223, |
|
"learning_rate": 6.909706865842429e-06, |
|
"loss": 0.0707, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7525562372188139, |
|
"grad_norm": 1.4497943658621633, |
|
"learning_rate": 6.894853389877163e-06, |
|
"loss": 0.0562, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.754601226993865, |
|
"grad_norm": 2.2816948101972474, |
|
"learning_rate": 6.879980361637865e-06, |
|
"loss": 0.0933, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7566462167689162, |
|
"grad_norm": 2.2765511971102925, |
|
"learning_rate": 6.86508793459368e-06, |
|
"loss": 0.0799, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7586912065439673, |
|
"grad_norm": 1.8489677964373195, |
|
"learning_rate": 6.8501762624139125e-06, |
|
"loss": 0.0828, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7607361963190185, |
|
"grad_norm": 2.2599682805893244, |
|
"learning_rate": 6.835245498966461e-06, |
|
"loss": 0.1019, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7627811860940695, |
|
"grad_norm": 1.7535048313819637, |
|
"learning_rate": 6.820295798316214e-06, |
|
"loss": 0.0877, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7648261758691206, |
|
"grad_norm": 2.1348338096756962, |
|
"learning_rate": 6.805327314723469e-06, |
|
"loss": 0.0713, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7668711656441718, |
|
"grad_norm": 1.471825773848477, |
|
"learning_rate": 6.790340202642333e-06, |
|
"loss": 0.0648, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7689161554192229, |
|
"grad_norm": 1.9667987135525467, |
|
"learning_rate": 6.775334616719136e-06, |
|
"loss": 0.0933, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7709611451942741, |
|
"grad_norm": 1.9656786527852497, |
|
"learning_rate": 6.760310711790831e-06, |
|
"loss": 0.0886, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7730061349693251, |
|
"grad_norm": 1.7703569506269972, |
|
"learning_rate": 6.7452686428834045e-06, |
|
"loss": 0.0774, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.7750511247443763, |
|
"grad_norm": 2.247523798525931, |
|
"learning_rate": 6.73020856521026e-06, |
|
"loss": 0.1031, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7770961145194274, |
|
"grad_norm": 1.872342874790795, |
|
"learning_rate": 6.715130634170636e-06, |
|
"loss": 0.0895, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7791411042944786, |
|
"grad_norm": 2.070656684465323, |
|
"learning_rate": 6.700035005347983e-06, |
|
"loss": 0.0868, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7811860940695297, |
|
"grad_norm": 2.2454799924898667, |
|
"learning_rate": 6.6849218345083785e-06, |
|
"loss": 0.0978, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7832310838445807, |
|
"grad_norm": 1.891754000824279, |
|
"learning_rate": 6.6697912775989045e-06, |
|
"loss": 0.0785, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.7852760736196319, |
|
"grad_norm": 1.7579771296347333, |
|
"learning_rate": 6.654643490746042e-06, |
|
"loss": 0.0858, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.787321063394683, |
|
"grad_norm": 1.9641471370237964, |
|
"learning_rate": 6.6394786302540645e-06, |
|
"loss": 0.082, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7893660531697342, |
|
"grad_norm": 1.8254808653521009, |
|
"learning_rate": 6.624296852603419e-06, |
|
"loss": 0.0882, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7914110429447853, |
|
"grad_norm": 1.4088372814526477, |
|
"learning_rate": 6.609098314449116e-06, |
|
"loss": 0.0671, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7934560327198364, |
|
"grad_norm": 1.9617841850743343, |
|
"learning_rate": 6.593883172619111e-06, |
|
"loss": 0.0933, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.7955010224948875, |
|
"grad_norm": 1.5767225526580313, |
|
"learning_rate": 6.578651584112687e-06, |
|
"loss": 0.0636, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7975460122699386, |
|
"grad_norm": 2.2228834140058336, |
|
"learning_rate": 6.563403706098833e-06, |
|
"loss": 0.1077, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7995910020449898, |
|
"grad_norm": 1.9792433955524278, |
|
"learning_rate": 6.5481396959146225e-06, |
|
"loss": 0.0891, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8016359918200409, |
|
"grad_norm": 1.2215680463568073, |
|
"learning_rate": 6.532859711063594e-06, |
|
"loss": 0.0563, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.803680981595092, |
|
"grad_norm": 1.6824250107088006, |
|
"learning_rate": 6.517563909214119e-06, |
|
"loss": 0.0783, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8057259713701431, |
|
"grad_norm": 1.7462647827998714, |
|
"learning_rate": 6.502252448197782e-06, |
|
"loss": 0.0814, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8077709611451943, |
|
"grad_norm": 1.3887650073154911, |
|
"learning_rate": 6.486925486007743e-06, |
|
"loss": 0.0641, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8098159509202454, |
|
"grad_norm": 2.0714118588443613, |
|
"learning_rate": 6.471583180797121e-06, |
|
"loss": 0.1055, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8118609406952966, |
|
"grad_norm": 1.5588416633458682, |
|
"learning_rate": 6.456225690877345e-06, |
|
"loss": 0.0744, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8139059304703476, |
|
"grad_norm": 1.6448175082442864, |
|
"learning_rate": 6.440853174716535e-06, |
|
"loss": 0.0679, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8159509202453987, |
|
"grad_norm": 1.7938499571539583, |
|
"learning_rate": 6.4254657909378615e-06, |
|
"loss": 0.0701, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8179959100204499, |
|
"grad_norm": 2.1584932014661606, |
|
"learning_rate": 6.410063698317901e-06, |
|
"loss": 0.0896, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8179959100204499, |
|
"eval_loss": 0.08662194758653641, |
|
"eval_runtime": 1.5943, |
|
"eval_samples_per_second": 25.089, |
|
"eval_steps_per_second": 6.272, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.820040899795501, |
|
"grad_norm": 1.6606377004284583, |
|
"learning_rate": 6.394647055785017e-06, |
|
"loss": 0.0699, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8220858895705522, |
|
"grad_norm": 2.2914577704716113, |
|
"learning_rate": 6.379216022417695e-06, |
|
"loss": 0.0858, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8241308793456033, |
|
"grad_norm": 1.7940636149724014, |
|
"learning_rate": 6.363770757442927e-06, |
|
"loss": 0.0838, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8261758691206544, |
|
"grad_norm": 2.1090208330887363, |
|
"learning_rate": 6.348311420234542e-06, |
|
"loss": 0.0837, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.8282208588957055, |
|
"grad_norm": 1.761887269760676, |
|
"learning_rate": 6.332838170311586e-06, |
|
"loss": 0.0791, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8302658486707567, |
|
"grad_norm": 2.0316688681749846, |
|
"learning_rate": 6.31735116733666e-06, |
|
"loss": 0.0762, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8323108384458078, |
|
"grad_norm": 1.4824433767272704, |
|
"learning_rate": 6.301850571114282e-06, |
|
"loss": 0.0531, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8343558282208589, |
|
"grad_norm": 1.9042239460112056, |
|
"learning_rate": 6.286336541589224e-06, |
|
"loss": 0.0685, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.83640081799591, |
|
"grad_norm": 1.631266470020269, |
|
"learning_rate": 6.270809238844881e-06, |
|
"loss": 0.0713, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8384458077709611, |
|
"grad_norm": 1.8805596275114955, |
|
"learning_rate": 6.255268823101604e-06, |
|
"loss": 0.0751, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8404907975460123, |
|
"grad_norm": 2.295370695981097, |
|
"learning_rate": 6.239715454715054e-06, |
|
"loss": 0.0984, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8425357873210634, |
|
"grad_norm": 2.269325740615013, |
|
"learning_rate": 6.224149294174549e-06, |
|
"loss": 0.0966, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8445807770961146, |
|
"grad_norm": 2.060132528646075, |
|
"learning_rate": 6.208570502101393e-06, |
|
"loss": 0.0817, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8466257668711656, |
|
"grad_norm": 1.8016710838966334, |
|
"learning_rate": 6.192979239247243e-06, |
|
"loss": 0.0858, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8486707566462167, |
|
"grad_norm": 1.9922284651178528, |
|
"learning_rate": 6.177375666492431e-06, |
|
"loss": 0.0735, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8507157464212679, |
|
"grad_norm": 1.689681220388234, |
|
"learning_rate": 6.161759944844308e-06, |
|
"loss": 0.0756, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.852760736196319, |
|
"grad_norm": 2.618019309211191, |
|
"learning_rate": 6.146132235435591e-06, |
|
"loss": 0.0829, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8548057259713702, |
|
"grad_norm": 2.0274624599323414, |
|
"learning_rate": 6.1304926995226895e-06, |
|
"loss": 0.0836, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8568507157464212, |
|
"grad_norm": 2.0858291852426496, |
|
"learning_rate": 6.114841498484049e-06, |
|
"loss": 0.09, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.8588957055214724, |
|
"grad_norm": 1.656532684919004, |
|
"learning_rate": 6.099178793818479e-06, |
|
"loss": 0.0674, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8609406952965235, |
|
"grad_norm": 1.781888769859481, |
|
"learning_rate": 6.083504747143496e-06, |
|
"loss": 0.0706, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8629856850715747, |
|
"grad_norm": 2.2606057911008217, |
|
"learning_rate": 6.0678195201936455e-06, |
|
"loss": 0.0969, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8650306748466258, |
|
"grad_norm": 2.3434090242083943, |
|
"learning_rate": 6.0521232748188416e-06, |
|
"loss": 0.1064, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8670756646216768, |
|
"grad_norm": 2.064354269601007, |
|
"learning_rate": 6.0364161729826905e-06, |
|
"loss": 0.0896, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.869120654396728, |
|
"grad_norm": 1.7331387406948884, |
|
"learning_rate": 6.020698376760824e-06, |
|
"loss": 0.0753, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8711656441717791, |
|
"grad_norm": 1.6248452960794957, |
|
"learning_rate": 6.0049700483392256e-06, |
|
"loss": 0.0683, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.8732106339468303, |
|
"grad_norm": 1.7788246413520943, |
|
"learning_rate": 5.9892313500125545e-06, |
|
"loss": 0.0808, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.8752556237218814, |
|
"grad_norm": 1.6403389067415772, |
|
"learning_rate": 5.9734824441824745e-06, |
|
"loss": 0.0763, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.8773006134969326, |
|
"grad_norm": 1.968967047123883, |
|
"learning_rate": 5.957723493355977e-06, |
|
"loss": 0.0946, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.8793456032719836, |
|
"grad_norm": 1.5050654888065231, |
|
"learning_rate": 5.941954660143703e-06, |
|
"loss": 0.0673, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8813905930470347, |
|
"grad_norm": 1.5627708754572884, |
|
"learning_rate": 5.926176107258265e-06, |
|
"loss": 0.0662, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8834355828220859, |
|
"grad_norm": 1.9429047212464141, |
|
"learning_rate": 5.910387997512573e-06, |
|
"loss": 0.0845, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.885480572597137, |
|
"grad_norm": 1.8862289067048144, |
|
"learning_rate": 5.894590493818149e-06, |
|
"loss": 0.074, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8875255623721882, |
|
"grad_norm": 1.4871525287185456, |
|
"learning_rate": 5.8787837591834415e-06, |
|
"loss": 0.0642, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.8895705521472392, |
|
"grad_norm": 1.9230413221781277, |
|
"learning_rate": 5.86296795671216e-06, |
|
"loss": 0.0854, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8916155419222904, |
|
"grad_norm": 1.8042936065902104, |
|
"learning_rate": 5.847143249601575e-06, |
|
"loss": 0.0733, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8936605316973415, |
|
"grad_norm": 1.89659500750371, |
|
"learning_rate": 5.831309801140841e-06, |
|
"loss": 0.0717, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8957055214723927, |
|
"grad_norm": 1.988875729296592, |
|
"learning_rate": 5.815467774709314e-06, |
|
"loss": 0.0901, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8977505112474438, |
|
"grad_norm": 2.1651335543706365, |
|
"learning_rate": 5.799617333774861e-06, |
|
"loss": 0.0942, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8997955010224948, |
|
"grad_norm": 1.694629036784553, |
|
"learning_rate": 5.783758641892172e-06, |
|
"loss": 0.0691, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.901840490797546, |
|
"grad_norm": 1.8724577454949232, |
|
"learning_rate": 5.767891862701081e-06, |
|
"loss": 0.0704, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9038854805725971, |
|
"grad_norm": 2.1444156343749103, |
|
"learning_rate": 5.7520171599248704e-06, |
|
"loss": 0.0862, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9059304703476483, |
|
"grad_norm": 1.6044981562664562, |
|
"learning_rate": 5.73613469736858e-06, |
|
"loss": 0.0695, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9079754601226994, |
|
"grad_norm": 1.7887677604270025, |
|
"learning_rate": 5.7202446389173225e-06, |
|
"loss": 0.0776, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.9100204498977505, |
|
"grad_norm": 2.0623558286912487, |
|
"learning_rate": 5.704347148534589e-06, |
|
"loss": 0.0939, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9120654396728016, |
|
"grad_norm": 1.7656943163705168, |
|
"learning_rate": 5.688442390260559e-06, |
|
"loss": 0.0699, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9141104294478528, |
|
"grad_norm": 1.950808092154816, |
|
"learning_rate": 5.672530528210405e-06, |
|
"loss": 0.0764, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.9161554192229039, |
|
"grad_norm": 1.5958859437062274, |
|
"learning_rate": 5.656611726572601e-06, |
|
"loss": 0.0707, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.918200408997955, |
|
"grad_norm": 2.106375056034876, |
|
"learning_rate": 5.640686149607228e-06, |
|
"loss": 0.0884, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9202453987730062, |
|
"grad_norm": 1.6944267542875595, |
|
"learning_rate": 5.624753961644281e-06, |
|
"loss": 0.0705, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9222903885480572, |
|
"grad_norm": 1.7841030194649183, |
|
"learning_rate": 5.608815327081969e-06, |
|
"loss": 0.0765, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.9243353783231084, |
|
"grad_norm": 1.7712077615995716, |
|
"learning_rate": 5.592870410385021e-06, |
|
"loss": 0.0733, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9263803680981595, |
|
"grad_norm": 2.1116933877527835, |
|
"learning_rate": 5.57691937608299e-06, |
|
"loss": 0.0913, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9284253578732107, |
|
"grad_norm": 1.4163030121649893, |
|
"learning_rate": 5.560962388768554e-06, |
|
"loss": 0.0545, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.9304703476482618, |
|
"grad_norm": 1.810312240995325, |
|
"learning_rate": 5.5449996130958185e-06, |
|
"loss": 0.0754, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9325153374233128, |
|
"grad_norm": 1.7804851440319986, |
|
"learning_rate": 5.529031213778615e-06, |
|
"loss": 0.0647, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.934560327198364, |
|
"grad_norm": 2.2045196131947624, |
|
"learning_rate": 5.513057355588804e-06, |
|
"loss": 0.0891, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9366053169734151, |
|
"grad_norm": 1.9852749682627289, |
|
"learning_rate": 5.497078203354577e-06, |
|
"loss": 0.0775, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.9386503067484663, |
|
"grad_norm": 1.831470663502445, |
|
"learning_rate": 5.481093921958749e-06, |
|
"loss": 0.0845, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9406952965235174, |
|
"grad_norm": 2.11329473791922, |
|
"learning_rate": 5.4651046763370615e-06, |
|
"loss": 0.0797, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9427402862985685, |
|
"grad_norm": 1.936029472084334, |
|
"learning_rate": 5.449110631476481e-06, |
|
"loss": 0.0626, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9447852760736196, |
|
"grad_norm": 2.8880649224481254, |
|
"learning_rate": 5.433111952413496e-06, |
|
"loss": 0.0876, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9468302658486708, |
|
"grad_norm": 1.6788136591444187, |
|
"learning_rate": 5.417108804232409e-06, |
|
"loss": 0.0802, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9488752556237219, |
|
"grad_norm": 1.7603381558531794, |
|
"learning_rate": 5.4011013520636466e-06, |
|
"loss": 0.0711, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.950920245398773, |
|
"grad_norm": 1.6546291038527539, |
|
"learning_rate": 5.385089761082039e-06, |
|
"loss": 0.0718, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9529652351738241, |
|
"grad_norm": 1.7527461122946937, |
|
"learning_rate": 5.3690741965051255e-06, |
|
"loss": 0.0772, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9550102249488752, |
|
"grad_norm": 2.153339872012431, |
|
"learning_rate": 5.353054823591446e-06, |
|
"loss": 0.0984, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9570552147239264, |
|
"grad_norm": 1.663490695062259, |
|
"learning_rate": 5.3370318076388405e-06, |
|
"loss": 0.0719, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.9591002044989775, |
|
"grad_norm": 2.039791879502307, |
|
"learning_rate": 5.3210053139827374e-06, |
|
"loss": 0.0852, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9611451942740287, |
|
"grad_norm": 1.5152660819257473, |
|
"learning_rate": 5.304975507994453e-06, |
|
"loss": 0.0705, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9631901840490797, |
|
"grad_norm": 2.5741046076702485, |
|
"learning_rate": 5.288942555079479e-06, |
|
"loss": 0.0841, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.9652351738241309, |
|
"grad_norm": 1.9038985725819735, |
|
"learning_rate": 5.27290662067578e-06, |
|
"loss": 0.0852, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.967280163599182, |
|
"grad_norm": 2.287787910789673, |
|
"learning_rate": 5.256867870252087e-06, |
|
"loss": 0.0943, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9693251533742331, |
|
"grad_norm": 2.001848621526479, |
|
"learning_rate": 5.240826469306187e-06, |
|
"loss": 0.0784, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9713701431492843, |
|
"grad_norm": 2.1169709747380865, |
|
"learning_rate": 5.224782583363215e-06, |
|
"loss": 0.0841, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9734151329243353, |
|
"grad_norm": 1.929731685506713, |
|
"learning_rate": 5.208736377973954e-06, |
|
"loss": 0.0749, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.9754601226993865, |
|
"grad_norm": 1.67776042592261, |
|
"learning_rate": 5.1926880187131134e-06, |
|
"loss": 0.0724, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.9775051124744376, |
|
"grad_norm": 2.298605193205057, |
|
"learning_rate": 5.176637671177631e-06, |
|
"loss": 0.1006, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.9795501022494888, |
|
"grad_norm": 1.6533628146778505, |
|
"learning_rate": 5.160585500984962e-06, |
|
"loss": 0.0646, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.9815950920245399, |
|
"grad_norm": 1.837841443310845, |
|
"learning_rate": 5.144531673771364e-06, |
|
"loss": 0.0735, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.983640081799591, |
|
"grad_norm": 2.0263842675819426, |
|
"learning_rate": 5.1284763551901995e-06, |
|
"loss": 0.0826, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9856850715746421, |
|
"grad_norm": 1.7313226963602824, |
|
"learning_rate": 5.112419710910213e-06, |
|
"loss": 0.0672, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.9877300613496932, |
|
"grad_norm": 2.015542364940025, |
|
"learning_rate": 5.096361906613836e-06, |
|
"loss": 0.0782, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9897750511247444, |
|
"grad_norm": 1.514253640731746, |
|
"learning_rate": 5.080303107995461e-06, |
|
"loss": 0.0737, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.9918200408997955, |
|
"grad_norm": 1.8395138697043611, |
|
"learning_rate": 5.064243480759749e-06, |
|
"loss": 0.0718, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9938650306748467, |
|
"grad_norm": 1.884215459624302, |
|
"learning_rate": 5.048183190619904e-06, |
|
"loss": 0.0698, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.9959100204498977, |
|
"grad_norm": 2.146037958767005, |
|
"learning_rate": 5.032122403295977e-06, |
|
"loss": 0.0902, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.9979550102249489, |
|
"grad_norm": 2.050214345057994, |
|
"learning_rate": 5.016061284513142e-06, |
|
"loss": 0.0682, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.118455250575704, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0774, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.0020449897750512, |
|
"grad_norm": 1.2462794649364155, |
|
"learning_rate": 4.983938715486858e-06, |
|
"loss": 0.033, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0040899795501022, |
|
"grad_norm": 1.1459651928513004, |
|
"learning_rate": 4.967877596704026e-06, |
|
"loss": 0.0332, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.0061349693251533, |
|
"grad_norm": 1.2067420298473397, |
|
"learning_rate": 4.951816809380098e-06, |
|
"loss": 0.0286, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.0081799591002045, |
|
"grad_norm": 1.5512568893071932, |
|
"learning_rate": 4.935756519240253e-06, |
|
"loss": 0.0371, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.0102249488752557, |
|
"grad_norm": 1.0286360100738143, |
|
"learning_rate": 4.919696892004539e-06, |
|
"loss": 0.0302, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.0122699386503067, |
|
"grad_norm": 1.1516477911852547, |
|
"learning_rate": 4.903638093386167e-06, |
|
"loss": 0.0369, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.0143149284253579, |
|
"grad_norm": 1.245679943150789, |
|
"learning_rate": 4.887580289089788e-06, |
|
"loss": 0.0301, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.016359918200409, |
|
"grad_norm": 1.703967009754419, |
|
"learning_rate": 4.871523644809802e-06, |
|
"loss": 0.0466, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.01840490797546, |
|
"grad_norm": 1.546303848260575, |
|
"learning_rate": 4.855468326228638e-06, |
|
"loss": 0.0318, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.0204498977505112, |
|
"grad_norm": 1.251270953754372, |
|
"learning_rate": 4.839414499015041e-06, |
|
"loss": 0.0263, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.0224948875255624, |
|
"grad_norm": 1.3764180941343123, |
|
"learning_rate": 4.82336232882237e-06, |
|
"loss": 0.0345, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0245398773006136, |
|
"grad_norm": 1.3372315078519637, |
|
"learning_rate": 4.807311981286888e-06, |
|
"loss": 0.0292, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.0265848670756645, |
|
"grad_norm": 1.544786930842698, |
|
"learning_rate": 4.791263622026048e-06, |
|
"loss": 0.0307, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.0286298568507157, |
|
"grad_norm": 1.3878895340607698, |
|
"learning_rate": 4.775217416636786e-06, |
|
"loss": 0.0326, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.030674846625767, |
|
"grad_norm": 1.6162202894342939, |
|
"learning_rate": 4.7591735306938144e-06, |
|
"loss": 0.0352, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.032719836400818, |
|
"grad_norm": 1.7805340361439255, |
|
"learning_rate": 4.7431321297479135e-06, |
|
"loss": 0.0372, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.034764826175869, |
|
"grad_norm": 1.758896549825897, |
|
"learning_rate": 4.727093379324222e-06, |
|
"loss": 0.0372, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.0368098159509203, |
|
"grad_norm": 1.431786070102582, |
|
"learning_rate": 4.711057444920522e-06, |
|
"loss": 0.0384, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.0388548057259714, |
|
"grad_norm": 1.4839513210400883, |
|
"learning_rate": 4.6950244920055475e-06, |
|
"loss": 0.0383, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.0408997955010224, |
|
"grad_norm": 1.4451001891145334, |
|
"learning_rate": 4.678994686017263e-06, |
|
"loss": 0.0352, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.0429447852760736, |
|
"grad_norm": 1.866286088536185, |
|
"learning_rate": 4.662968192361161e-06, |
|
"loss": 0.0395, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0449897750511248, |
|
"grad_norm": 1.715211234345797, |
|
"learning_rate": 4.646945176408555e-06, |
|
"loss": 0.0313, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.047034764826176, |
|
"grad_norm": 1.466087161229122, |
|
"learning_rate": 4.630925803494877e-06, |
|
"loss": 0.0386, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.049079754601227, |
|
"grad_norm": 1.9408338143723025, |
|
"learning_rate": 4.614910238917963e-06, |
|
"loss": 0.042, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.0511247443762781, |
|
"grad_norm": 1.4266586107150059, |
|
"learning_rate": 4.598898647936354e-06, |
|
"loss": 0.0392, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.0531697341513293, |
|
"grad_norm": 1.2356672924767518, |
|
"learning_rate": 4.582891195767591e-06, |
|
"loss": 0.0263, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.0552147239263803, |
|
"grad_norm": 1.7715488824913876, |
|
"learning_rate": 4.5668880475865074e-06, |
|
"loss": 0.0405, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.0572597137014315, |
|
"grad_norm": 1.5415710251918855, |
|
"learning_rate": 4.55088936852352e-06, |
|
"loss": 0.0357, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.0593047034764826, |
|
"grad_norm": 1.4428168778927553, |
|
"learning_rate": 4.534895323662939e-06, |
|
"loss": 0.0317, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.0613496932515338, |
|
"grad_norm": 2.2250878381040953, |
|
"learning_rate": 4.518906078041252e-06, |
|
"loss": 0.0415, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.0633946830265848, |
|
"grad_norm": 1.9143713762008936, |
|
"learning_rate": 4.502921796645424e-06, |
|
"loss": 0.0525, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.065439672801636, |
|
"grad_norm": 1.3675839835879693, |
|
"learning_rate": 4.486942644411197e-06, |
|
"loss": 0.0308, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.0674846625766872, |
|
"grad_norm": 1.561173769749881, |
|
"learning_rate": 4.4709687862213866e-06, |
|
"loss": 0.0314, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.0695296523517381, |
|
"grad_norm": 1.7909630810420591, |
|
"learning_rate": 4.455000386904185e-06, |
|
"loss": 0.0434, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.0715746421267893, |
|
"grad_norm": 1.5523968118402804, |
|
"learning_rate": 4.439037611231448e-06, |
|
"loss": 0.0303, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.0736196319018405, |
|
"grad_norm": 1.4974277205131226, |
|
"learning_rate": 4.423080623917012e-06, |
|
"loss": 0.026, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.0756646216768917, |
|
"grad_norm": 1.4404941302278607, |
|
"learning_rate": 4.40712958961498e-06, |
|
"loss": 0.0439, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.0777096114519427, |
|
"grad_norm": 1.4209943648710208, |
|
"learning_rate": 4.391184672918034e-06, |
|
"loss": 0.033, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0797546012269938, |
|
"grad_norm": 1.555014037990696, |
|
"learning_rate": 4.3752460383557195e-06, |
|
"loss": 0.0326, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.081799591002045, |
|
"grad_norm": 2.090310209970452, |
|
"learning_rate": 4.3593138503927725e-06, |
|
"loss": 0.0365, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.0838445807770962, |
|
"grad_norm": 1.4619403629973973, |
|
"learning_rate": 4.3433882734274e-06, |
|
"loss": 0.0317, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.0858895705521472, |
|
"grad_norm": 1.714317746744886, |
|
"learning_rate": 4.327469471789597e-06, |
|
"loss": 0.0384, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.0879345603271984, |
|
"grad_norm": 1.590590872486597, |
|
"learning_rate": 4.311557609739442e-06, |
|
"loss": 0.0259, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.0899795501022496, |
|
"grad_norm": 1.1141369910738055, |
|
"learning_rate": 4.295652851465412e-06, |
|
"loss": 0.0252, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.0920245398773005, |
|
"grad_norm": 1.4321694078456395, |
|
"learning_rate": 4.27975536108268e-06, |
|
"loss": 0.0266, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.0940695296523517, |
|
"grad_norm": 1.7443372340301369, |
|
"learning_rate": 4.263865302631423e-06, |
|
"loss": 0.04, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.096114519427403, |
|
"grad_norm": 1.569615321607395, |
|
"learning_rate": 4.24798284007513e-06, |
|
"loss": 0.0349, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.098159509202454, |
|
"grad_norm": 1.895924350646276, |
|
"learning_rate": 4.2321081372989195e-06, |
|
"loss": 0.0424, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.100204498977505, |
|
"grad_norm": 1.5923290036258984, |
|
"learning_rate": 4.216241358107831e-06, |
|
"loss": 0.0327, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.1022494887525562, |
|
"grad_norm": 1.6634363551936802, |
|
"learning_rate": 4.200382666225141e-06, |
|
"loss": 0.0486, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.1042944785276074, |
|
"grad_norm": 1.134137554773839, |
|
"learning_rate": 4.184532225290687e-06, |
|
"loss": 0.0223, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1063394683026584, |
|
"grad_norm": 1.426415380744953, |
|
"learning_rate": 4.16869019885916e-06, |
|
"loss": 0.0312, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.1083844580777096, |
|
"grad_norm": 1.1710483257547355, |
|
"learning_rate": 4.152856750398426e-06, |
|
"loss": 0.0223, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.1104294478527608, |
|
"grad_norm": 1.5906880940400463, |
|
"learning_rate": 4.137032043287841e-06, |
|
"loss": 0.0343, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.112474437627812, |
|
"grad_norm": 1.4350816491421392, |
|
"learning_rate": 4.121216240816559e-06, |
|
"loss": 0.035, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.114519427402863, |
|
"grad_norm": 1.7558271509204728, |
|
"learning_rate": 4.105409506181855e-06, |
|
"loss": 0.0378, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.116564417177914, |
|
"grad_norm": 1.427323382586195, |
|
"learning_rate": 4.089612002487428e-06, |
|
"loss": 0.0312, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.1186094069529653, |
|
"grad_norm": 1.6522808692997277, |
|
"learning_rate": 4.0738238927417354e-06, |
|
"loss": 0.0359, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.1206543967280163, |
|
"grad_norm": 2.1347991414153986, |
|
"learning_rate": 4.0580453398563005e-06, |
|
"loss": 0.0336, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.1226993865030674, |
|
"grad_norm": 1.1279124304678423, |
|
"learning_rate": 4.042276506644024e-06, |
|
"loss": 0.0245, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.1247443762781186, |
|
"grad_norm": 1.1967369793368918, |
|
"learning_rate": 4.026517555817527e-06, |
|
"loss": 0.034, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1267893660531698, |
|
"grad_norm": 1.543038828112648, |
|
"learning_rate": 4.010768649987446e-06, |
|
"loss": 0.0323, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.1288343558282208, |
|
"grad_norm": 2.005013019238116, |
|
"learning_rate": 3.995029951660777e-06, |
|
"loss": 0.0466, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.130879345603272, |
|
"grad_norm": 1.6139182636953708, |
|
"learning_rate": 3.979301623239177e-06, |
|
"loss": 0.0358, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.1329243353783232, |
|
"grad_norm": 1.3981520538108454, |
|
"learning_rate": 3.963583827017311e-06, |
|
"loss": 0.0377, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.1349693251533743, |
|
"grad_norm": 1.2494252534129648, |
|
"learning_rate": 3.94787672518116e-06, |
|
"loss": 0.0239, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.1370143149284253, |
|
"grad_norm": 2.204641878373284, |
|
"learning_rate": 3.932180479806357e-06, |
|
"loss": 0.0456, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.1390593047034765, |
|
"grad_norm": 1.565570789271408, |
|
"learning_rate": 3.916495252856506e-06, |
|
"loss": 0.0324, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.1411042944785277, |
|
"grad_norm": 1.6789711476896356, |
|
"learning_rate": 3.900821206181521e-06, |
|
"loss": 0.0368, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.1431492842535786, |
|
"grad_norm": 1.2216110777737839, |
|
"learning_rate": 3.885158501515954e-06, |
|
"loss": 0.0279, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.1451942740286298, |
|
"grad_norm": 1.3633350295226845, |
|
"learning_rate": 3.869507300477311e-06, |
|
"loss": 0.0328, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.147239263803681, |
|
"grad_norm": 1.4166924061545885, |
|
"learning_rate": 3.853867764564409e-06, |
|
"loss": 0.0329, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.149284253578732, |
|
"grad_norm": 1.6034718030946522, |
|
"learning_rate": 3.838240055155692e-06, |
|
"loss": 0.0334, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.1513292433537832, |
|
"grad_norm": 1.077423978891052, |
|
"learning_rate": 3.8226243335075715e-06, |
|
"loss": 0.0224, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.1533742331288344, |
|
"grad_norm": 1.3542581793543431, |
|
"learning_rate": 3.8070207607527587e-06, |
|
"loss": 0.0319, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.1554192229038855, |
|
"grad_norm": 1.6634879057734975, |
|
"learning_rate": 3.7914294978986083e-06, |
|
"loss": 0.0393, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.1574642126789365, |
|
"grad_norm": 1.628283419385412, |
|
"learning_rate": 3.7758507058254547e-06, |
|
"loss": 0.036, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.1595092024539877, |
|
"grad_norm": 1.5659369656664788, |
|
"learning_rate": 3.760284545284947e-06, |
|
"loss": 0.0277, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.1615541922290389, |
|
"grad_norm": 1.4921911012284916, |
|
"learning_rate": 3.744731176898396e-06, |
|
"loss": 0.0389, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.16359918200409, |
|
"grad_norm": 1.8188186488286036, |
|
"learning_rate": 3.7291907611551197e-06, |
|
"loss": 0.0521, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.165644171779141, |
|
"grad_norm": 1.7461144499706955, |
|
"learning_rate": 3.7136634584107787e-06, |
|
"loss": 0.0314, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.1676891615541922, |
|
"grad_norm": 1.5152391192451116, |
|
"learning_rate": 3.69814942888572e-06, |
|
"loss": 0.0395, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.1697341513292434, |
|
"grad_norm": 1.2278434943664795, |
|
"learning_rate": 3.6826488326633393e-06, |
|
"loss": 0.03, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.1717791411042944, |
|
"grad_norm": 1.3578782487107308, |
|
"learning_rate": 3.6671618296884147e-06, |
|
"loss": 0.0329, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.1738241308793456, |
|
"grad_norm": 1.0722909218685073, |
|
"learning_rate": 3.6516885797654593e-06, |
|
"loss": 0.024, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.1758691206543967, |
|
"grad_norm": 1.3302319522941752, |
|
"learning_rate": 3.6362292425570754e-06, |
|
"loss": 0.0281, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.177914110429448, |
|
"grad_norm": 1.574439290096253, |
|
"learning_rate": 3.620783977582305e-06, |
|
"loss": 0.0342, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.179959100204499, |
|
"grad_norm": 1.2172183727962829, |
|
"learning_rate": 3.605352944214986e-06, |
|
"loss": 0.026, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.18200408997955, |
|
"grad_norm": 1.4383648089060725, |
|
"learning_rate": 3.5899363016821e-06, |
|
"loss": 0.0265, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.1840490797546013, |
|
"grad_norm": 1.716222220393621, |
|
"learning_rate": 3.5745342090621406e-06, |
|
"loss": 0.0316, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.1860940695296525, |
|
"grad_norm": 1.3303476968967134, |
|
"learning_rate": 3.5591468252834654e-06, |
|
"loss": 0.0298, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.1881390593047034, |
|
"grad_norm": 1.226061907060063, |
|
"learning_rate": 3.543774309122657e-06, |
|
"loss": 0.0209, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.1901840490797546, |
|
"grad_norm": 1.656144699228516, |
|
"learning_rate": 3.528416819202881e-06, |
|
"loss": 0.0332, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.1922290388548058, |
|
"grad_norm": 1.5013453797233454, |
|
"learning_rate": 3.5130745139922572e-06, |
|
"loss": 0.0288, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.1942740286298568, |
|
"grad_norm": 1.9928513490408657, |
|
"learning_rate": 3.497747551802221e-06, |
|
"loss": 0.0521, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.196319018404908, |
|
"grad_norm": 1.2521586168450574, |
|
"learning_rate": 3.4824360907858824e-06, |
|
"loss": 0.0274, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.1983640081799591, |
|
"grad_norm": 1.2256691948629876, |
|
"learning_rate": 3.467140288936407e-06, |
|
"loss": 0.0282, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.20040899795501, |
|
"grad_norm": 1.6527331639972576, |
|
"learning_rate": 3.4518603040853783e-06, |
|
"loss": 0.0436, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.2024539877300613, |
|
"grad_norm": 1.368490830870422, |
|
"learning_rate": 3.43659629390117e-06, |
|
"loss": 0.0254, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.2044989775051125, |
|
"grad_norm": 1.8028951429047948, |
|
"learning_rate": 3.421348415887315e-06, |
|
"loss": 0.0408, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.2065439672801637, |
|
"grad_norm": 1.497550290975889, |
|
"learning_rate": 3.4061168273808896e-06, |
|
"loss": 0.0381, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.2085889570552146, |
|
"grad_norm": 1.6321033308692612, |
|
"learning_rate": 3.390901685550887e-06, |
|
"loss": 0.0383, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.2106339468302658, |
|
"grad_norm": 1.5159929907176852, |
|
"learning_rate": 3.3757031473965827e-06, |
|
"loss": 0.0304, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.212678936605317, |
|
"grad_norm": 2.4724108292496187, |
|
"learning_rate": 3.360521369745937e-06, |
|
"loss": 0.0518, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.2147239263803682, |
|
"grad_norm": 1.437590640293289, |
|
"learning_rate": 3.3453565092539586e-06, |
|
"loss": 0.0257, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.2167689161554192, |
|
"grad_norm": 1.169486503639217, |
|
"learning_rate": 3.330208722401097e-06, |
|
"loss": 0.0235, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.2188139059304703, |
|
"grad_norm": 1.1268592276904335, |
|
"learning_rate": 3.315078165491622e-06, |
|
"loss": 0.0279, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.2208588957055215, |
|
"grad_norm": 1.5683278793352897, |
|
"learning_rate": 3.299964994652017e-06, |
|
"loss": 0.0305, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.2229038854805725, |
|
"grad_norm": 1.9967429863243036, |
|
"learning_rate": 3.2848693658293675e-06, |
|
"loss": 0.0397, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.2249488752556237, |
|
"grad_norm": 1.4152854852084966, |
|
"learning_rate": 3.269791434789741e-06, |
|
"loss": 0.0256, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.2269938650306749, |
|
"grad_norm": 1.2653327623743533, |
|
"learning_rate": 3.254731357116597e-06, |
|
"loss": 0.029, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2269938650306749, |
|
"eval_loss": 0.07708186656236649, |
|
"eval_runtime": 1.5947, |
|
"eval_samples_per_second": 25.083, |
|
"eval_steps_per_second": 6.271, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.229038854805726, |
|
"grad_norm": 1.775392827404977, |
|
"learning_rate": 3.2396892882091678e-06, |
|
"loss": 0.0379, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.231083844580777, |
|
"grad_norm": 1.3830173529601073, |
|
"learning_rate": 3.2246653832808674e-06, |
|
"loss": 0.0288, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.2331288343558282, |
|
"grad_norm": 2.1155311480126264, |
|
"learning_rate": 3.209659797357669e-06, |
|
"loss": 0.0615, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.2351738241308794, |
|
"grad_norm": 2.4445073717181662, |
|
"learning_rate": 3.1946726852765325e-06, |
|
"loss": 0.0635, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.2372188139059306, |
|
"grad_norm": 2.110428549540264, |
|
"learning_rate": 3.179704201683786e-06, |
|
"loss": 0.0364, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.2392638036809815, |
|
"grad_norm": 1.7469628121389942, |
|
"learning_rate": 3.16475450103354e-06, |
|
"loss": 0.0372, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.2413087934560327, |
|
"grad_norm": 0.9152243306182197, |
|
"learning_rate": 3.149823737586089e-06, |
|
"loss": 0.0161, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.243353783231084, |
|
"grad_norm": 1.3056210800799668, |
|
"learning_rate": 3.1349120654063224e-06, |
|
"loss": 0.0266, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.2453987730061349, |
|
"grad_norm": 1.2879845659396767, |
|
"learning_rate": 3.1200196383621363e-06, |
|
"loss": 0.0274, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.247443762781186, |
|
"grad_norm": 1.382752868609299, |
|
"learning_rate": 3.105146610122839e-06, |
|
"loss": 0.0303, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2494887525562373, |
|
"grad_norm": 1.433137069436945, |
|
"learning_rate": 3.090293134157572e-06, |
|
"loss": 0.0259, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.2515337423312882, |
|
"grad_norm": 1.2272571061501605, |
|
"learning_rate": 3.0754593637337276e-06, |
|
"loss": 0.0305, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.2535787321063394, |
|
"grad_norm": 1.8924881887997287, |
|
"learning_rate": 3.0606454519153608e-06, |
|
"loss": 0.0478, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.2556237218813906, |
|
"grad_norm": 1.5252407085003916, |
|
"learning_rate": 3.0458515515616117e-06, |
|
"loss": 0.0382, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.2576687116564418, |
|
"grad_norm": 1.2566622827157716, |
|
"learning_rate": 3.0310778153251325e-06, |
|
"loss": 0.0265, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.259713701431493, |
|
"grad_norm": 1.5092416262198107, |
|
"learning_rate": 3.0163243956505093e-06, |
|
"loss": 0.0313, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.261758691206544, |
|
"grad_norm": 2.0141840286367083, |
|
"learning_rate": 3.001591444772687e-06, |
|
"loss": 0.0373, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.2638036809815951, |
|
"grad_norm": 1.0802484572404294, |
|
"learning_rate": 2.986879114715403e-06, |
|
"loss": 0.0266, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.2658486707566463, |
|
"grad_norm": 1.359876254126494, |
|
"learning_rate": 2.972187557289616e-06, |
|
"loss": 0.0305, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.2678936605316973, |
|
"grad_norm": 1.3671926484976908, |
|
"learning_rate": 2.95751692409194e-06, |
|
"loss": 0.0296, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.2699386503067485, |
|
"grad_norm": 1.553369266205434, |
|
"learning_rate": 2.9428673665030772e-06, |
|
"loss": 0.0352, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.2719836400817996, |
|
"grad_norm": 1.9647938781064505, |
|
"learning_rate": 2.9282390356862606e-06, |
|
"loss": 0.0414, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.2740286298568506, |
|
"grad_norm": 1.7057696677799985, |
|
"learning_rate": 2.9136320825856967e-06, |
|
"loss": 0.0364, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.2760736196319018, |
|
"grad_norm": 1.6026279841764746, |
|
"learning_rate": 2.899046657924992e-06, |
|
"loss": 0.0411, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.278118609406953, |
|
"grad_norm": 1.5405330605244225, |
|
"learning_rate": 2.884482912205621e-06, |
|
"loss": 0.0358, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.280163599182004, |
|
"grad_norm": 1.4374105894350884, |
|
"learning_rate": 2.8699409957053535e-06, |
|
"loss": 0.0267, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.2822085889570551, |
|
"grad_norm": 1.4661224711168332, |
|
"learning_rate": 2.8554210584767188e-06, |
|
"loss": 0.0271, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.2842535787321063, |
|
"grad_norm": 2.5207979346685963, |
|
"learning_rate": 2.840923250345442e-06, |
|
"loss": 0.0481, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.2862985685071575, |
|
"grad_norm": 1.617876668968538, |
|
"learning_rate": 2.8264477209089147e-06, |
|
"loss": 0.0369, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.2883435582822087, |
|
"grad_norm": 1.808249674605706, |
|
"learning_rate": 2.8119946195346375e-06, |
|
"loss": 0.0391, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.2903885480572597, |
|
"grad_norm": 1.3734714216015984, |
|
"learning_rate": 2.7975640953586846e-06, |
|
"loss": 0.0294, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.2924335378323109, |
|
"grad_norm": 1.391392647849848, |
|
"learning_rate": 2.78315629728417e-06, |
|
"loss": 0.0304, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.294478527607362, |
|
"grad_norm": 1.4706567091432965, |
|
"learning_rate": 2.7687713739796972e-06, |
|
"loss": 0.0302, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.296523517382413, |
|
"grad_norm": 1.3791514521388877, |
|
"learning_rate": 2.7544094738778436e-06, |
|
"loss": 0.0338, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.2985685071574642, |
|
"grad_norm": 1.9205903741411616, |
|
"learning_rate": 2.7400707451736103e-06, |
|
"loss": 0.0352, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.3006134969325154, |
|
"grad_norm": 1.2860131091648965, |
|
"learning_rate": 2.725755335822903e-06, |
|
"loss": 0.0305, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.3026584867075663, |
|
"grad_norm": 2.0441358593719396, |
|
"learning_rate": 2.7114633935410083e-06, |
|
"loss": 0.0381, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.3047034764826175, |
|
"grad_norm": 1.606441709854988, |
|
"learning_rate": 2.6971950658010666e-06, |
|
"loss": 0.0343, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.3067484662576687, |
|
"grad_norm": 1.1232676032960067, |
|
"learning_rate": 2.6829504998325352e-06, |
|
"loss": 0.0223, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.30879345603272, |
|
"grad_norm": 2.0695929953679353, |
|
"learning_rate": 2.6687298426196974e-06, |
|
"loss": 0.0437, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.310838445807771, |
|
"grad_norm": 1.2094015200347492, |
|
"learning_rate": 2.6545332409001267e-06, |
|
"loss": 0.0251, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.312883435582822, |
|
"grad_norm": 1.3109256389089359, |
|
"learning_rate": 2.6403608411631744e-06, |
|
"loss": 0.0319, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.3149284253578732, |
|
"grad_norm": 1.3994815087983556, |
|
"learning_rate": 2.62621278964846e-06, |
|
"loss": 0.0281, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.3169734151329244, |
|
"grad_norm": 1.3379418428305758, |
|
"learning_rate": 2.612089232344371e-06, |
|
"loss": 0.0301, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.3190184049079754, |
|
"grad_norm": 1.2825425624367577, |
|
"learning_rate": 2.5979903149865386e-06, |
|
"loss": 0.016, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.3210633946830266, |
|
"grad_norm": 1.1904405038306072, |
|
"learning_rate": 2.5839161830563475e-06, |
|
"loss": 0.0282, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.3231083844580778, |
|
"grad_norm": 1.4411097446473085, |
|
"learning_rate": 2.569866981779433e-06, |
|
"loss": 0.0312, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.3251533742331287, |
|
"grad_norm": 1.5429385898328931, |
|
"learning_rate": 2.555842856124182e-06, |
|
"loss": 0.0288, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.32719836400818, |
|
"grad_norm": 1.6647644695086803, |
|
"learning_rate": 2.541843950800226e-06, |
|
"loss": 0.0345, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.329243353783231, |
|
"grad_norm": 1.6594984133499624, |
|
"learning_rate": 2.527870410256966e-06, |
|
"loss": 0.0355, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.331288343558282, |
|
"grad_norm": 1.4712049332054007, |
|
"learning_rate": 2.513922378682075e-06, |
|
"loss": 0.0326, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 2.058004614790052, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.0497, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.3353783231083844, |
|
"grad_norm": 1.63215913934179, |
|
"learning_rate": 2.486103417870493e-06, |
|
"loss": 0.0407, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.3374233128834356, |
|
"grad_norm": 1.4235640655071966, |
|
"learning_rate": 2.472232775687119e-06, |
|
"loss": 0.0256, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.3394683026584868, |
|
"grad_norm": 1.4966832324121504, |
|
"learning_rate": 2.4583882165757766e-06, |
|
"loss": 0.0341, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.3415132924335378, |
|
"grad_norm": 1.579776697797337, |
|
"learning_rate": 2.4445698833932236e-06, |
|
"loss": 0.0318, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.343558282208589, |
|
"grad_norm": 1.5153710401247442, |
|
"learning_rate": 2.4307779187256064e-06, |
|
"loss": 0.041, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.3456032719836402, |
|
"grad_norm": 2.3410319215359343, |
|
"learning_rate": 2.417012464886978e-06, |
|
"loss": 0.0493, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.3476482617586911, |
|
"grad_norm": 1.7830705068876032, |
|
"learning_rate": 2.4032736639178443e-06, |
|
"loss": 0.038, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.3496932515337423, |
|
"grad_norm": 1.401656109282602, |
|
"learning_rate": 2.389561657583681e-06, |
|
"loss": 0.0314, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.3517382413087935, |
|
"grad_norm": 1.6005336710481401, |
|
"learning_rate": 2.3758765873734897e-06, |
|
"loss": 0.0339, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.3537832310838445, |
|
"grad_norm": 1.4717588678443954, |
|
"learning_rate": 2.3622185944983187e-06, |
|
"loss": 0.024, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.3558282208588956, |
|
"grad_norm": 1.4073986715417073, |
|
"learning_rate": 2.3485878198898253e-06, |
|
"loss": 0.0314, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.3578732106339468, |
|
"grad_norm": 1.1155919297375605, |
|
"learning_rate": 2.3349844041988044e-06, |
|
"loss": 0.0238, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.359918200408998, |
|
"grad_norm": 1.4447238413299701, |
|
"learning_rate": 2.3214084877937464e-06, |
|
"loss": 0.024, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.3619631901840492, |
|
"grad_norm": 1.4727637644326748, |
|
"learning_rate": 2.30786021075939e-06, |
|
"loss": 0.0352, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.3640081799591002, |
|
"grad_norm": 1.0917991783905898, |
|
"learning_rate": 2.294339712895271e-06, |
|
"loss": 0.02, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.3660531697341514, |
|
"grad_norm": 1.5740943427206295, |
|
"learning_rate": 2.28084713371428e-06, |
|
"loss": 0.0323, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.3680981595092025, |
|
"grad_norm": 1.4720527439260216, |
|
"learning_rate": 2.2673826124412314e-06, |
|
"loss": 0.0286, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.3701431492842535, |
|
"grad_norm": 1.4833939892417702, |
|
"learning_rate": 2.253946288011419e-06, |
|
"loss": 0.0342, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.3721881390593047, |
|
"grad_norm": 1.6876515961228076, |
|
"learning_rate": 2.240538299069178e-06, |
|
"loss": 0.0311, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.3742331288343559, |
|
"grad_norm": 2.1720167724269874, |
|
"learning_rate": 2.2271587839664673e-06, |
|
"loss": 0.0381, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.3762781186094069, |
|
"grad_norm": 1.5126928906252048, |
|
"learning_rate": 2.213807880761434e-06, |
|
"loss": 0.0332, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.378323108384458, |
|
"grad_norm": 1.6737538431685655, |
|
"learning_rate": 2.2004857272169878e-06, |
|
"loss": 0.0345, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.3803680981595092, |
|
"grad_norm": 1.426935375770983, |
|
"learning_rate": 2.18719246079938e-06, |
|
"loss": 0.0398, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.3824130879345602, |
|
"grad_norm": 1.4051149662672344, |
|
"learning_rate": 2.173928218676792e-06, |
|
"loss": 0.0232, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.3844580777096114, |
|
"grad_norm": 1.7917331547528335, |
|
"learning_rate": 2.160693137717912e-06, |
|
"loss": 0.0368, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.3865030674846626, |
|
"grad_norm": 1.8111522355910634, |
|
"learning_rate": 2.1474873544905204e-06, |
|
"loss": 0.0269, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.3885480572597138, |
|
"grad_norm": 1.6693031730647383, |
|
"learning_rate": 2.134311005260093e-06, |
|
"loss": 0.0362, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.390593047034765, |
|
"grad_norm": 1.4202013946415086, |
|
"learning_rate": 2.121164225988387e-06, |
|
"loss": 0.0298, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.392638036809816, |
|
"grad_norm": 1.3927664117864682, |
|
"learning_rate": 2.108047152332028e-06, |
|
"loss": 0.026, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.394683026584867, |
|
"grad_norm": 1.405359317118805, |
|
"learning_rate": 2.0949599196411326e-06, |
|
"loss": 0.0312, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.3967280163599183, |
|
"grad_norm": 1.2371988179013782, |
|
"learning_rate": 2.081902662957895e-06, |
|
"loss": 0.0214, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.3987730061349692, |
|
"grad_norm": 2.047236610352014, |
|
"learning_rate": 2.0688755170152e-06, |
|
"loss": 0.0421, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.4008179959100204, |
|
"grad_norm": 1.2055104899996096, |
|
"learning_rate": 2.0558786162352245e-06, |
|
"loss": 0.0218, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.4028629856850716, |
|
"grad_norm": 1.2042481090348163, |
|
"learning_rate": 2.042912094728068e-06, |
|
"loss": 0.0232, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.4049079754601226, |
|
"grad_norm": 1.965874166063246, |
|
"learning_rate": 2.029976086290347e-06, |
|
"loss": 0.0422, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.4069529652351738, |
|
"grad_norm": 1.7221753979316607, |
|
"learning_rate": 2.017070724403835e-06, |
|
"loss": 0.0315, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.408997955010225, |
|
"grad_norm": 1.319102902846999, |
|
"learning_rate": 2.004196142234068e-06, |
|
"loss": 0.0315, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.4110429447852761, |
|
"grad_norm": 0.9513064566229582, |
|
"learning_rate": 1.9913524726289784e-06, |
|
"loss": 0.0168, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4130879345603273, |
|
"grad_norm": 1.9447952357011042, |
|
"learning_rate": 1.9785398481175295e-06, |
|
"loss": 0.0413, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.4151329243353783, |
|
"grad_norm": 1.5286895548644743, |
|
"learning_rate": 1.965758400908334e-06, |
|
"loss": 0.0274, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.4171779141104295, |
|
"grad_norm": 1.1539526277463092, |
|
"learning_rate": 1.9530082628883058e-06, |
|
"loss": 0.0239, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.4192229038854807, |
|
"grad_norm": 1.6908331934705023, |
|
"learning_rate": 1.9402895656212834e-06, |
|
"loss": 0.0342, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.4212678936605316, |
|
"grad_norm": 2.2914630227874886, |
|
"learning_rate": 1.927602440346687e-06, |
|
"loss": 0.0414, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.4233128834355828, |
|
"grad_norm": 1.4300685831945064, |
|
"learning_rate": 1.914947017978153e-06, |
|
"loss": 0.0272, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.425357873210634, |
|
"grad_norm": 1.119854466298958, |
|
"learning_rate": 1.9023234291021875e-06, |
|
"loss": 0.0237, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.427402862985685, |
|
"grad_norm": 2.157933270356651, |
|
"learning_rate": 1.889731803976822e-06, |
|
"loss": 0.0365, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.4294478527607362, |
|
"grad_norm": 2.1495827518419017, |
|
"learning_rate": 1.8771722725302644e-06, |
|
"loss": 0.0421, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.4314928425357873, |
|
"grad_norm": 1.4403502460755069, |
|
"learning_rate": 1.8646449643595565e-06, |
|
"loss": 0.0256, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4335378323108383, |
|
"grad_norm": 1.612284239493657, |
|
"learning_rate": 1.8521500087292466e-06, |
|
"loss": 0.0314, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.4355828220858895, |
|
"grad_norm": 1.1017923126212417, |
|
"learning_rate": 1.8396875345700498e-06, |
|
"loss": 0.022, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.4376278118609407, |
|
"grad_norm": 1.6446468659290325, |
|
"learning_rate": 1.8272576704775074e-06, |
|
"loss": 0.0416, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.4396728016359919, |
|
"grad_norm": 1.3298795930204095, |
|
"learning_rate": 1.81486054471068e-06, |
|
"loss": 0.0269, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.441717791411043, |
|
"grad_norm": 1.0537263463371598, |
|
"learning_rate": 1.8024962851908106e-06, |
|
"loss": 0.022, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.443762781186094, |
|
"grad_norm": 1.569014799305421, |
|
"learning_rate": 1.790165019500007e-06, |
|
"loss": 0.027, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.4458077709611452, |
|
"grad_norm": 1.1169910353575982, |
|
"learning_rate": 1.7778668748799244e-06, |
|
"loss": 0.0214, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.4478527607361964, |
|
"grad_norm": 1.4170681283218884, |
|
"learning_rate": 1.7656019782304602e-06, |
|
"loss": 0.0241, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.4498977505112474, |
|
"grad_norm": 1.548818986804255, |
|
"learning_rate": 1.7533704561084331e-06, |
|
"loss": 0.0362, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.4519427402862985, |
|
"grad_norm": 2.0680503202924028, |
|
"learning_rate": 1.7411724347262826e-06, |
|
"loss": 0.0431, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.4539877300613497, |
|
"grad_norm": 1.4173336455080414, |
|
"learning_rate": 1.729008039950772e-06, |
|
"loss": 0.0279, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.4560327198364007, |
|
"grad_norm": 1.7820106072819453, |
|
"learning_rate": 1.7168773973016779e-06, |
|
"loss": 0.0353, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.4580777096114519, |
|
"grad_norm": 1.3988149854171141, |
|
"learning_rate": 1.7047806319505079e-06, |
|
"loss": 0.0271, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.460122699386503, |
|
"grad_norm": 1.390929649329335, |
|
"learning_rate": 1.6927178687191953e-06, |
|
"loss": 0.0256, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.4621676891615543, |
|
"grad_norm": 1.5277454496972025, |
|
"learning_rate": 1.680689232078827e-06, |
|
"loss": 0.0312, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.4642126789366054, |
|
"grad_norm": 1.9787662527459544, |
|
"learning_rate": 1.6686948461483432e-06, |
|
"loss": 0.0297, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.4662576687116564, |
|
"grad_norm": 1.3331939153009726, |
|
"learning_rate": 1.656734834693266e-06, |
|
"loss": 0.0269, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.4683026584867076, |
|
"grad_norm": 2.1133774298806784, |
|
"learning_rate": 1.6448093211244232e-06, |
|
"loss": 0.048, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.4703476482617588, |
|
"grad_norm": 1.9547321525275854, |
|
"learning_rate": 1.6329184284966675e-06, |
|
"loss": 0.0428, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.4723926380368098, |
|
"grad_norm": 1.5090987203091184, |
|
"learning_rate": 1.621062279507617e-06, |
|
"loss": 0.0305, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.474437627811861, |
|
"grad_norm": 0.9195367104860552, |
|
"learning_rate": 1.6092409964963779e-06, |
|
"loss": 0.0189, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.4764826175869121, |
|
"grad_norm": 1.9963374669225287, |
|
"learning_rate": 1.597454701442288e-06, |
|
"loss": 0.0385, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.478527607361963, |
|
"grad_norm": 1.5113737285476996, |
|
"learning_rate": 1.5857035159636625e-06, |
|
"loss": 0.033, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.4805725971370143, |
|
"grad_norm": 2.004845056871369, |
|
"learning_rate": 1.5739875613165283e-06, |
|
"loss": 0.0339, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.4826175869120655, |
|
"grad_norm": 0.9984169610067929, |
|
"learning_rate": 1.5623069583933836e-06, |
|
"loss": 0.02, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.4846625766871164, |
|
"grad_norm": 1.6259268058261294, |
|
"learning_rate": 1.550661827721941e-06, |
|
"loss": 0.0273, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.4867075664621676, |
|
"grad_norm": 1.6297643950263438, |
|
"learning_rate": 1.5390522894638937e-06, |
|
"loss": 0.028, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.4887525562372188, |
|
"grad_norm": 1.53638106009823, |
|
"learning_rate": 1.5274784634136658e-06, |
|
"loss": 0.0293, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.49079754601227, |
|
"grad_norm": 1.268974698538747, |
|
"learning_rate": 1.5159404689971797e-06, |
|
"loss": 0.0248, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.4928425357873212, |
|
"grad_norm": 1.427953166829002, |
|
"learning_rate": 1.5044384252706312e-06, |
|
"loss": 0.025, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.4948875255623721, |
|
"grad_norm": 1.0778297960602063, |
|
"learning_rate": 1.492972450919249e-06, |
|
"loss": 0.0196, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.4969325153374233, |
|
"grad_norm": 1.6048151777257864, |
|
"learning_rate": 1.4815426642560753e-06, |
|
"loss": 0.0254, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.4989775051124745, |
|
"grad_norm": 1.3837639161000226, |
|
"learning_rate": 1.4701491832207481e-06, |
|
"loss": 0.0234, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.5010224948875255, |
|
"grad_norm": 1.6210880071717662, |
|
"learning_rate": 1.458792125378285e-06, |
|
"loss": 0.0279, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.5030674846625767, |
|
"grad_norm": 1.6055051497727444, |
|
"learning_rate": 1.4474716079178541e-06, |
|
"loss": 0.047, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.5051124744376279, |
|
"grad_norm": 1.4164487131203813, |
|
"learning_rate": 1.436187747651589e-06, |
|
"loss": 0.0294, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.5071574642126788, |
|
"grad_norm": 1.404797134072682, |
|
"learning_rate": 1.4249406610133686e-06, |
|
"loss": 0.0333, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.50920245398773, |
|
"grad_norm": 1.5568137049723834, |
|
"learning_rate": 1.4137304640576161e-06, |
|
"loss": 0.0261, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.5112474437627812, |
|
"grad_norm": 1.4289478333095673, |
|
"learning_rate": 1.4025572724581037e-06, |
|
"loss": 0.0261, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.5132924335378322, |
|
"grad_norm": 2.5332634796920264, |
|
"learning_rate": 1.3914212015067653e-06, |
|
"loss": 0.0444, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5153374233128836, |
|
"grad_norm": 1.788966871785357, |
|
"learning_rate": 1.3803223661124938e-06, |
|
"loss": 0.0283, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.5173824130879345, |
|
"grad_norm": 1.450672721983178, |
|
"learning_rate": 1.3692608807999652e-06, |
|
"loss": 0.0362, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.5194274028629857, |
|
"grad_norm": 1.2779026779976663, |
|
"learning_rate": 1.3582368597084566e-06, |
|
"loss": 0.0259, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.521472392638037, |
|
"grad_norm": 1.181583768603287, |
|
"learning_rate": 1.3472504165906614e-06, |
|
"loss": 0.0189, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.5235173824130879, |
|
"grad_norm": 0.9817943493019303, |
|
"learning_rate": 1.3363016648115246e-06, |
|
"loss": 0.0184, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.525562372188139, |
|
"grad_norm": 1.270037833596693, |
|
"learning_rate": 1.325390717347065e-06, |
|
"loss": 0.0268, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.5276073619631902, |
|
"grad_norm": 1.3472246238651557, |
|
"learning_rate": 1.3145176867832165e-06, |
|
"loss": 0.0262, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.5296523517382412, |
|
"grad_norm": 1.4783552939397928, |
|
"learning_rate": 1.3036826853146601e-06, |
|
"loss": 0.0256, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.5316973415132924, |
|
"grad_norm": 1.5785020479524052, |
|
"learning_rate": 1.2928858247436672e-06, |
|
"loss": 0.0303, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.5337423312883436, |
|
"grad_norm": 0.9545819980628849, |
|
"learning_rate": 1.2821272164789544e-06, |
|
"loss": 0.0154, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5357873210633946, |
|
"grad_norm": 1.7853036227571542, |
|
"learning_rate": 1.2714069715345195e-06, |
|
"loss": 0.0366, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.537832310838446, |
|
"grad_norm": 1.2881320204863016, |
|
"learning_rate": 1.2607252005285109e-06, |
|
"loss": 0.0271, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.539877300613497, |
|
"grad_norm": 1.8402584593837081, |
|
"learning_rate": 1.2500820136820735e-06, |
|
"loss": 0.0397, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.5419222903885481, |
|
"grad_norm": 0.9104264280152901, |
|
"learning_rate": 1.2394775208182175e-06, |
|
"loss": 0.0185, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.5439672801635993, |
|
"grad_norm": 1.6576714713446372, |
|
"learning_rate": 1.2289118313606895e-06, |
|
"loss": 0.0329, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.5460122699386503, |
|
"grad_norm": 1.516510626114462, |
|
"learning_rate": 1.2183850543328313e-06, |
|
"loss": 0.029, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.5480572597137015, |
|
"grad_norm": 1.7170915167008158, |
|
"learning_rate": 1.2078972983564686e-06, |
|
"loss": 0.0281, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.5501022494887526, |
|
"grad_norm": 1.572147913277003, |
|
"learning_rate": 1.1974486716507782e-06, |
|
"loss": 0.0275, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.5521472392638036, |
|
"grad_norm": 1.6917430084108376, |
|
"learning_rate": 1.187039282031182e-06, |
|
"loss": 0.0357, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.5541922290388548, |
|
"grad_norm": 1.5988116947928293, |
|
"learning_rate": 1.1766692369082255e-06, |
|
"loss": 0.037, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.556237218813906, |
|
"grad_norm": 1.5739169249494382, |
|
"learning_rate": 1.1663386432864725e-06, |
|
"loss": 0.0323, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.558282208588957, |
|
"grad_norm": 0.8239355040656156, |
|
"learning_rate": 1.156047607763407e-06, |
|
"loss": 0.0153, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.5603271983640081, |
|
"grad_norm": 1.4324066370868447, |
|
"learning_rate": 1.145796236528322e-06, |
|
"loss": 0.0281, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.5623721881390593, |
|
"grad_norm": 1.167864770241578, |
|
"learning_rate": 1.135584635361232e-06, |
|
"loss": 0.0206, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.5644171779141103, |
|
"grad_norm": 1.2252633383184313, |
|
"learning_rate": 1.1254129096317807e-06, |
|
"loss": 0.0219, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.5664621676891617, |
|
"grad_norm": 1.2772246245687098, |
|
"learning_rate": 1.115281164298153e-06, |
|
"loss": 0.0228, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.5685071574642127, |
|
"grad_norm": 1.1793575214560597, |
|
"learning_rate": 1.1051895039059851e-06, |
|
"loss": 0.0239, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.5705521472392638, |
|
"grad_norm": 1.3979051592502238, |
|
"learning_rate": 1.095138032587298e-06, |
|
"loss": 0.0284, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.572597137014315, |
|
"grad_norm": 1.1554168176295245, |
|
"learning_rate": 1.0851268540594168e-06, |
|
"loss": 0.0233, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.574642126789366, |
|
"grad_norm": 1.1645512388718606, |
|
"learning_rate": 1.0751560716238968e-06, |
|
"loss": 0.0229, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.5766871165644172, |
|
"grad_norm": 1.7131522059742506, |
|
"learning_rate": 1.0652257881654625e-06, |
|
"loss": 0.0406, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.5787321063394684, |
|
"grad_norm": 1.2606812526165108, |
|
"learning_rate": 1.0553361061509482e-06, |
|
"loss": 0.0235, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.5807770961145193, |
|
"grad_norm": 1.1957626319021837, |
|
"learning_rate": 1.0454871276282335e-06, |
|
"loss": 0.0254, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.5828220858895705, |
|
"grad_norm": 1.221410722093273, |
|
"learning_rate": 1.0356789542251939e-06, |
|
"loss": 0.0285, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.5848670756646217, |
|
"grad_norm": 1.4005487946112367, |
|
"learning_rate": 1.0259116871486557e-06, |
|
"loss": 0.0237, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.5869120654396727, |
|
"grad_norm": 1.363179363127451, |
|
"learning_rate": 1.0161854271833444e-06, |
|
"loss": 0.023, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.588957055214724, |
|
"grad_norm": 1.3303699717121924, |
|
"learning_rate": 1.0065002746908532e-06, |
|
"loss": 0.0219, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.591002044989775, |
|
"grad_norm": 1.4319116309801472, |
|
"learning_rate": 9.96856329608597e-07, |
|
"loss": 0.031, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.5930470347648262, |
|
"grad_norm": 1.1984953249513992, |
|
"learning_rate": 9.87253691448794e-07, |
|
"loss": 0.0245, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.5950920245398774, |
|
"grad_norm": 1.2215565328948168, |
|
"learning_rate": 9.776924592974257e-07, |
|
"loss": 0.0248, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.5971370143149284, |
|
"grad_norm": 1.4160156872424536, |
|
"learning_rate": 9.681727318132228e-07, |
|
"loss": 0.0242, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.5991820040899796, |
|
"grad_norm": 1.1174710591294479, |
|
"learning_rate": 9.586946072266479e-07, |
|
"loss": 0.0191, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.6012269938650308, |
|
"grad_norm": 1.0676003932307012, |
|
"learning_rate": 9.492581833388736e-07, |
|
"loss": 0.0188, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.6032719836400817, |
|
"grad_norm": 1.0900550444215484, |
|
"learning_rate": 9.398635575207854e-07, |
|
"loss": 0.0218, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.605316973415133, |
|
"grad_norm": 1.2361313996180479, |
|
"learning_rate": 9.305108267119645e-07, |
|
"loss": 0.0207, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.607361963190184, |
|
"grad_norm": 1.218779379666619, |
|
"learning_rate": 9.212000874196953e-07, |
|
"loss": 0.0226, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.609406952965235, |
|
"grad_norm": 1.5316948706786864, |
|
"learning_rate": 9.119314357179687e-07, |
|
"loss": 0.0263, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.6114519427402862, |
|
"grad_norm": 1.3658846792851305, |
|
"learning_rate": 9.027049672464916e-07, |
|
"loss": 0.0207, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.6134969325153374, |
|
"grad_norm": 2.4597956315625455, |
|
"learning_rate": 8.935207772096904e-07, |
|
"loss": 0.0254, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.6155419222903884, |
|
"grad_norm": 1.3358397828434039, |
|
"learning_rate": 8.843789603757446e-07, |
|
"loss": 0.0265, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.6175869120654398, |
|
"grad_norm": 1.2481079015069951, |
|
"learning_rate": 8.752796110755985e-07, |
|
"loss": 0.02, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.6196319018404908, |
|
"grad_norm": 0.9661429436209987, |
|
"learning_rate": 8.662228232019876e-07, |
|
"loss": 0.0166, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.621676891615542, |
|
"grad_norm": 1.7556913252148523, |
|
"learning_rate": 8.572086902084731e-07, |
|
"loss": 0.0341, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.6237218813905931, |
|
"grad_norm": 1.418921330732568, |
|
"learning_rate": 8.482373051084791e-07, |
|
"loss": 0.0283, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.6257668711656441, |
|
"grad_norm": 2.369535130694504, |
|
"learning_rate": 8.393087604743283e-07, |
|
"loss": 0.0445, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.6278118609406953, |
|
"grad_norm": 1.6601126609364323, |
|
"learning_rate": 8.304231484362868e-07, |
|
"loss": 0.0293, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.6298568507157465, |
|
"grad_norm": 1.2796195343972467, |
|
"learning_rate": 8.215805606816191e-07, |
|
"loss": 0.0199, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.6319018404907975, |
|
"grad_norm": 1.207648315269951, |
|
"learning_rate": 8.127810884536402e-07, |
|
"loss": 0.0181, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.6339468302658486, |
|
"grad_norm": 2.1150186432662728, |
|
"learning_rate": 8.040248225507641e-07, |
|
"loss": 0.0473, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.6359918200408998, |
|
"grad_norm": 1.4200026666542498, |
|
"learning_rate": 7.953118533255821e-07, |
|
"loss": 0.0247, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6359918200408998, |
|
"eval_loss": 0.07060948759317398, |
|
"eval_runtime": 1.5943, |
|
"eval_samples_per_second": 25.09, |
|
"eval_steps_per_second": 6.272, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6380368098159508, |
|
"grad_norm": 1.5772837122475736, |
|
"learning_rate": 7.866422706839239e-07, |
|
"loss": 0.0264, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.6400817995910022, |
|
"grad_norm": 1.1550918911272414, |
|
"learning_rate": 7.780161640839257e-07, |
|
"loss": 0.0224, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.6421267893660532, |
|
"grad_norm": 1.4676067465705516, |
|
"learning_rate": 7.694336225351107e-07, |
|
"loss": 0.0237, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.6441717791411041, |
|
"grad_norm": 1.4993385397429064, |
|
"learning_rate": 7.60894734597476e-07, |
|
"loss": 0.0295, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.6462167689161555, |
|
"grad_norm": 1.2385669586685766, |
|
"learning_rate": 7.52399588380568e-07, |
|
"loss": 0.0243, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.6482617586912065, |
|
"grad_norm": 1.4635374861697166, |
|
"learning_rate": 7.439482715425806e-07, |
|
"loss": 0.0252, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.6503067484662577, |
|
"grad_norm": 1.2402570999087212, |
|
"learning_rate": 7.355408712894508e-07, |
|
"loss": 0.0211, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.6523517382413089, |
|
"grad_norm": 1.5520153711347568, |
|
"learning_rate": 7.271774743739546e-07, |
|
"loss": 0.0303, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.6543967280163598, |
|
"grad_norm": 1.2762250260415324, |
|
"learning_rate": 7.18858167094817e-07, |
|
"loss": 0.0242, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.656441717791411, |
|
"grad_norm": 1.4244259857298884, |
|
"learning_rate": 7.105830352958143e-07, |
|
"loss": 0.0278, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.6584867075664622, |
|
"grad_norm": 1.4760993572706773, |
|
"learning_rate": 7.023521643648984e-07, |
|
"loss": 0.0292, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.6605316973415132, |
|
"grad_norm": 1.3443460519107557, |
|
"learning_rate": 6.941656392333046e-07, |
|
"loss": 0.0232, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.6625766871165644, |
|
"grad_norm": 1.3709203745792065, |
|
"learning_rate": 6.86023544374686e-07, |
|
"loss": 0.027, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.6646216768916156, |
|
"grad_norm": 1.4289920722764744, |
|
"learning_rate": 6.779259638042318e-07, |
|
"loss": 0.0231, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 1.2467075238350902, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.0288, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.668711656441718, |
|
"grad_norm": 1.5823026933811752, |
|
"learning_rate": 6.618646792910893e-07, |
|
"loss": 0.0326, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.670756646216769, |
|
"grad_norm": 1.5584280269321396, |
|
"learning_rate": 6.539011410787105e-07, |
|
"loss": 0.0262, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.67280163599182, |
|
"grad_norm": 1.1208057763458479, |
|
"learning_rate": 6.459824486134015e-07, |
|
"loss": 0.0212, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.6748466257668713, |
|
"grad_norm": 1.3862339324803945, |
|
"learning_rate": 6.381086836051498e-07, |
|
"loss": 0.0258, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.6768916155419222, |
|
"grad_norm": 1.1160447785511467, |
|
"learning_rate": 6.302799273003546e-07, |
|
"loss": 0.0166, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.6789366053169734, |
|
"grad_norm": 1.3240491165501231, |
|
"learning_rate": 6.22496260480982e-07, |
|
"loss": 0.0248, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.6809815950920246, |
|
"grad_norm": 1.338838004083599, |
|
"learning_rate": 6.147577634637413e-07, |
|
"loss": 0.0262, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.6830265848670756, |
|
"grad_norm": 1.3968985445629194, |
|
"learning_rate": 6.070645160992523e-07, |
|
"loss": 0.0281, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.6850715746421268, |
|
"grad_norm": 1.171408977887829, |
|
"learning_rate": 5.994165977712175e-07, |
|
"loss": 0.0213, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.687116564417178, |
|
"grad_norm": 1.3360283784514455, |
|
"learning_rate": 5.918140873956063e-07, |
|
"loss": 0.0203, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.689161554192229, |
|
"grad_norm": 1.2733261388021238, |
|
"learning_rate": 5.842570634198453e-07, |
|
"loss": 0.0193, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.6912065439672803, |
|
"grad_norm": 1.6784098486146612, |
|
"learning_rate": 5.767456038219987e-07, |
|
"loss": 0.0262, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.6932515337423313, |
|
"grad_norm": 1.0355585556125833, |
|
"learning_rate": 5.692797861099719e-07, |
|
"loss": 0.0215, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.6952965235173822, |
|
"grad_norm": 1.4014112675195356, |
|
"learning_rate": 5.618596873207083e-07, |
|
"loss": 0.0225, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.6973415132924337, |
|
"grad_norm": 1.6204759478058526, |
|
"learning_rate": 5.544853840193981e-07, |
|
"loss": 0.0283, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.6993865030674846, |
|
"grad_norm": 1.1175326576111029, |
|
"learning_rate": 5.471569522986775e-07, |
|
"loss": 0.0197, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.7014314928425358, |
|
"grad_norm": 1.5156333961192319, |
|
"learning_rate": 5.398744677778595e-07, |
|
"loss": 0.0286, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.703476482617587, |
|
"grad_norm": 1.3492765083670422, |
|
"learning_rate": 5.326380056021419e-07, |
|
"loss": 0.0259, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.705521472392638, |
|
"grad_norm": 1.911784218966074, |
|
"learning_rate": 5.254476404418341e-07, |
|
"loss": 0.036, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.7075664621676891, |
|
"grad_norm": 1.3456317179935473, |
|
"learning_rate": 5.183034464915898e-07, |
|
"loss": 0.0248, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.7096114519427403, |
|
"grad_norm": 1.3465884976486044, |
|
"learning_rate": 5.112054974696395e-07, |
|
"loss": 0.0214, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.7116564417177913, |
|
"grad_norm": 1.2682146752514654, |
|
"learning_rate": 5.041538666170282e-07, |
|
"loss": 0.0245, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.7137014314928425, |
|
"grad_norm": 1.0732597160929007, |
|
"learning_rate": 4.971486266968634e-07, |
|
"loss": 0.0248, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.7157464212678937, |
|
"grad_norm": 1.2390245442361538, |
|
"learning_rate": 4.901898499935609e-07, |
|
"loss": 0.022, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.7177914110429446, |
|
"grad_norm": 1.1298732472922557, |
|
"learning_rate": 4.832776083120983e-07, |
|
"loss": 0.019, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.719836400817996, |
|
"grad_norm": 1.2513860400146173, |
|
"learning_rate": 4.764119729772809e-07, |
|
"loss": 0.0254, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.721881390593047, |
|
"grad_norm": 1.5812395858247674, |
|
"learning_rate": 4.695930148329958e-07, |
|
"loss": 0.0303, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.7239263803680982, |
|
"grad_norm": 1.2260179416900976, |
|
"learning_rate": 4.628208042414889e-07, |
|
"loss": 0.0231, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.7259713701431494, |
|
"grad_norm": 0.9260246632190309, |
|
"learning_rate": 4.5609541108263377e-07, |
|
"loss": 0.0191, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.7280163599182004, |
|
"grad_norm": 1.8092568351032716, |
|
"learning_rate": 4.494169047532154e-07, |
|
"loss": 0.0377, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.7300613496932515, |
|
"grad_norm": 1.4342896955808682, |
|
"learning_rate": 4.4278535416620914e-07, |
|
"loss": 0.0296, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.7321063394683027, |
|
"grad_norm": 1.411079843320368, |
|
"learning_rate": 4.362008277500701e-07, |
|
"loss": 0.0252, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.7341513292433537, |
|
"grad_norm": 1.4065270120904347, |
|
"learning_rate": 4.2966339344803376e-07, |
|
"loss": 0.0236, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.7361963190184049, |
|
"grad_norm": 2.637324684778294, |
|
"learning_rate": 4.231731187174065e-07, |
|
"loss": 0.0406, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.738241308793456, |
|
"grad_norm": 1.5036834826794743, |
|
"learning_rate": 4.167300705288718e-07, |
|
"loss": 0.0238, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.740286298568507, |
|
"grad_norm": 1.7305730073425691, |
|
"learning_rate": 4.10334315365804e-07, |
|
"loss": 0.03, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.7423312883435584, |
|
"grad_norm": 1.3670965259099597, |
|
"learning_rate": 4.0398591922357787e-07, |
|
"loss": 0.0244, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.7443762781186094, |
|
"grad_norm": 1.4873125793549382, |
|
"learning_rate": 3.9768494760888455e-07, |
|
"loss": 0.0281, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.7464212678936604, |
|
"grad_norm": 1.3256819619759466, |
|
"learning_rate": 3.914314655390633e-07, |
|
"loss": 0.018, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.7484662576687118, |
|
"grad_norm": 1.0528899986433782, |
|
"learning_rate": 3.852255375414271e-07, |
|
"loss": 0.0185, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.7505112474437627, |
|
"grad_norm": 1.5167752108851527, |
|
"learning_rate": 3.7906722765259364e-07, |
|
"loss": 0.0285, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.752556237218814, |
|
"grad_norm": 1.2661873569980087, |
|
"learning_rate": 3.7295659941782856e-07, |
|
"loss": 0.0229, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.7546012269938651, |
|
"grad_norm": 1.2713073653615368, |
|
"learning_rate": 3.6689371589039013e-07, |
|
"loss": 0.022, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.756646216768916, |
|
"grad_norm": 1.410691086480624, |
|
"learning_rate": 3.60878639630875e-07, |
|
"loss": 0.0296, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.7586912065439673, |
|
"grad_norm": 0.9920426356145646, |
|
"learning_rate": 3.5491143270657445e-07, |
|
"loss": 0.015, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.7607361963190185, |
|
"grad_norm": 1.5216849169101498, |
|
"learning_rate": 3.489921566908372e-07, |
|
"loss": 0.0271, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.7627811860940694, |
|
"grad_norm": 1.4674709021434214, |
|
"learning_rate": 3.4312087266242964e-07, |
|
"loss": 0.0263, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.7648261758691206, |
|
"grad_norm": 1.7675475614023826, |
|
"learning_rate": 3.3729764120490447e-07, |
|
"loss": 0.0384, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.7668711656441718, |
|
"grad_norm": 1.4676888698930726, |
|
"learning_rate": 3.315225224059809e-07, |
|
"loss": 0.0301, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.7689161554192228, |
|
"grad_norm": 1.4800320849283661, |
|
"learning_rate": 3.25795575856922e-07, |
|
"loss": 0.0283, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.7709611451942742, |
|
"grad_norm": 1.6806826350105444, |
|
"learning_rate": 3.2011686065191894e-07, |
|
"loss": 0.0391, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.7730061349693251, |
|
"grad_norm": 1.3249873571873563, |
|
"learning_rate": 3.1448643538748045e-07, |
|
"loss": 0.0203, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.7750511247443763, |
|
"grad_norm": 1.8551891141720298, |
|
"learning_rate": 3.0890435816183226e-07, |
|
"loss": 0.0393, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.7770961145194275, |
|
"grad_norm": 1.2327805158687992, |
|
"learning_rate": 3.03370686574313e-07, |
|
"loss": 0.0236, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.7791411042944785, |
|
"grad_norm": 1.3314203527215986, |
|
"learning_rate": 2.9788547772478416e-07, |
|
"loss": 0.0235, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.7811860940695297, |
|
"grad_norm": 1.1861648902004243, |
|
"learning_rate": 2.9244878821303556e-07, |
|
"loss": 0.0154, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.7832310838445808, |
|
"grad_norm": 1.3988331617040364, |
|
"learning_rate": 2.870606741382059e-07, |
|
"loss": 0.0349, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.7852760736196318, |
|
"grad_norm": 1.4786599382381074, |
|
"learning_rate": 2.817211910982037e-07, |
|
"loss": 0.0281, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.787321063394683, |
|
"grad_norm": 1.7984122833021066, |
|
"learning_rate": 2.7643039418912996e-07, |
|
"loss": 0.0291, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.7893660531697342, |
|
"grad_norm": 1.7454260608433505, |
|
"learning_rate": 2.711883380047131e-07, |
|
"loss": 0.0292, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.7914110429447851, |
|
"grad_norm": 1.435756459453004, |
|
"learning_rate": 2.6599507663574387e-07, |
|
"loss": 0.0293, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.7934560327198366, |
|
"grad_norm": 1.4644482699217904, |
|
"learning_rate": 2.6085066366951907e-07, |
|
"loss": 0.0245, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.7955010224948875, |
|
"grad_norm": 1.3875758357595886, |
|
"learning_rate": 2.557551521892859e-07, |
|
"loss": 0.0271, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.7975460122699385, |
|
"grad_norm": 0.9876574184926428, |
|
"learning_rate": 2.5070859477369645e-07, |
|
"loss": 0.0148, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.79959100204499, |
|
"grad_norm": 1.265878789206368, |
|
"learning_rate": 2.457110434962645e-07, |
|
"loss": 0.0216, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.8016359918200409, |
|
"grad_norm": 1.40284126411437, |
|
"learning_rate": 2.407625499248273e-07, |
|
"loss": 0.0249, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.803680981595092, |
|
"grad_norm": 1.1876694796769958, |
|
"learning_rate": 2.3586316512101416e-07, |
|
"loss": 0.018, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.8057259713701432, |
|
"grad_norm": 0.8367133769318583, |
|
"learning_rate": 2.3101293963972094e-07, |
|
"loss": 0.0178, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.8077709611451942, |
|
"grad_norm": 1.0995322120882318, |
|
"learning_rate": 2.2621192352858702e-07, |
|
"loss": 0.0198, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.8098159509202454, |
|
"grad_norm": 1.703675555853278, |
|
"learning_rate": 2.2146016632747624e-07, |
|
"loss": 0.0341, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.8118609406952966, |
|
"grad_norm": 1.6229142547220725, |
|
"learning_rate": 2.1675771706797132e-07, |
|
"loss": 0.0278, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.8139059304703475, |
|
"grad_norm": 1.6632882474057635, |
|
"learning_rate": 2.1210462427286528e-07, |
|
"loss": 0.0264, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.8159509202453987, |
|
"grad_norm": 1.696506311524546, |
|
"learning_rate": 2.0750093595565735e-07, |
|
"loss": 0.0315, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.81799591002045, |
|
"grad_norm": 1.783147077677834, |
|
"learning_rate": 2.0294669962006352e-07, |
|
"loss": 0.0306, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.8200408997955009, |
|
"grad_norm": 1.0803640055203296, |
|
"learning_rate": 1.984419622595224e-07, |
|
"loss": 0.0159, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.8220858895705523, |
|
"grad_norm": 1.549113936998901, |
|
"learning_rate": 1.9398677035671222e-07, |
|
"loss": 0.0356, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.8241308793456033, |
|
"grad_norm": 1.217663448407663, |
|
"learning_rate": 1.8958116988306852e-07, |
|
"loss": 0.0214, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.8261758691206544, |
|
"grad_norm": 1.2606236244237474, |
|
"learning_rate": 1.8522520629831396e-07, |
|
"loss": 0.0264, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.8282208588957056, |
|
"grad_norm": 1.1212441204936592, |
|
"learning_rate": 1.8091892454998595e-07, |
|
"loss": 0.017, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.8302658486707566, |
|
"grad_norm": 1.042748614877236, |
|
"learning_rate": 1.7666236907297407e-07, |
|
"loss": 0.0164, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.8323108384458078, |
|
"grad_norm": 1.3863959126170518, |
|
"learning_rate": 1.7245558378906012e-07, |
|
"loss": 0.0266, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.834355828220859, |
|
"grad_norm": 1.3029901304956657, |
|
"learning_rate": 1.682986121064689e-07, |
|
"loss": 0.025, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.83640081799591, |
|
"grad_norm": 0.8924861887554183, |
|
"learning_rate": 1.641914969194147e-07, |
|
"loss": 0.014, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.8384458077709611, |
|
"grad_norm": 1.0234983500191113, |
|
"learning_rate": 1.6013428060766168e-07, |
|
"loss": 0.019, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.8404907975460123, |
|
"grad_norm": 0.9136453201589728, |
|
"learning_rate": 1.561270050360897e-07, |
|
"loss": 0.0146, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8425357873210633, |
|
"grad_norm": 1.8298008002925186, |
|
"learning_rate": 1.5216971155425474e-07, |
|
"loss": 0.0367, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.8445807770961147, |
|
"grad_norm": 0.9475181347283721, |
|
"learning_rate": 1.4826244099596986e-07, |
|
"loss": 0.0148, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.8466257668711656, |
|
"grad_norm": 0.9550648746556579, |
|
"learning_rate": 1.444052336788787e-07, |
|
"loss": 0.015, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.8486707566462166, |
|
"grad_norm": 1.4927311894076911, |
|
"learning_rate": 1.4059812940404093e-07, |
|
"loss": 0.0286, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.850715746421268, |
|
"grad_norm": 1.1696983318525789, |
|
"learning_rate": 1.3684116745552423e-07, |
|
"loss": 0.0212, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.852760736196319, |
|
"grad_norm": 1.2578768723641045, |
|
"learning_rate": 1.33134386599994e-07, |
|
"loss": 0.0244, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.8548057259713702, |
|
"grad_norm": 1.558622255405316, |
|
"learning_rate": 1.2947782508631823e-07, |
|
"loss": 0.0237, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.8568507157464214, |
|
"grad_norm": 1.52292583646827, |
|
"learning_rate": 1.2587152064516828e-07, |
|
"loss": 0.0246, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.8588957055214723, |
|
"grad_norm": 1.1936675481636827, |
|
"learning_rate": 1.2231551048863421e-07, |
|
"loss": 0.022, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.8609406952965235, |
|
"grad_norm": 1.4367150151701205, |
|
"learning_rate": 1.1880983130983626e-07, |
|
"loss": 0.0274, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.8629856850715747, |
|
"grad_norm": 1.4947550938995606, |
|
"learning_rate": 1.1535451928254948e-07, |
|
"loss": 0.0225, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.8650306748466257, |
|
"grad_norm": 1.6128578500137207, |
|
"learning_rate": 1.1194961006082972e-07, |
|
"loss": 0.0332, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.8670756646216768, |
|
"grad_norm": 1.1175499571067549, |
|
"learning_rate": 1.0859513877864381e-07, |
|
"loss": 0.0202, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.869120654396728, |
|
"grad_norm": 1.7323202236444266, |
|
"learning_rate": 1.0529114004951047e-07, |
|
"loss": 0.0423, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.871165644171779, |
|
"grad_norm": 1.1600018835452393, |
|
"learning_rate": 1.0203764796614057e-07, |
|
"loss": 0.0194, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.8732106339468304, |
|
"grad_norm": 1.3204191190409245, |
|
"learning_rate": 9.883469610008578e-08, |
|
"loss": 0.027, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.8752556237218814, |
|
"grad_norm": 1.5789271332032802, |
|
"learning_rate": 9.568231750139212e-08, |
|
"loss": 0.0381, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.8773006134969326, |
|
"grad_norm": 1.8636082047134532, |
|
"learning_rate": 9.258054469825972e-08, |
|
"loss": 0.0343, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.8793456032719837, |
|
"grad_norm": 1.729715689169104, |
|
"learning_rate": 8.952940969670809e-08, |
|
"loss": 0.0333, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.8813905930470347, |
|
"grad_norm": 1.075641909438574, |
|
"learning_rate": 8.652894398024137e-08, |
|
"loss": 0.0191, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.883435582822086, |
|
"grad_norm": 1.3038211172361949, |
|
"learning_rate": 8.357917850952802e-08, |
|
"loss": 0.0235, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.885480572597137, |
|
"grad_norm": 1.06035453866717, |
|
"learning_rate": 8.06801437220811e-08, |
|
"loss": 0.0191, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.887525562372188, |
|
"grad_norm": 1.2603710476757168, |
|
"learning_rate": 7.783186953194189e-08, |
|
"loss": 0.0246, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.8895705521472392, |
|
"grad_norm": 1.6288893230239516, |
|
"learning_rate": 7.503438532937169e-08, |
|
"loss": 0.036, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.8916155419222904, |
|
"grad_norm": 1.3517453504226422, |
|
"learning_rate": 7.228771998054995e-08, |
|
"loss": 0.0239, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.8936605316973414, |
|
"grad_norm": 1.1095416942390794, |
|
"learning_rate": 6.959190182727616e-08, |
|
"loss": 0.0165, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.8957055214723928, |
|
"grad_norm": 1.356854695078147, |
|
"learning_rate": 6.694695868667556e-08, |
|
"loss": 0.0236, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.8977505112474438, |
|
"grad_norm": 1.3824579801789842, |
|
"learning_rate": 6.43529178509139e-08, |
|
"loss": 0.0267, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.8997955010224947, |
|
"grad_norm": 1.5910728635319653, |
|
"learning_rate": 6.180980608691656e-08, |
|
"loss": 0.0269, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.9018404907975461, |
|
"grad_norm": 1.3467484522834312, |
|
"learning_rate": 5.9317649636088656e-08, |
|
"loss": 0.0272, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.903885480572597, |
|
"grad_norm": 1.404654837104888, |
|
"learning_rate": 5.687647421404874e-08, |
|
"loss": 0.0242, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.9059304703476483, |
|
"grad_norm": 1.2357699402907227, |
|
"learning_rate": 5.4486305010361116e-08, |
|
"loss": 0.0211, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.9079754601226995, |
|
"grad_norm": 1.1078419095507943, |
|
"learning_rate": 5.214716668827558e-08, |
|
"loss": 0.0174, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.9100204498977504, |
|
"grad_norm": 1.1455453685150343, |
|
"learning_rate": 4.985908338447476e-08, |
|
"loss": 0.0215, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.9120654396728016, |
|
"grad_norm": 1.5709389406578784, |
|
"learning_rate": 4.7622078708822184e-08, |
|
"loss": 0.0269, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.9141104294478528, |
|
"grad_norm": 1.1654623477471513, |
|
"learning_rate": 4.543617574412185e-08, |
|
"loss": 0.0207, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.9161554192229038, |
|
"grad_norm": 1.2721022886120923, |
|
"learning_rate": 4.330139704587788e-08, |
|
"loss": 0.0247, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.918200408997955, |
|
"grad_norm": 1.7353353754797876, |
|
"learning_rate": 4.1217764642062505e-08, |
|
"loss": 0.0325, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.9202453987730062, |
|
"grad_norm": 1.4523042929349372, |
|
"learning_rate": 3.9185300032889005e-08, |
|
"loss": 0.0245, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.9222903885480571, |
|
"grad_norm": 1.038008423835432, |
|
"learning_rate": 3.720402419058966e-08, |
|
"loss": 0.0172, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9243353783231085, |
|
"grad_norm": 1.5830227670771397, |
|
"learning_rate": 3.5273957559199265e-08, |
|
"loss": 0.0363, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.9263803680981595, |
|
"grad_norm": 1.3333668595314416, |
|
"learning_rate": 3.339512005434309e-08, |
|
"loss": 0.0299, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.9284253578732107, |
|
"grad_norm": 1.013011546713111, |
|
"learning_rate": 3.156753106303367e-08, |
|
"loss": 0.0211, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.9304703476482619, |
|
"grad_norm": 1.0888245467200752, |
|
"learning_rate": 2.979120944346936e-08, |
|
"loss": 0.0197, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.9325153374233128, |
|
"grad_norm": 1.5607072199045122, |
|
"learning_rate": 2.8066173524839978e-08, |
|
"loss": 0.0254, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.934560327198364, |
|
"grad_norm": 1.299299607811048, |
|
"learning_rate": 2.6392441107137013e-08, |
|
"loss": 0.021, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.9366053169734152, |
|
"grad_norm": 1.6718512782375123, |
|
"learning_rate": 2.4770029460970956e-08, |
|
"loss": 0.0261, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.9386503067484662, |
|
"grad_norm": 1.5210112393820647, |
|
"learning_rate": 2.319895532739369e-08, |
|
"loss": 0.0301, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.9406952965235174, |
|
"grad_norm": 1.1866446525320051, |
|
"learning_rate": 2.1679234917721946e-08, |
|
"loss": 0.0219, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.9427402862985685, |
|
"grad_norm": 1.5585668068927292, |
|
"learning_rate": 2.0210883913376334e-08, |
|
"loss": 0.0271, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9447852760736195, |
|
"grad_norm": 1.7577228374712899, |
|
"learning_rate": 1.8793917465713686e-08, |
|
"loss": 0.0368, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.946830265848671, |
|
"grad_norm": 1.122105721519514, |
|
"learning_rate": 1.742835019587441e-08, |
|
"loss": 0.0195, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.9488752556237219, |
|
"grad_norm": 1.0658375784177427, |
|
"learning_rate": 1.6114196194628174e-08, |
|
"loss": 0.017, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.9509202453987728, |
|
"grad_norm": 1.3923391468611417, |
|
"learning_rate": 1.4851469022234e-08, |
|
"loss": 0.0273, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.9529652351738243, |
|
"grad_norm": 1.3524898267742, |
|
"learning_rate": 1.3640181708293731e-08, |
|
"loss": 0.0259, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.9550102249488752, |
|
"grad_norm": 1.8660062595825002, |
|
"learning_rate": 1.2480346751622686e-08, |
|
"loss": 0.0324, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.9570552147239264, |
|
"grad_norm": 1.179664700215166, |
|
"learning_rate": 1.137197612011809e-08, |
|
"loss": 0.0259, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.9591002044989776, |
|
"grad_norm": 1.8058704230919458, |
|
"learning_rate": 1.0315081250636405e-08, |
|
"loss": 0.0265, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.9611451942740286, |
|
"grad_norm": 1.2051173078887687, |
|
"learning_rate": 9.30967304887509e-09, |
|
"loss": 0.0195, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.9631901840490797, |
|
"grad_norm": 1.4007351229371985, |
|
"learning_rate": 8.35576188926046e-09, |
|
"loss": 0.0304, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.965235173824131, |
|
"grad_norm": 1.1573585526709194, |
|
"learning_rate": 7.453357614841116e-09, |
|
"loss": 0.0202, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.967280163599182, |
|
"grad_norm": 1.3119134280852782, |
|
"learning_rate": 6.60246953718302e-09, |
|
"loss": 0.0284, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.969325153374233, |
|
"grad_norm": 2.16522611001059, |
|
"learning_rate": 5.803106436279571e-09, |
|
"loss": 0.039, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.9713701431492843, |
|
"grad_norm": 1.2979533184475112, |
|
"learning_rate": 5.055276560454459e-09, |
|
"loss": 0.025, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.9734151329243352, |
|
"grad_norm": 1.0802181068863799, |
|
"learning_rate": 4.358987626281175e-09, |
|
"loss": 0.0151, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.9754601226993866, |
|
"grad_norm": 1.7117951934036464, |
|
"learning_rate": 3.71424681850141e-09, |
|
"loss": 0.0355, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.9775051124744376, |
|
"grad_norm": 1.3376552108682394, |
|
"learning_rate": 3.1210607899512244e-09, |
|
"loss": 0.0251, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.9795501022494888, |
|
"grad_norm": 1.1725724434752407, |
|
"learning_rate": 2.579435661492213e-09, |
|
"loss": 0.0204, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.98159509202454, |
|
"grad_norm": 1.696450698444297, |
|
"learning_rate": 2.0893770219493347e-09, |
|
"loss": 0.0299, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.983640081799591, |
|
"grad_norm": 1.325923649921468, |
|
"learning_rate": 1.6508899280515134e-09, |
|
"loss": 0.0192, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.9856850715746421, |
|
"grad_norm": 1.1813786320601327, |
|
"learning_rate": 1.2639789043805695e-09, |
|
"loss": 0.0196, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.9877300613496933, |
|
"grad_norm": 0.9284331487465304, |
|
"learning_rate": 9.286479433257e-10, |
|
"loss": 0.0144, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.9897750511247443, |
|
"grad_norm": 1.7289144251248756, |
|
"learning_rate": 6.4490050503907e-10, |
|
"loss": 0.0365, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.9918200408997955, |
|
"grad_norm": 1.3982000044906164, |
|
"learning_rate": 4.127395174036153e-10, |
|
"loss": 0.0259, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.9938650306748467, |
|
"grad_norm": 1.1534216307337495, |
|
"learning_rate": 2.321673760002918e-10, |
|
"loss": 0.0185, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.9959100204498976, |
|
"grad_norm": 1.4226080527702496, |
|
"learning_rate": 1.0318594408476045e-10, |
|
"loss": 0.0227, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.997955010224949, |
|
"grad_norm": 2.2227043437975627, |
|
"learning_rate": 2.57965525674031e-11, |
|
"loss": 0.0355, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7779216395225974, |
|
"learning_rate": 0.0, |
|
"loss": 0.029, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 978, |
|
"total_flos": 4304231890944.0, |
|
"train_loss": 0.056620436601515986, |
|
"train_runtime": 754.4403, |
|
"train_samples_per_second": 10.36, |
|
"train_steps_per_second": 1.296 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 978, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4304231890944.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|