{
  "best_metric": 1.084336519241333,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.057406780861259075,
  "eval_steps": 25,
  "global_step": 228,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00025178412658446965,
      "grad_norm": 0.2894246280193329,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 1.3358,
      "step": 1
    },
    {
      "epoch": 0.00025178412658446965,
      "eval_loss": 1.4041720628738403,
      "eval_runtime": 1.5236,
      "eval_samples_per_second": 32.818,
      "eval_steps_per_second": 8.533,
      "step": 1
    },
    {
      "epoch": 0.0005035682531689393,
      "grad_norm": 0.3336651027202606,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 1.4653,
      "step": 2
    },
    {
      "epoch": 0.0007553523797534089,
      "grad_norm": 0.36238351464271545,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.4747,
      "step": 3
    },
    {
      "epoch": 0.0010071365063378786,
      "grad_norm": 0.38116249442100525,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.4752,
      "step": 4
    },
    {
      "epoch": 0.0012589206329223482,
      "grad_norm": 0.4064643979072571,
      "learning_rate": 5.555555555555556e-05,
      "loss": 1.4008,
      "step": 5
    },
    {
      "epoch": 0.0015107047595068178,
      "grad_norm": 0.43084007501602173,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.383,
      "step": 6
    },
    {
      "epoch": 0.0017624888860912876,
      "grad_norm": 0.46177080273628235,
      "learning_rate": 7.777777777777778e-05,
      "loss": 1.36,
      "step": 7
    },
    {
      "epoch": 0.002014273012675757,
      "grad_norm": 0.37692713737487793,
      "learning_rate": 8.888888888888889e-05,
      "loss": 1.3537,
      "step": 8
    },
    {
      "epoch": 0.0022660571392602268,
      "grad_norm": 0.25504958629608154,
      "learning_rate": 0.0001,
      "loss": 1.3239,
      "step": 9
    },
    {
      "epoch": 0.0025178412658446963,
      "grad_norm": 0.23159584403038025,
      "learning_rate": 9.999536994034917e-05,
      "loss": 1.3255,
      "step": 10
    },
    {
      "epoch": 0.002769625392429166,
      "grad_norm": 0.28963929414749146,
      "learning_rate": 9.99814807141723e-05,
      "loss": 1.3385,
      "step": 11
    },
    {
      "epoch": 0.0030214095190136355,
      "grad_norm": 0.31955209374427795,
      "learning_rate": 9.995833517960035e-05,
      "loss": 1.3097,
      "step": 12
    },
    {
      "epoch": 0.003273193645598105,
      "grad_norm": 0.23069429397583008,
      "learning_rate": 9.992593809953133e-05,
      "loss": 1.2359,
      "step": 13
    },
    {
      "epoch": 0.003524977772182575,
      "grad_norm": 0.27882739901542664,
      "learning_rate": 9.988429614065029e-05,
      "loss": 1.3039,
      "step": 14
    },
    {
      "epoch": 0.0037767618987670447,
      "grad_norm": 0.30989697575569153,
      "learning_rate": 9.983341787205736e-05,
      "loss": 1.3263,
      "step": 15
    },
    {
      "epoch": 0.004028546025351514,
      "grad_norm": 0.25735488533973694,
      "learning_rate": 9.97733137635045e-05,
      "loss": 1.3418,
      "step": 16
    },
    {
      "epoch": 0.0042803301519359835,
      "grad_norm": 0.18925218284130096,
      "learning_rate": 9.970399618324092e-05,
      "loss": 1.2892,
      "step": 17
    },
    {
      "epoch": 0.0045321142785204535,
      "grad_norm": 0.15477243065834045,
      "learning_rate": 9.962547939546805e-05,
      "loss": 1.2695,
      "step": 18
    },
    {
      "epoch": 0.0047838984051049235,
      "grad_norm": 0.1201956495642662,
      "learning_rate": 9.953777955740415e-05,
      "loss": 1.2515,
      "step": 19
    },
    {
      "epoch": 0.005035682531689393,
      "grad_norm": 0.08931563794612885,
      "learning_rate": 9.944091471595951e-05,
      "loss": 1.2399,
      "step": 20
    },
    {
      "epoch": 0.005287466658273863,
      "grad_norm": 0.10346703231334686,
      "learning_rate": 9.933490480402273e-05,
      "loss": 1.2807,
      "step": 21
    },
    {
      "epoch": 0.005539250784858332,
      "grad_norm": 0.14112868905067444,
      "learning_rate": 9.921977163635899e-05,
      "loss": 1.2333,
      "step": 22
    },
    {
      "epoch": 0.005791034911442802,
      "grad_norm": 0.1877644807100296,
      "learning_rate": 9.90955389051209e-05,
      "loss": 1.2533,
      "step": 23
    },
    {
      "epoch": 0.006042819038027271,
      "grad_norm": 0.215586319565773,
      "learning_rate": 9.89622321749732e-05,
      "loss": 1.2433,
      "step": 24
    },
    {
      "epoch": 0.006294603164611741,
      "grad_norm": 0.2447165846824646,
      "learning_rate": 9.881987887783194e-05,
      "loss": 1.1634,
      "step": 25
    },
    {
      "epoch": 0.006294603164611741,
      "eval_loss": 1.2141509056091309,
      "eval_runtime": 1.5236,
      "eval_samples_per_second": 32.817,
      "eval_steps_per_second": 8.532,
      "step": 25
    },
    {
      "epoch": 0.00654638729119621,
      "grad_norm": 0.1898128092288971,
      "learning_rate": 9.866850830721973e-05,
      "loss": 1.2322,
      "step": 26
    },
    {
      "epoch": 0.00679817141778068,
      "grad_norm": 0.16772133111953735,
      "learning_rate": 9.85081516122375e-05,
      "loss": 1.2542,
      "step": 27
    },
    {
      "epoch": 0.00704995554436515,
      "grad_norm": 0.11975707113742828,
      "learning_rate": 9.83388417911547e-05,
      "loss": 1.2536,
      "step": 28
    },
    {
      "epoch": 0.0073017396709496194,
      "grad_norm": 0.09228307008743286,
      "learning_rate": 9.816061368461896e-05,
      "loss": 1.2566,
      "step": 29
    },
    {
      "epoch": 0.0075535237975340895,
      "grad_norm": 0.0792098417878151,
      "learning_rate": 9.79735039684865e-05,
      "loss": 1.226,
      "step": 30
    },
    {
      "epoch": 0.007805307924118559,
      "grad_norm": 0.0848434716463089,
      "learning_rate": 9.777755114627491e-05,
      "loss": 1.1899,
      "step": 31
    },
    {
      "epoch": 0.008057092050703029,
      "grad_norm": 0.09312910586595535,
      "learning_rate": 9.757279554124004e-05,
      "loss": 1.1568,
      "step": 32
    },
    {
      "epoch": 0.008308876177287499,
      "grad_norm": 0.11826145648956299,
      "learning_rate": 9.735927928807813e-05,
      "loss": 1.2118,
      "step": 33
    },
    {
      "epoch": 0.008560660303871967,
      "grad_norm": 0.1303245574235916,
      "learning_rate": 9.713704632425529e-05,
      "loss": 1.1613,
      "step": 34
    },
    {
      "epoch": 0.008812444430456437,
      "grad_norm": 0.11749281734228134,
      "learning_rate": 9.690614238096617e-05,
      "loss": 1.2169,
      "step": 35
    },
    {
      "epoch": 0.009064228557040907,
      "grad_norm": 0.11288820207118988,
      "learning_rate": 9.666661497372324e-05,
      "loss": 1.1986,
      "step": 36
    },
    {
      "epoch": 0.009316012683625377,
      "grad_norm": 0.10559144616127014,
      "learning_rate": 9.641851339257912e-05,
      "loss": 1.2018,
      "step": 37
    },
    {
      "epoch": 0.009567796810209847,
      "grad_norm": 0.10261145234107971,
      "learning_rate": 9.616188869198361e-05,
      "loss": 1.0372,
      "step": 38
    },
    {
      "epoch": 0.009819580936794315,
      "grad_norm": 0.09313081204891205,
      "learning_rate": 9.589679368027765e-05,
      "loss": 1.2235,
      "step": 39
    },
    {
      "epoch": 0.010071365063378785,
      "grad_norm": 0.08457491546869278,
      "learning_rate": 9.562328290882643e-05,
      "loss": 1.2094,
      "step": 40
    },
    {
      "epoch": 0.010323149189963255,
      "grad_norm": 0.08044976741075516,
      "learning_rate": 9.534141266079385e-05,
      "loss": 1.2016,
      "step": 41
    },
    {
      "epoch": 0.010574933316547725,
      "grad_norm": 0.08418776094913483,
      "learning_rate": 9.505124093956045e-05,
      "loss": 1.2094,
      "step": 42
    },
    {
      "epoch": 0.010826717443132194,
      "grad_norm": 0.09928078949451447,
      "learning_rate": 9.475282745678749e-05,
      "loss": 1.1695,
      "step": 43
    },
    {
      "epoch": 0.011078501569716664,
      "grad_norm": 0.09286114573478699,
      "learning_rate": 9.444623362012944e-05,
      "loss": 1.0966,
      "step": 44
    },
    {
      "epoch": 0.011330285696301134,
      "grad_norm": 0.09147851914167404,
      "learning_rate": 9.413152252059749e-05,
      "loss": 1.1769,
      "step": 45
    },
    {
      "epoch": 0.011582069822885604,
      "grad_norm": 0.09161302447319031,
      "learning_rate": 9.380875891957674e-05,
      "loss": 1.1761,
      "step": 46
    },
    {
      "epoch": 0.011833853949470074,
      "grad_norm": 0.09320323169231415,
      "learning_rate": 9.347800923549942e-05,
      "loss": 1.1795,
      "step": 47
    },
    {
      "epoch": 0.012085638076054542,
      "grad_norm": 0.08234869688749313,
      "learning_rate": 9.313934153017741e-05,
      "loss": 1.1745,
      "step": 48
    },
    {
      "epoch": 0.012337422202639012,
      "grad_norm": 0.08729473501443863,
      "learning_rate": 9.279282549479634e-05,
      "loss": 1.1614,
      "step": 49
    },
    {
      "epoch": 0.012589206329223482,
      "grad_norm": 0.11173596233129501,
      "learning_rate": 9.243853243557462e-05,
      "loss": 1.0938,
      "step": 50
    },
    {
      "epoch": 0.012589206329223482,
      "eval_loss": 1.1531009674072266,
      "eval_runtime": 1.5229,
      "eval_samples_per_second": 32.832,
      "eval_steps_per_second": 8.536,
      "step": 50
    },
    {
      "epoch": 0.012840990455807952,
      "grad_norm": 0.10851637274026871,
      "learning_rate": 9.207653525908994e-05,
      "loss": 1.1919,
      "step": 51
    },
    {
      "epoch": 0.01309277458239242,
      "grad_norm": 0.12280933558940887,
      "learning_rate": 9.170690845727655e-05,
      "loss": 1.2008,
      "step": 52
    },
    {
      "epoch": 0.01334455870897689,
      "grad_norm": 0.11200592666864395,
      "learning_rate": 9.132972809209626e-05,
      "loss": 1.1743,
      "step": 53
    },
    {
      "epoch": 0.01359634283556136,
      "grad_norm": 0.10003667324781418,
      "learning_rate": 9.094507177988643e-05,
      "loss": 1.1631,
      "step": 54
    },
    {
      "epoch": 0.01384812696214583,
      "grad_norm": 0.09316058456897736,
      "learning_rate": 9.055301867538794e-05,
      "loss": 1.2185,
      "step": 55
    },
    {
      "epoch": 0.0140999110887303,
      "grad_norm": 0.07689037919044495,
      "learning_rate": 9.01536494554568e-05,
      "loss": 1.1249,
      "step": 56
    },
    {
      "epoch": 0.014351695215314769,
      "grad_norm": 0.0907893106341362,
      "learning_rate": 8.974704630246239e-05,
      "loss": 1.1772,
      "step": 57
    },
    {
      "epoch": 0.014603479341899239,
      "grad_norm": 0.09316601604223251,
      "learning_rate": 8.933329288737597e-05,
      "loss": 1.1482,
      "step": 58
    },
    {
      "epoch": 0.014855263468483709,
      "grad_norm": 0.09033600986003876,
      "learning_rate": 8.89124743525527e-05,
      "loss": 1.1258,
      "step": 59
    },
    {
      "epoch": 0.015107047595068179,
      "grad_norm": 0.10970163345336914,
      "learning_rate": 8.848467729421124e-05,
      "loss": 1.1349,
      "step": 60
    },
    {
      "epoch": 0.015358831721652647,
      "grad_norm": 0.10259843617677689,
      "learning_rate": 8.804998974461371e-05,
      "loss": 1.1576,
      "step": 61
    },
    {
      "epoch": 0.015610615848237117,
      "grad_norm": 0.11174706369638443,
      "learning_rate": 8.760850115395054e-05,
      "loss": 1.1705,
      "step": 62
    },
    {
      "epoch": 0.015862399974821587,
      "grad_norm": 0.08973610401153564,
      "learning_rate": 8.716030237193325e-05,
      "loss": 1.1515,
      "step": 63
    },
    {
      "epoch": 0.016114184101406057,
      "grad_norm": 0.08280681818723679,
      "learning_rate": 8.670548562909947e-05,
      "loss": 1.1607,
      "step": 64
    },
    {
      "epoch": 0.016365968227990527,
      "grad_norm": 0.09221348911523819,
      "learning_rate": 8.624414451783364e-05,
      "loss": 1.1482,
      "step": 65
    },
    {
      "epoch": 0.016617752354574997,
      "grad_norm": 0.09574998915195465,
      "learning_rate": 8.577637397310749e-05,
      "loss": 1.241,
      "step": 66
    },
    {
      "epoch": 0.016869536481159467,
      "grad_norm": 0.09795909374952316,
      "learning_rate": 8.530227025294435e-05,
      "loss": 1.1739,
      "step": 67
    },
    {
      "epoch": 0.017121320607743934,
      "grad_norm": 0.09504640102386475,
      "learning_rate": 8.482193091861112e-05,
      "loss": 1.1334,
      "step": 68
    },
    {
      "epoch": 0.017373104734328404,
      "grad_norm": 0.08822384476661682,
      "learning_rate": 8.433545481454206e-05,
      "loss": 1.137,
      "step": 69
    },
    {
      "epoch": 0.017624888860912874,
      "grad_norm": 0.0878458246588707,
      "learning_rate": 8.384294204799853e-05,
      "loss": 1.1719,
      "step": 70
    },
    {
      "epoch": 0.017876672987497344,
      "grad_norm": 0.08970195800065994,
      "learning_rate": 8.334449396846886e-05,
      "loss": 1.1298,
      "step": 71
    },
    {
      "epoch": 0.018128457114081814,
      "grad_norm": 0.0950387567281723,
      "learning_rate": 8.284021314681265e-05,
      "loss": 1.1556,
      "step": 72
    },
    {
      "epoch": 0.018380241240666284,
      "grad_norm": 0.09765107929706573,
      "learning_rate": 8.233020335415371e-05,
      "loss": 1.1732,
      "step": 73
    },
    {
      "epoch": 0.018632025367250754,
      "grad_norm": 0.11165706068277359,
      "learning_rate": 8.18145695405259e-05,
      "loss": 1.1746,
      "step": 74
    },
    {
      "epoch": 0.018883809493835224,
      "grad_norm": 0.15581081807613373,
      "learning_rate": 8.129341781327658e-05,
      "loss": 1.0963,
      "step": 75
    },
    {
      "epoch": 0.018883809493835224,
      "eval_loss": 1.122991681098938,
      "eval_runtime": 1.5233,
      "eval_samples_per_second": 32.822,
      "eval_steps_per_second": 8.534,
      "step": 75
    },
    {
      "epoch": 0.019135593620419694,
      "grad_norm": 0.08741319179534912,
      "learning_rate": 8.07668554152317e-05,
      "loss": 1.1688,
      "step": 76
    },
    {
      "epoch": 0.01938737774700416,
      "grad_norm": 0.08875293284654617,
      "learning_rate": 8.02349907026274e-05,
      "loss": 1.1829,
      "step": 77
    },
    {
      "epoch": 0.01963916187358863,
      "grad_norm": 0.08861377090215683,
      "learning_rate": 7.969793312281237e-05,
      "loss": 1.1803,
      "step": 78
    },
    {
      "epoch": 0.0198909460001731,
      "grad_norm": 0.09791383892297745,
      "learning_rate": 7.915579319172573e-05,
      "loss": 1.2001,
      "step": 79
    },
    {
      "epoch": 0.02014273012675757,
      "grad_norm": 0.0991906151175499,
      "learning_rate": 7.860868247115505e-05,
      "loss": 1.1669,
      "step": 80
    },
    {
      "epoch": 0.02039451425334204,
      "grad_norm": 0.09529578685760498,
      "learning_rate": 7.805671354577908e-05,
      "loss": 1.1522,
      "step": 81
    },
    {
      "epoch": 0.02064629837992651,
      "grad_norm": 0.09793104231357574,
      "learning_rate": 7.75e-05,
      "loss": 1.1407,
      "step": 82
    },
    {
      "epoch": 0.02089808250651098,
      "grad_norm": 0.09607716649770737,
      "learning_rate": 7.693865639457011e-05,
      "loss": 1.1291,
      "step": 83
    },
    {
      "epoch": 0.02114986663309545,
      "grad_norm": 0.10056506842374802,
      "learning_rate": 7.637279824301728e-05,
      "loss": 1.1124,
      "step": 84
    },
    {
      "epoch": 0.02140165075967992,
      "grad_norm": 0.10144215822219849,
      "learning_rate": 7.580254198787463e-05,
      "loss": 1.1022,
      "step": 85
    },
    {
      "epoch": 0.021653434886264387,
      "grad_norm": 0.10326708108186722,
      "learning_rate": 7.522800497671897e-05,
      "loss": 1.073,
      "step": 86
    },
    {
      "epoch": 0.021905219012848857,
      "grad_norm": 0.1260983794927597,
      "learning_rate": 7.464930543802289e-05,
      "loss": 1.1218,
      "step": 87
    },
    {
      "epoch": 0.022157003139433327,
      "grad_norm": 0.10883725434541702,
      "learning_rate": 7.406656245682565e-05,
      "loss": 1.1167,
      "step": 88
    },
    {
      "epoch": 0.022408787266017798,
      "grad_norm": 0.09835278987884521,
      "learning_rate": 7.34798959502279e-05,
      "loss": 1.1506,
      "step": 89
    },
    {
      "epoch": 0.022660571392602268,
      "grad_norm": 0.10083261877298355,
      "learning_rate": 7.288942664271503e-05,
      "loss": 1.1792,
      "step": 90
    },
    {
      "epoch": 0.022912355519186738,
      "grad_norm": 0.10250851511955261,
      "learning_rate": 7.229527604131436e-05,
      "loss": 1.1897,
      "step": 91
    },
    {
      "epoch": 0.023164139645771208,
      "grad_norm": 0.1085507944226265,
      "learning_rate": 7.16975664105915e-05,
      "loss": 1.1402,
      "step": 92
    },
    {
      "epoch": 0.023415923772355678,
      "grad_norm": 0.10734712332487106,
      "learning_rate": 7.109642074749067e-05,
      "loss": 1.0878,
      "step": 93
    },
    {
      "epoch": 0.023667707898940148,
      "grad_norm": 0.10455948859453201,
      "learning_rate": 7.049196275602421e-05,
      "loss": 1.116,
      "step": 94
    },
    {
      "epoch": 0.023919492025524614,
      "grad_norm": 0.10420308262109756,
      "learning_rate": 6.988431682181693e-05,
      "loss": 1.1243,
      "step": 95
    },
    {
      "epoch": 0.024171276152109084,
      "grad_norm": 0.10482881218194962,
      "learning_rate": 6.927360798650978e-05,
      "loss": 1.1198,
      "step": 96
    },
    {
      "epoch": 0.024423060278693554,
      "grad_norm": 0.11121159791946411,
      "learning_rate": 6.865996192202884e-05,
      "loss": 1.1097,
      "step": 97
    },
    {
      "epoch": 0.024674844405278024,
      "grad_norm": 0.11902576684951782,
      "learning_rate": 6.804350490472446e-05,
      "loss": 1.1056,
      "step": 98
    },
    {
      "epoch": 0.024926628531862494,
      "grad_norm": 0.12510505318641663,
      "learning_rate": 6.742436378938612e-05,
      "loss": 1.109,
      "step": 99
    },
    {
      "epoch": 0.025178412658446964,
      "grad_norm": 0.1589539796113968,
      "learning_rate": 6.680266598313802e-05,
      "loss": 1.065,
      "step": 100
    },
    {
      "epoch": 0.025178412658446964,
      "eval_loss": 1.106641411781311,
      "eval_runtime": 1.5233,
      "eval_samples_per_second": 32.824,
      "eval_steps_per_second": 8.534,
      "step": 100
    },
    {
      "epoch": 0.025430196785031434,
      "grad_norm": 0.09652504324913025,
      "learning_rate": 6.617853941922146e-05,
      "loss": 1.0807,
      "step": 101
    },
    {
      "epoch": 0.025681980911615904,
      "grad_norm": 0.10196559131145477,
      "learning_rate": 6.555211253066844e-05,
      "loss": 1.1715,
      "step": 102
    },
    {
      "epoch": 0.025933765038200374,
      "grad_norm": 0.10689554363489151,
      "learning_rate": 6.49235142238728e-05,
      "loss": 1.1938,
      "step": 103
    },
    {
      "epoch": 0.02618554916478484,
      "grad_norm": 0.11235436052083969,
      "learning_rate": 6.429287385206368e-05,
      "loss": 1.2069,
      "step": 104
    },
    {
      "epoch": 0.02643733329136931,
      "grad_norm": 0.11319782584905624,
      "learning_rate": 6.366032118868734e-05,
      "loss": 1.1022,
      "step": 105
    },
    {
      "epoch": 0.02668911741795378,
      "grad_norm": 0.12285393476486206,
      "learning_rate": 6.302598640070218e-05,
      "loss": 1.131,
      "step": 106
    },
    {
      "epoch": 0.02694090154453825,
      "grad_norm": 0.11379806697368622,
      "learning_rate": 6.23900000217929e-05,
      "loss": 1.0844,
      "step": 107
    },
    {
      "epoch": 0.02719268567112272,
      "grad_norm": 0.1146794855594635,
      "learning_rate": 6.175249292550937e-05,
      "loss": 1.1122,
      "step": 108
    },
    {
      "epoch": 0.02744446979770719,
      "grad_norm": 0.1138162612915039,
      "learning_rate": 6.111359629833533e-05,
      "loss": 1.1059,
      "step": 109
    },
    {
      "epoch": 0.02769625392429166,
      "grad_norm": 0.13073591887950897,
      "learning_rate": 6.0473441612692705e-05,
      "loss": 1.1073,
      "step": 110
    },
    {
      "epoch": 0.02794803805087613,
      "grad_norm": 0.13190491497516632,
      "learning_rate": 5.9832160599887344e-05,
      "loss": 1.1323,
      "step": 111
    },
    {
      "epoch": 0.0281998221774606,
      "grad_norm": 0.1431579738855362,
      "learning_rate": 5.9189885223001094e-05,
      "loss": 1.1094,
      "step": 112
    },
    {
      "epoch": 0.028451606304045068,
      "grad_norm": 0.11174172163009644,
      "learning_rate": 5.85467476497365e-05,
      "loss": 1.0261,
      "step": 113
    },
    {
      "epoch": 0.028703390430629538,
      "grad_norm": 0.10128598660230637,
      "learning_rate": 5.790288022521925e-05,
      "loss": 1.1019,
      "step": 114
    },
    {
      "epoch": 0.028955174557214008,
      "grad_norm": 0.10618976503610611,
      "learning_rate": 5.725841544476413e-05,
      "loss": 1.1591,
      "step": 115
    },
    {
      "epoch": 0.029206958683798478,
      "grad_norm": 0.11264601349830627,
      "learning_rate": 5.661348592661009e-05,
      "loss": 1.1057,
      "step": 116
    },
    {
      "epoch": 0.029458742810382948,
      "grad_norm": 0.11875821650028229,
      "learning_rate": 5.596822438463001e-05,
      "loss": 1.1278,
      "step": 117
    },
    {
      "epoch": 0.029710526936967418,
      "grad_norm": 0.13417275249958038,
      "learning_rate": 5.532276360102076e-05,
      "loss": 1.0758,
      "step": 118
    },
    {
      "epoch": 0.029962311063551888,
      "grad_norm": 0.12961986660957336,
      "learning_rate": 5.467723639897926e-05,
      "loss": 1.0888,
      "step": 119
    },
    {
      "epoch": 0.030214095190136358,
      "grad_norm": 0.12340384721755981,
      "learning_rate": 5.4031775615370004e-05,
      "loss": 1.098,
      "step": 120
    },
    {
      "epoch": 0.030465879316720828,
      "grad_norm": 0.11747000366449356,
      "learning_rate": 5.3386514073389936e-05,
      "loss": 1.1203,
      "step": 121
    },
    {
      "epoch": 0.030717663443305294,
      "grad_norm": 0.11870179325342178,
      "learning_rate": 5.274158455523588e-05,
      "loss": 1.1258,
      "step": 122
    },
    {
      "epoch": 0.030969447569889764,
      "grad_norm": 0.12783485651016235,
      "learning_rate": 5.209711977478078e-05,
      "loss": 1.097,
      "step": 123
    },
    {
      "epoch": 0.031221231696474235,
      "grad_norm": 0.14884835481643677,
      "learning_rate": 5.145325235026351e-05,
      "loss": 1.103,
      "step": 124
    },
    {
      "epoch": 0.031473015823058705,
      "grad_norm": 0.17791038751602173,
      "learning_rate": 5.081011477699893e-05,
      "loss": 1.0389,
      "step": 125
    },
    {
      "epoch": 0.031473015823058705,
      "eval_loss": 1.096808671951294,
      "eval_runtime": 1.5229,
      "eval_samples_per_second": 32.831,
      "eval_steps_per_second": 8.536,
      "step": 125
    },
    {
      "epoch": 0.031724799949643175,
      "grad_norm": 0.11103296279907227,
      "learning_rate": 5.016783940011267e-05,
      "loss": 1.1911,
      "step": 126
    },
    {
      "epoch": 0.031976584076227645,
      "grad_norm": 0.11420492827892303,
      "learning_rate": 4.952655838730731e-05,
      "loss": 1.1282,
      "step": 127
    },
    {
      "epoch": 0.032228368202812115,
      "grad_norm": 0.11212627589702606,
      "learning_rate": 4.888640370166469e-05,
      "loss": 1.1621,
      "step": 128
    },
    {
      "epoch": 0.032480152329396585,
      "grad_norm": 0.11292574554681778,
      "learning_rate": 4.824750707449064e-05,
      "loss": 1.1511,
      "step": 129
    },
    {
      "epoch": 0.032731936455981055,
      "grad_norm": 0.12400747090578079,
      "learning_rate": 4.760999997820711e-05,
      "loss": 1.1402,
      "step": 130
    },
    {
      "epoch": 0.032983720582565525,
      "grad_norm": 0.1370488405227661,
      "learning_rate": 4.6974013599297837e-05,
      "loss": 1.0905,
      "step": 131
    },
    {
      "epoch": 0.033235504709149995,
      "grad_norm": 0.1564428210258484,
      "learning_rate": 4.633967881131266e-05,
      "loss": 1.1266,
      "step": 132
    },
    {
      "epoch": 0.033487288835734465,
      "grad_norm": 0.12354940921068192,
      "learning_rate": 4.570712614793633e-05,
      "loss": 1.1013,
      "step": 133
    },
    {
      "epoch": 0.033739072962318935,
      "grad_norm": 0.12833015620708466,
      "learning_rate": 4.507648577612722e-05,
      "loss": 1.1246,
      "step": 134
    },
    {
      "epoch": 0.033990857088903405,
      "grad_norm": 0.12590007483959198,
      "learning_rate": 4.4447887469331574e-05,
      "loss": 1.1417,
      "step": 135
    },
    {
      "epoch": 0.03424264121548787,
      "grad_norm": 0.13550645112991333,
      "learning_rate": 4.382146058077855e-05,
      "loss": 1.13,
      "step": 136
    },
    {
      "epoch": 0.03449442534207234,
      "grad_norm": 0.14064566791057587,
      "learning_rate": 4.319733401686199e-05,
      "loss": 1.0597,
      "step": 137
    },
    {
      "epoch": 0.03474620946865681,
      "grad_norm": 0.1269257515668869,
      "learning_rate": 4.25756362106139e-05,
      "loss": 1.1038,
      "step": 138
    },
    {
      "epoch": 0.03499799359524128,
      "grad_norm": 0.11279810220003128,
      "learning_rate": 4.195649509527555e-05,
      "loss": 1.1551,
      "step": 139
    },
    {
      "epoch": 0.03524977772182575,
      "grad_norm": 0.1206924095749855,
      "learning_rate": 4.134003807797116e-05,
      "loss": 1.1122,
      "step": 140
    },
    {
      "epoch": 0.03550156184841022,
      "grad_norm": 0.12054243683815002,
      "learning_rate": 4.0726392013490235e-05,
      "loss": 1.188,
      "step": 141
    },
    {
      "epoch": 0.03575334597499469,
      "grad_norm": 0.1204872876405716,
      "learning_rate": 4.0115683178183084e-05,
      "loss": 1.0985,
      "step": 142
    },
    {
      "epoch": 0.03600513010157916,
      "grad_norm": 0.12411932647228241,
      "learning_rate": 3.95080372439758e-05,
      "loss": 1.1114,
      "step": 143
    },
    {
      "epoch": 0.03625691422816363,
      "grad_norm": 0.12376714497804642,
      "learning_rate": 3.8903579252509345e-05,
      "loss": 1.0835,
      "step": 144
    },
    {
      "epoch": 0.0365086983547481,
      "grad_norm": 0.12805478274822235,
      "learning_rate": 3.8302433589408525e-05,
      "loss": 1.1225,
      "step": 145
    },
    {
      "epoch": 0.03676048248133257,
      "grad_norm": 0.12796953320503235,
      "learning_rate": 3.770472395868566e-05,
      "loss": 1.1206,
      "step": 146
    },
    {
      "epoch": 0.03701226660791704,
      "grad_norm": 0.12267071008682251,
      "learning_rate": 3.711057335728499e-05,
      "loss": 1.1043,
      "step": 147
    },
    {
      "epoch": 0.03726405073450151,
      "grad_norm": 0.12217283993959427,
      "learning_rate": 3.65201040497721e-05,
      "loss": 1.0964,
      "step": 148
    },
    {
      "epoch": 0.03751583486108598,
      "grad_norm": 0.1348910927772522,
      "learning_rate": 3.5933437543174363e-05,
      "loss": 1.1044,
      "step": 149
    },
    {
      "epoch": 0.03776761898767045,
      "grad_norm": 0.16145659983158112,
      "learning_rate": 3.5350694561977125e-05,
      "loss": 1.0322,
      "step": 150
    },
    {
      "epoch": 0.03776761898767045,
      "eval_loss": 1.090155839920044,
      "eval_runtime": 1.5223,
      "eval_samples_per_second": 32.844,
      "eval_steps_per_second": 8.539,
      "step": 150
    },
    {
      "epoch": 0.03801940311425492,
      "grad_norm": 0.11269905418157578,
      "learning_rate": 3.4771995023281044e-05,
      "loss": 1.1413,
      "step": 151
    },
    {
      "epoch": 0.03827118724083939,
      "grad_norm": 0.11654523015022278,
      "learning_rate": 3.419745801212538e-05,
      "loss": 1.1552,
      "step": 152
    },
    {
      "epoch": 0.03852297136742386,
      "grad_norm": 0.12446989119052887,
      "learning_rate": 3.362720175698275e-05,
      "loss": 1.1491,
      "step": 153
    },
    {
      "epoch": 0.03877475549400832,
      "grad_norm": 0.1188817173242569,
      "learning_rate": 3.30613436054299e-05,
      "loss": 1.1733,
      "step": 154
    },
    {
      "epoch": 0.03902653962059279,
      "grad_norm": 0.12001082301139832,
      "learning_rate": 3.250000000000001e-05,
      "loss": 1.1019,
      "step": 155
    },
    {
      "epoch": 0.03927832374717726,
      "grad_norm": 0.12011527270078659,
      "learning_rate": 3.194328645422094e-05,
      "loss": 1.1118,
      "step": 156
    },
    {
      "epoch": 0.03953010787376173,
      "grad_norm": 0.1201213002204895,
      "learning_rate": 3.1391317528844965e-05,
      "loss": 1.1333,
      "step": 157
    },
    {
      "epoch": 0.0397818920003462,
      "grad_norm": 0.1220758855342865,
      "learning_rate": 3.0844206808274287e-05,
      "loss": 1.096,
      "step": 158
    },
    {
      "epoch": 0.04003367612693067,
      "grad_norm": 0.12169603258371353,
      "learning_rate": 3.030206687718765e-05,
      "loss": 1.1063,
      "step": 159
    },
    {
      "epoch": 0.04028546025351514,
      "grad_norm": 0.1275063306093216,
      "learning_rate": 2.9765009297372602e-05,
      "loss": 1.1171,
      "step": 160
    },
    {
      "epoch": 0.04053724438009961,
      "grad_norm": 0.13592584431171417,
      "learning_rate": 2.92331445847683e-05,
      "loss": 1.1148,
      "step": 161
    },
    {
      "epoch": 0.04078902850668408,
      "grad_norm": 0.1522008776664734,
      "learning_rate": 2.8706582186723417e-05,
      "loss": 1.0848,
      "step": 162
    },
    {
      "epoch": 0.04104081263326855,
      "grad_norm": 0.12438967078924179,
      "learning_rate": 2.8185430459474105e-05,
      "loss": 1.1159,
      "step": 163
    },
    {
      "epoch": 0.04129259675985302,
      "grad_norm": 0.11593794822692871,
      "learning_rate": 2.76697966458463e-05,
      "loss": 1.1523,
      "step": 164
    },
    {
      "epoch": 0.04154438088643749,
      "grad_norm": 0.1169944554567337,
      "learning_rate": 2.7159786853187362e-05,
      "loss": 1.1226,
      "step": 165
    },
    {
      "epoch": 0.04179616501302196,
      "grad_norm": 0.1212480217218399,
      "learning_rate": 2.6655506031531153e-05,
      "loss": 1.1441,
      "step": 166
    },
    {
      "epoch": 0.04204794913960643,
      "grad_norm": 0.12019576132297516,
      "learning_rate": 2.6157057952001485e-05,
      "loss": 1.1018,
      "step": 167
    },
    {
      "epoch": 0.0422997332661909,
      "grad_norm": 0.12491545081138611,
      "learning_rate": 2.5664545185457933e-05,
      "loss": 1.1019,
      "step": 168
    },
    {
      "epoch": 0.04255151739277537,
      "grad_norm": 0.13169804215431213,
      "learning_rate": 2.517806908138888e-05,
      "loss": 1.0623,
      "step": 169
    },
    {
      "epoch": 0.04280330151935984,
      "grad_norm": 0.13421769440174103,
      "learning_rate": 2.469772974705565e-05,
      "loss": 1.0871,
      "step": 170
    },
    {
      "epoch": 0.04305508564594431,
      "grad_norm": 0.12427503615617752,
      "learning_rate": 2.422362602689254e-05,
      "loss": 1.0794,
      "step": 171
    },
    {
      "epoch": 0.043306869772528775,
      "grad_norm": 0.12563304603099823,
      "learning_rate": 2.3755855482166378e-05,
      "loss": 1.1098,
      "step": 172
    },
    {
      "epoch": 0.043558653899113245,
      "grad_norm": 0.13072235882282257,
      "learning_rate": 2.3294514370900542e-05,
      "loss": 1.0911,
      "step": 173
    },
    {
      "epoch": 0.043810438025697715,
      "grad_norm": 0.14273561537265778,
      "learning_rate": 2.283969762806676e-05,
      "loss": 1.109,
      "step": 174
    },
    {
      "epoch": 0.044062222152282185,
      "grad_norm": 0.1899888515472412,
      "learning_rate": 2.239149884604948e-05,
      "loss": 0.9888,
      "step": 175
    },
    {
      "epoch": 0.044062222152282185,
      "eval_loss": 1.0866016149520874,
      "eval_runtime": 1.5236,
      "eval_samples_per_second": 32.816,
      "eval_steps_per_second": 8.532,
      "step": 175
    },
    {
      "epoch": 0.044314006278866655,
      "grad_norm": 0.10499967634677887,
      "learning_rate": 2.1950010255386297e-05,
      "loss": 1.0887,
      "step": 176
    },
    {
      "epoch": 0.044565790405451125,
      "grad_norm": 0.11740684509277344,
      "learning_rate": 2.1515322705788788e-05,
      "loss": 1.1305,
      "step": 177
    },
    {
      "epoch": 0.044817574532035595,
      "grad_norm": 0.12669669091701508,
      "learning_rate": 2.108752564744731e-05,
      "loss": 1.1639,
      "step": 178
    },
    {
      "epoch": 0.045069358658620065,
      "grad_norm": 0.12538516521453857,
      "learning_rate": 2.0666707112624058e-05,
      "loss": 1.1097,
      "step": 179
    },
    {
      "epoch": 0.045321142785204535,
      "grad_norm": 0.12294058501720428,
      "learning_rate": 2.025295369753761e-05,
      "loss": 1.122,
      "step": 180
    },
    {
      "epoch": 0.045572926911789005,
      "grad_norm": 0.12443839758634567,
      "learning_rate": 1.9846350544543215e-05,
      "loss": 1.0877,
      "step": 181
    },
    {
      "epoch": 0.045824711038373475,
      "grad_norm": 0.12381776422262192,
      "learning_rate": 1.944698132461207e-05,
      "loss": 1.0927,
      "step": 182
    },
    {
      "epoch": 0.046076495164957945,
      "grad_norm": 0.12740595638751984,
      "learning_rate": 1.905492822011359e-05,
      "loss": 1.1031,
      "step": 183
    },
    {
      "epoch": 0.046328279291542415,
      "grad_norm": 0.12863051891326904,
      "learning_rate": 1.8670271907903737e-05,
      "loss": 1.1065,
      "step": 184
    },
    {
      "epoch": 0.046580063418126885,
      "grad_norm": 0.1300704926252365,
      "learning_rate": 1.829309154272346e-05,
      "loss": 1.1169,
      "step": 185
    },
    {
      "epoch": 0.046831847544711355,
      "grad_norm": 0.13567733764648438,
      "learning_rate": 1.7923464740910063e-05,
      "loss": 1.1257,
      "step": 186
    },
    {
      "epoch": 0.047083631671295825,
      "grad_norm": 0.145668625831604,
      "learning_rate": 1.756146756442539e-05,
      "loss": 1.0824,
      "step": 187
    },
    {
      "epoch": 0.047335415797880295,
      "grad_norm": 0.12303412705659866,
      "learning_rate": 1.720717450520366e-05,
      "loss": 1.0278,
      "step": 188
    },
    {
      "epoch": 0.047587199924464765,
      "grad_norm": 0.11618991941213608,
      "learning_rate": 1.686065846982261e-05,
      "loss": 1.151,
      "step": 189
    },
    {
      "epoch": 0.04783898405104923,
      "grad_norm": 0.11975349485874176,
      "learning_rate": 1.6521990764500582e-05,
      "loss": 1.1182,
      "step": 190
    },
    {
      "epoch": 0.0480907681776337,
      "grad_norm": 0.12053832411766052,
      "learning_rate": 1.619124108042327e-05,
      "loss": 1.1471,
      "step": 191
    },
    {
      "epoch": 0.04834255230421817,
      "grad_norm": 0.12356948852539062,
      "learning_rate": 1.5868477479402504e-05,
      "loss": 1.1284,
      "step": 192
    },
    {
      "epoch": 0.04859433643080264,
      "grad_norm": 0.12398120015859604,
      "learning_rate": 1.5553766379870584e-05,
      "loss": 1.1457,
      "step": 193
    },
    {
      "epoch": 0.04884612055738711,
      "grad_norm": 0.12426292151212692,
      "learning_rate": 1.5247172543212521e-05,
      "loss": 1.0898,
      "step": 194
    },
    {
      "epoch": 0.04909790468397158,
      "grad_norm": 0.12630872428417206,
      "learning_rate": 1.4948759060439551e-05,
      "loss": 1.0753,
      "step": 195
    },
    {
      "epoch": 0.04934968881055605,
      "grad_norm": 0.1257810890674591,
      "learning_rate": 1.4658587339206153e-05,
      "loss": 1.1022,
      "step": 196
    },
    {
      "epoch": 0.04960147293714052,
      "grad_norm": 0.12942546606063843,
      "learning_rate": 1.4376717091173584e-05,
      "loss": 1.0655,
      "step": 197
    },
    {
      "epoch": 0.04985325706372499,
      "grad_norm": 0.13958528637886047,
      "learning_rate": 1.410320631972237e-05,
      "loss": 1.0952,
      "step": 198
    },
    {
      "epoch": 0.05010504119030946,
      "grad_norm": 0.14069540798664093,
      "learning_rate": 1.38381113080164e-05,
      "loss": 1.0969,
      "step": 199
    },
    {
      "epoch": 0.05035682531689393,
      "grad_norm": 0.17611843347549438,
      "learning_rate": 1.3581486607420874e-05,
      "loss": 0.9935,
      "step": 200
    },
    {
      "epoch": 0.05035682531689393,
      "eval_loss": 1.084336519241333,
      "eval_runtime": 1.5232,
      "eval_samples_per_second": 32.826,
      "eval_steps_per_second": 8.535,
      "step": 200
    },
    {
      "epoch": 0.0506086094434784,
      "grad_norm": 0.11260085552930832,
      "learning_rate": 1.333338502627676e-05,
      "loss": 1.139,
      "step": 201
    },
    {
      "epoch": 0.05086039357006287,
      "grad_norm": 0.11994125694036484,
      "learning_rate": 1.3093857619033844e-05,
      "loss": 1.1337,
      "step": 202
    },
    {
      "epoch": 0.05111217769664734,
      "grad_norm": 0.11982115358114243,
      "learning_rate": 1.2862953675744722e-05,
      "loss": 1.1353,
      "step": 203
    },
    {
      "epoch": 0.05136396182323181,
      "grad_norm": 0.12615817785263062,
      "learning_rate": 1.2640720711921882e-05,
      "loss": 1.1105,
      "step": 204
    },
    {
      "epoch": 0.05161574594981628,
      "grad_norm": 0.12592007219791412,
      "learning_rate": 1.2427204458759955e-05,
      "loss": 1.0824,
      "step": 205
    },
    {
      "epoch": 0.05186753007640075,
      "grad_norm": 0.1267462968826294,
      "learning_rate": 1.2222448853725088e-05,
      "loss": 1.0621,
      "step": 206
    },
    {
      "epoch": 0.05211931420298522,
      "grad_norm": 0.13077248632907867,
      "learning_rate": 1.2026496031513518e-05,
      "loss": 1.062,
      "step": 207
    },
    {
      "epoch": 0.05237109832956968,
      "grad_norm": 0.12852057814598083,
      "learning_rate": 1.1839386315381043e-05,
      "loss": 1.0745,
      "step": 208
    },
    {
      "epoch": 0.05262288245615415,
      "grad_norm": 0.12591220438480377,
      "learning_rate": 1.1661158208845307e-05,
      "loss": 1.072,
      "step": 209
    },
    {
      "epoch": 0.05287466658273862,
      "grad_norm": 0.12850439548492432,
      "learning_rate": 1.1491848387762514e-05,
      "loss": 1.1041,
      "step": 210
    },
    {
      "epoch": 0.05312645070932309,
      "grad_norm": 0.13350600004196167,
      "learning_rate": 1.1331491692780279e-05,
      "loss": 1.096,
      "step": 211
    },
    {
      "epoch": 0.05337823483590756,
      "grad_norm": 0.14876709878444672,
      "learning_rate": 1.1180121122168064e-05,
      "loss": 1.0571,
      "step": 212
    },
    {
      "epoch": 0.05363001896249203,
      "grad_norm": 0.12453413009643555,
      "learning_rate": 1.1037767825026826e-05,
      "loss": 1.0797,
      "step": 213
    },
    {
      "epoch": 0.0538818030890765,
      "grad_norm": 0.1117461547255516,
      "learning_rate": 1.0904461094879107e-05,
      "loss": 1.0913,
      "step": 214
    },
    {
      "epoch": 0.05413358721566097,
      "grad_norm": 0.11662810295820236,
      "learning_rate": 1.0780228363641018e-05,
      "loss": 1.1452,
      "step": 215
    },
    {
      "epoch": 0.05438537134224544,
      "grad_norm": 0.12274463474750519,
      "learning_rate": 1.0665095195977271e-05,
      "loss": 1.1162,
      "step": 216
    },
    {
      "epoch": 0.05463715546882991,
      "grad_norm": 0.12894800305366516,
      "learning_rate": 1.0559085284040506e-05,
      "loss": 1.0798,
      "step": 217
    },
    {
      "epoch": 0.05488893959541438,
      "grad_norm": 0.12095481902360916,
      "learning_rate": 1.0462220442595853e-05,
      "loss": 1.0852,
      "step": 218
    },
    {
      "epoch": 0.05514072372199885,
      "grad_norm": 0.12701094150543213,
      "learning_rate": 1.0374520604531953e-05,
      "loss": 1.0814,
      "step": 219
    },
    {
      "epoch": 0.05539250784858332,
      "grad_norm": 0.12494029849767685,
      "learning_rate": 1.0296003816759086e-05,
      "loss": 1.086,
      "step": 220
    },
    {
      "epoch": 0.05564429197516779,
      "grad_norm": 0.1267097145318985,
      "learning_rate": 1.0226686236495517e-05,
      "loss": 1.1057,
      "step": 221
    },
    {
      "epoch": 0.05589607610175226,
      "grad_norm": 0.13075008988380432,
      "learning_rate": 1.0166582127942649e-05,
      "loss": 1.1317,
      "step": 222
    },
    {
      "epoch": 0.05614786022833673,
      "grad_norm": 0.13261082768440247,
      "learning_rate": 1.0115703859349725e-05,
      "loss": 1.1039,
      "step": 223
    },
    {
      "epoch": 0.0563996443549212,
      "grad_norm": 0.14049747586250305,
      "learning_rate": 1.0074061900468672e-05,
      "loss": 1.116,
      "step": 224
    },
    {
      "epoch": 0.05665142848150567,
      "grad_norm": 0.19462983310222626,
      "learning_rate": 1.0041664820399652e-05,
      "loss": 0.9881,
      "step": 225
    },
    {
      "epoch": 0.05665142848150567,
      "eval_loss": 1.0821517705917358,
      "eval_runtime": 1.523,
      "eval_samples_per_second": 32.83,
      "eval_steps_per_second": 8.536,
      "step": 225
    },
    {
      "epoch": 0.056903212608090135,
      "grad_norm": 0.10417667776346207,
      "learning_rate": 1.0018519285827698e-05,
      "loss": 1.0784,
      "step": 226
    },
    {
      "epoch": 0.057154996734674605,
      "grad_norm": 0.1178843155503273,
      "learning_rate": 1.000463005965084e-05,
      "loss": 1.1522,
      "step": 227
    },
    {
      "epoch": 0.057406780861259075,
      "grad_norm": 0.12229274213314056,
      "learning_rate": 1e-05,
      "loss": 1.1221,
      "step": 228
    }
  ],
  "logging_steps": 1,
  "max_steps": 228,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 30,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4583097096570143e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}