{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4999552075681293,
  "eval_steps": 500,
  "global_step": 872,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005733431279451024,
      "grad_norm": 343.8302001953125,
      "learning_rate": 3.7037037037037036e-07,
      "loss": 15.7056,
      "step": 1
    },
    {
      "epoch": 0.0011466862558902047,
      "grad_norm": 354.2492370605469,
      "learning_rate": 7.407407407407407e-07,
      "loss": 15.7685,
      "step": 2
    },
    {
      "epoch": 0.0017200293838353072,
      "grad_norm": 332.5870666503906,
      "learning_rate": 1.111111111111111e-06,
      "loss": 15.7735,
      "step": 3
    },
    {
      "epoch": 0.0022933725117804094,
      "grad_norm": 337.5965576171875,
      "learning_rate": 1.4814814814814815e-06,
      "loss": 15.6978,
      "step": 4
    },
    {
      "epoch": 0.002866715639725512,
      "grad_norm": 337.68743896484375,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 15.7328,
      "step": 5
    },
    {
      "epoch": 0.0034400587676706143,
      "grad_norm": 352.12841796875,
      "learning_rate": 2.222222222222222e-06,
      "loss": 15.7606,
      "step": 6
    },
    {
      "epoch": 0.004013401895615717,
      "grad_norm": 350.4210510253906,
      "learning_rate": 2.5925925925925925e-06,
      "loss": 15.7352,
      "step": 7
    },
    {
      "epoch": 0.004586745023560819,
      "grad_norm": 326.6189270019531,
      "learning_rate": 2.962962962962963e-06,
      "loss": 15.7233,
      "step": 8
    },
    {
      "epoch": 0.005160088151505922,
      "grad_norm": 337.8202819824219,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 15.7473,
      "step": 9
    },
    {
      "epoch": 0.005733431279451024,
      "grad_norm": 346.01373291015625,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 15.7525,
      "step": 10
    },
    {
      "epoch": 0.006306774407396127,
      "grad_norm": 345.3333435058594,
      "learning_rate": 4.074074074074074e-06,
      "loss": 15.7312,
      "step": 11
    },
    {
      "epoch": 0.006880117535341229,
      "grad_norm": 366.6058044433594,
      "learning_rate": 4.444444444444444e-06,
      "loss": 15.8701,
      "step": 12
    },
    {
      "epoch": 0.0074534606632863315,
      "grad_norm": 343.2021484375,
      "learning_rate": 4.814814814814815e-06,
      "loss": 15.7247,
      "step": 13
    },
    {
      "epoch": 0.008026803791231434,
      "grad_norm": 355.9106750488281,
      "learning_rate": 5.185185185185185e-06,
      "loss": 15.8606,
      "step": 14
    },
    {
      "epoch": 0.008600146919176536,
      "grad_norm": 348.240966796875,
      "learning_rate": 5.555555555555557e-06,
      "loss": 15.7669,
      "step": 15
    },
    {
      "epoch": 0.009173490047121638,
      "grad_norm": 338.352783203125,
      "learning_rate": 5.925925925925926e-06,
      "loss": 15.7275,
      "step": 16
    },
    {
      "epoch": 0.00974683317506674,
      "grad_norm": 345.3967590332031,
      "learning_rate": 6.296296296296297e-06,
      "loss": 15.6908,
      "step": 17
    },
    {
      "epoch": 0.010320176303011843,
      "grad_norm": 337.52459716796875,
      "learning_rate": 6.666666666666667e-06,
      "loss": 15.7063,
      "step": 18
    },
    {
      "epoch": 0.010893519430956946,
      "grad_norm": 348.9382019042969,
      "learning_rate": 7.0370370370370375e-06,
      "loss": 15.7312,
      "step": 19
    },
    {
      "epoch": 0.011466862558902047,
      "grad_norm": 367.71954345703125,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 15.6506,
      "step": 20
    },
    {
      "epoch": 0.01204020568684715,
      "grad_norm": 340.8507080078125,
      "learning_rate": 7.77777777777778e-06,
      "loss": 15.6737,
      "step": 21
    },
    {
      "epoch": 0.012613548814792253,
      "grad_norm": 342.04400634765625,
      "learning_rate": 8.148148148148148e-06,
      "loss": 15.7023,
      "step": 22
    },
    {
      "epoch": 0.013186891942737354,
      "grad_norm": 341.126708984375,
      "learning_rate": 8.518518518518519e-06,
      "loss": 15.7475,
      "step": 23
    },
    {
      "epoch": 0.013760235070682457,
      "grad_norm": 355.2649841308594,
      "learning_rate": 8.888888888888888e-06,
      "loss": 15.719,
      "step": 24
    },
    {
      "epoch": 0.01433357819862756,
      "grad_norm": 332.40753173828125,
      "learning_rate": 9.25925925925926e-06,
      "loss": 15.7316,
      "step": 25
    },
    {
      "epoch": 0.014906921326572663,
      "grad_norm": 337.4178771972656,
      "learning_rate": 9.62962962962963e-06,
      "loss": 15.7153,
      "step": 26
    },
    {
      "epoch": 0.015480264454517764,
      "grad_norm": 337.7605895996094,
      "learning_rate": 1e-05,
      "loss": 15.6421,
      "step": 27
    },
    {
      "epoch": 0.016053607582462867,
      "grad_norm": 339.8226013183594,
      "learning_rate": 9.999965443811378e-06,
      "loss": 15.7325,
      "step": 28
    },
    {
      "epoch": 0.01662695071040797,
      "grad_norm": 338.6639709472656,
      "learning_rate": 9.999861775723162e-06,
      "loss": 15.5632,
      "step": 29
    },
    {
      "epoch": 0.017200293838353073,
      "grad_norm": 313.73358154296875,
      "learning_rate": 9.999688997168301e-06,
      "loss": 15.6312,
      "step": 30
    },
    {
      "epoch": 0.017773636966298176,
      "grad_norm": 324.01812744140625,
      "learning_rate": 9.999447110535026e-06,
      "loss": 15.568,
      "step": 31
    },
    {
      "epoch": 0.018346980094243275,
      "grad_norm": 350.6103820800781,
      "learning_rate": 9.999136119166803e-06,
      "loss": 15.6763,
      "step": 32
    },
    {
      "epoch": 0.018920323222188378,
      "grad_norm": 345.98101806640625,
      "learning_rate": 9.998756027362308e-06,
      "loss": 15.6779,
      "step": 33
    },
    {
      "epoch": 0.01949366635013348,
      "grad_norm": 332.9476013183594,
      "learning_rate": 9.99830684037535e-06,
      "loss": 15.5666,
      "step": 34
    },
    {
      "epoch": 0.020067009478078584,
      "grad_norm": 323.1610412597656,
      "learning_rate": 9.9977885644148e-06,
      "loss": 15.5645,
      "step": 35
    },
    {
      "epoch": 0.020640352606023687,
      "grad_norm": 345.10504150390625,
      "learning_rate": 9.997201206644522e-06,
      "loss": 15.6663,
      "step": 36
    },
    {
      "epoch": 0.02121369573396879,
      "grad_norm": 327.5580749511719,
      "learning_rate": 9.99654477518325e-06,
      "loss": 15.5561,
      "step": 37
    },
    {
      "epoch": 0.021787038861913893,
      "grad_norm": 321.0745849609375,
      "learning_rate": 9.995819279104494e-06,
      "loss": 15.6465,
      "step": 38
    },
    {
      "epoch": 0.022360381989858992,
      "grad_norm": 331.85736083984375,
      "learning_rate": 9.995024728436402e-06,
      "loss": 15.6326,
      "step": 39
    },
    {
      "epoch": 0.022933725117804095,
      "grad_norm": 317.6231994628906,
      "learning_rate": 9.994161134161635e-06,
      "loss": 15.5944,
      "step": 40
    },
    {
      "epoch": 0.023507068245749198,
      "grad_norm": 329.2905578613281,
      "learning_rate": 9.993228508217201e-06,
      "loss": 15.6057,
      "step": 41
    },
    {
      "epoch": 0.0240804113736943,
      "grad_norm": 331.61407470703125,
      "learning_rate": 9.9922268634943e-06,
      "loss": 15.5034,
      "step": 42
    },
    {
      "epoch": 0.024653754501639404,
      "grad_norm": 308.8927917480469,
      "learning_rate": 9.991156213838143e-06,
      "loss": 15.5626,
      "step": 43
    },
    {
      "epoch": 0.025227097629584506,
      "grad_norm": 331.08624267578125,
      "learning_rate": 9.990016574047757e-06,
      "loss": 15.628,
      "step": 44
    },
    {
      "epoch": 0.02580044075752961,
      "grad_norm": 343.9646911621094,
      "learning_rate": 9.988807959875785e-06,
      "loss": 15.52,
      "step": 45
    },
    {
      "epoch": 0.02637378388547471,
      "grad_norm": 317.4025573730469,
      "learning_rate": 9.987530388028269e-06,
      "loss": 15.5872,
      "step": 46
    },
    {
      "epoch": 0.02694712701341981,
      "grad_norm": 322.307373046875,
      "learning_rate": 9.986183876164412e-06,
      "loss": 15.4988,
      "step": 47
    },
    {
      "epoch": 0.027520470141364915,
      "grad_norm": 317.954833984375,
      "learning_rate": 9.984768442896342e-06,
      "loss": 15.4338,
      "step": 48
    },
    {
      "epoch": 0.028093813269310017,
      "grad_norm": 316.19512939453125,
      "learning_rate": 9.983284107788852e-06,
      "loss": 15.464,
      "step": 49
    },
    {
      "epoch": 0.02866715639725512,
      "grad_norm": 309.9515686035156,
      "learning_rate": 9.981730891359123e-06,
      "loss": 15.4762,
      "step": 50
    },
    {
      "epoch": 0.029240499525200223,
      "grad_norm": 325.8763427734375,
      "learning_rate": 9.980108815076456e-06,
      "loss": 15.4914,
      "step": 51
    },
    {
      "epoch": 0.029813842653145326,
      "grad_norm": 310.78424072265625,
      "learning_rate": 9.978417901361958e-06,
      "loss": 15.5108,
      "step": 52
    },
    {
      "epoch": 0.030387185781090426,
      "grad_norm": 335.9707946777344,
      "learning_rate": 9.976658173588244e-06,
      "loss": 15.5588,
      "step": 53
    },
    {
      "epoch": 0.03096052890903553,
      "grad_norm": 325.359375,
      "learning_rate": 9.974829656079106e-06,
      "loss": 15.5329,
      "step": 54
    },
    {
      "epoch": 0.03153387203698063,
      "grad_norm": 313.94293212890625,
      "learning_rate": 9.972932374109184e-06,
      "loss": 15.4339,
      "step": 55
    },
    {
      "epoch": 0.032107215164925734,
      "grad_norm": 328.7725830078125,
      "learning_rate": 9.97096635390361e-06,
      "loss": 15.5493,
      "step": 56
    },
    {
      "epoch": 0.03268055829287084,
      "grad_norm": 329.2198791503906,
      "learning_rate": 9.968931622637652e-06,
      "loss": 15.5452,
      "step": 57
    },
    {
      "epoch": 0.03325390142081594,
      "grad_norm": 323.4937438964844,
      "learning_rate": 9.966828208436332e-06,
      "loss": 15.4639,
      "step": 58
    },
    {
      "epoch": 0.03382724454876104,
      "grad_norm": 318.2495422363281,
      "learning_rate": 9.96465614037404e-06,
      "loss": 15.4536,
      "step": 59
    },
    {
      "epoch": 0.034400587676706146,
      "grad_norm": 315.8673095703125,
      "learning_rate": 9.962415448474134e-06,
      "loss": 15.4971,
      "step": 60
    },
    {
      "epoch": 0.03497393080465125,
      "grad_norm": 313.1468505859375,
      "learning_rate": 9.960106163708522e-06,
      "loss": 15.4236,
      "step": 61
    },
    {
      "epoch": 0.03554727393259635,
      "grad_norm": 311.7138977050781,
      "learning_rate": 9.95772831799724e-06,
      "loss": 15.4216,
      "step": 62
    },
    {
      "epoch": 0.03612061706054145,
      "grad_norm": 306.1082763671875,
      "learning_rate": 9.955281944207998e-06,
      "loss": 15.4497,
      "step": 63
    },
    {
      "epoch": 0.03669396018848655,
      "grad_norm": 320.2241516113281,
      "learning_rate": 9.95276707615574e-06,
      "loss": 15.4434,
      "step": 64
    },
    {
      "epoch": 0.03726730331643165,
      "grad_norm": 314.4595642089844,
      "learning_rate": 9.950183748602164e-06,
      "loss": 15.4662,
      "step": 65
    },
    {
      "epoch": 0.037840646444376756,
      "grad_norm": 329.10784912109375,
      "learning_rate": 9.947531997255256e-06,
      "loss": 15.4808,
      "step": 66
    },
    {
      "epoch": 0.03841398957232186,
      "grad_norm": 312.20135498046875,
      "learning_rate": 9.944811858768782e-06,
      "loss": 15.4633,
      "step": 67
    },
    {
      "epoch": 0.03898733270026696,
      "grad_norm": 331.2897033691406,
      "learning_rate": 9.94202337074179e-06,
      "loss": 15.4861,
      "step": 68
    },
    {
      "epoch": 0.039560675828212065,
      "grad_norm": 325.3959655761719,
      "learning_rate": 9.939166571718086e-06,
      "loss": 15.349,
      "step": 69
    },
    {
      "epoch": 0.04013401895615717,
      "grad_norm": 312.45513916015625,
      "learning_rate": 9.936241501185706e-06,
      "loss": 15.476,
      "step": 70
    },
    {
      "epoch": 0.04070736208410227,
      "grad_norm": 318.12322998046875,
      "learning_rate": 9.933248199576366e-06,
      "loss": 15.5243,
      "step": 71
    },
    {
      "epoch": 0.041280705212047374,
      "grad_norm": 313.1653137207031,
      "learning_rate": 9.930186708264902e-06,
      "loss": 15.4566,
      "step": 72
    },
    {
      "epoch": 0.041854048339992476,
      "grad_norm": 310.0314636230469,
      "learning_rate": 9.927057069568704e-06,
      "loss": 15.4365,
      "step": 73
    },
    {
      "epoch": 0.04242739146793758,
      "grad_norm": 319.7293701171875,
      "learning_rate": 9.923859326747125e-06,
      "loss": 15.4605,
      "step": 74
    },
    {
      "epoch": 0.04300073459588268,
      "grad_norm": 306.68524169921875,
      "learning_rate": 9.920593524000887e-06,
      "loss": 15.3812,
      "step": 75
    },
    {
      "epoch": 0.043574077723827785,
      "grad_norm": 309.94500732421875,
      "learning_rate": 9.917259706471469e-06,
      "loss": 15.3971,
      "step": 76
    },
    {
      "epoch": 0.04414742085177288,
      "grad_norm": 321.57647705078125,
      "learning_rate": 9.913857920240481e-06,
      "loss": 15.471,
      "step": 77
    },
    {
      "epoch": 0.044720763979717984,
      "grad_norm": 310.59991455078125,
      "learning_rate": 9.91038821232903e-06,
      "loss": 15.4669,
      "step": 78
    },
    {
      "epoch": 0.04529410710766309,
      "grad_norm": 298.2730407714844,
      "learning_rate": 9.906850630697068e-06,
      "loss": 15.4534,
      "step": 79
    },
    {
      "epoch": 0.04586745023560819,
      "grad_norm": 303.1147766113281,
      "learning_rate": 9.903245224242732e-06,
      "loss": 15.3767,
      "step": 80
    },
    {
      "epoch": 0.04644079336355329,
      "grad_norm": 299.9115905761719,
      "learning_rate": 9.899572042801662e-06,
      "loss": 15.3181,
      "step": 81
    },
    {
      "epoch": 0.047014136491498396,
      "grad_norm": 299.8761901855469,
      "learning_rate": 9.895831137146319e-06,
      "loss": 15.3273,
      "step": 82
    },
    {
      "epoch": 0.0475874796194435,
      "grad_norm": 305.44244384765625,
      "learning_rate": 9.89202255898528e-06,
      "loss": 15.3504,
      "step": 83
    },
    {
      "epoch": 0.0481608227473886,
      "grad_norm": 302.8594055175781,
      "learning_rate": 9.888146360962523e-06,
      "loss": 15.4113,
      "step": 84
    },
    {
      "epoch": 0.048734165875333704,
      "grad_norm": 310.83587646484375,
      "learning_rate": 9.8842025966567e-06,
      "loss": 15.4274,
      "step": 85
    },
    {
      "epoch": 0.04930750900327881,
      "grad_norm": 292.6897277832031,
      "learning_rate": 9.880191320580396e-06,
      "loss": 15.2777,
      "step": 86
    },
    {
      "epoch": 0.04988085213122391,
      "grad_norm": 308.5329895019531,
      "learning_rate": 9.876112588179378e-06,
      "loss": 15.3073,
      "step": 87
    },
    {
      "epoch": 0.05045419525916901,
      "grad_norm": 295.7265319824219,
      "learning_rate": 9.87196645583182e-06,
      "loss": 15.3201,
      "step": 88
    },
    {
      "epoch": 0.051027538387114116,
      "grad_norm": 300.5785827636719,
      "learning_rate": 9.86775298084754e-06,
      "loss": 15.4455,
      "step": 89
    },
    {
      "epoch": 0.05160088151505922,
      "grad_norm": 293.6327819824219,
      "learning_rate": 9.863472221467189e-06,
      "loss": 15.4047,
      "step": 90
    },
    {
      "epoch": 0.052174224643004315,
      "grad_norm": 314.74468994140625,
      "learning_rate": 9.85912423686146e-06,
      "loss": 15.4229,
      "step": 91
    },
    {
      "epoch": 0.05274756777094942,
      "grad_norm": 292.306640625,
      "learning_rate": 9.854709087130261e-06,
      "loss": 15.3212,
      "step": 92
    },
    {
      "epoch": 0.05332091089889452,
      "grad_norm": 321.5920715332031,
      "learning_rate": 9.850226833301893e-06,
      "loss": 15.3655,
      "step": 93
    },
    {
      "epoch": 0.05389425402683962,
      "grad_norm": 288.67535400390625,
      "learning_rate": 9.8456775373322e-06,
      "loss": 15.3491,
      "step": 94
    },
    {
      "epoch": 0.054467597154784726,
      "grad_norm": 301.61151123046875,
      "learning_rate": 9.841061262103713e-06,
      "loss": 15.4396,
      "step": 95
    },
    {
      "epoch": 0.05504094028272983,
      "grad_norm": 291.6568908691406,
      "learning_rate": 9.836378071424782e-06,
      "loss": 15.3401,
      "step": 96
    },
    {
      "epoch": 0.05561428341067493,
      "grad_norm": 292.19915771484375,
      "learning_rate": 9.831628030028698e-06,
      "loss": 15.3169,
      "step": 97
    },
    {
      "epoch": 0.056187626538620035,
      "grad_norm": 291.9767150878906,
      "learning_rate": 9.826811203572785e-06,
      "loss": 15.3443,
      "step": 98
    },
    {
      "epoch": 0.05676096966656514,
      "grad_norm": 304.70599365234375,
      "learning_rate": 9.821927658637518e-06,
      "loss": 15.3755,
      "step": 99
    },
    {
      "epoch": 0.05733431279451024,
      "grad_norm": 302.005859375,
      "learning_rate": 9.81697746272557e-06,
      "loss": 15.332,
      "step": 100
    },
    {
      "epoch": 0.057907655922455344,
      "grad_norm": 302.4617004394531,
      "learning_rate": 9.811960684260907e-06,
      "loss": 15.4224,
      "step": 101
    },
    {
      "epoch": 0.058480999050400446,
      "grad_norm": 298.9280700683594,
      "learning_rate": 9.80687739258782e-06,
      "loss": 15.377,
      "step": 102
    },
    {
      "epoch": 0.05905434217834555,
      "grad_norm": 287.72869873046875,
      "learning_rate": 9.801727657969988e-06,
      "loss": 15.3631,
      "step": 103
    },
    {
      "epoch": 0.05962768530629065,
      "grad_norm": 275.61376953125,
      "learning_rate": 9.796511551589492e-06,
      "loss": 15.2808,
      "step": 104
    },
    {
      "epoch": 0.06020102843423575,
      "grad_norm": 283.3959655761719,
      "learning_rate": 9.791229145545832e-06,
      "loss": 15.3636,
      "step": 105
    },
    {
      "epoch": 0.06077437156218085,
      "grad_norm": 295.7449035644531,
      "learning_rate": 9.785880512854937e-06,
      "loss": 15.2886,
      "step": 106
    },
    {
      "epoch": 0.061347714690125954,
      "grad_norm": 287.8319091796875,
      "learning_rate": 9.78046572744815e-06,
      "loss": 15.2978,
      "step": 107
    },
    {
      "epoch": 0.06192105781807106,
      "grad_norm": 294.1200256347656,
      "learning_rate": 9.77498486417121e-06,
      "loss": 15.2803,
      "step": 108
    },
    {
      "epoch": 0.06249440094601616,
      "grad_norm": 281.8334655761719,
      "learning_rate": 9.769437998783216e-06,
      "loss": 15.3278,
      "step": 109
    },
    {
      "epoch": 0.06306774407396126,
      "grad_norm": 283.81732177734375,
      "learning_rate": 9.763825207955577e-06,
      "loss": 15.2408,
      "step": 110
    },
    {
      "epoch": 0.06364108720190637,
      "grad_norm": 289.8335876464844,
      "learning_rate": 9.758146569270957e-06,
      "loss": 15.2072,
      "step": 111
    },
    {
      "epoch": 0.06421443032985147,
      "grad_norm": 283.79541015625,
      "learning_rate": 9.7524021612222e-06,
      "loss": 15.2841,
      "step": 112
    },
    {
      "epoch": 0.06478777345779657,
      "grad_norm": 278.183349609375,
      "learning_rate": 9.746592063211247e-06,
      "loss": 15.2678,
      "step": 113
    },
    {
      "epoch": 0.06536111658574167,
      "grad_norm": 285.3382568359375,
      "learning_rate": 9.74071635554803e-06,
      "loss": 15.2402,
      "step": 114
    },
    {
      "epoch": 0.06593445971368678,
      "grad_norm": 278.955078125,
      "learning_rate": 9.73477511944938e-06,
      "loss": 15.3042,
      "step": 115
    },
    {
      "epoch": 0.06650780284163188,
      "grad_norm": 279.9234924316406,
      "learning_rate": 9.728768437037882e-06,
      "loss": 15.2099,
      "step": 116
    },
    {
      "epoch": 0.06708114596957698,
      "grad_norm": 279.9479064941406,
      "learning_rate": 9.722696391340762e-06,
      "loss": 15.3344,
      "step": 117
    },
    {
      "epoch": 0.06765448909752209,
      "grad_norm": 286.9149169921875,
      "learning_rate": 9.716559066288716e-06,
      "loss": 15.2665,
      "step": 118
    },
    {
      "epoch": 0.06822783222546719,
      "grad_norm": 306.3753356933594,
      "learning_rate": 9.710356546714774e-06,
      "loss": 15.2843,
      "step": 119
    },
    {
      "epoch": 0.06880117535341229,
      "grad_norm": 276.3395690917969,
      "learning_rate": 9.704088918353108e-06,
      "loss": 15.2029,
      "step": 120
    },
    {
      "epoch": 0.0693745184813574,
      "grad_norm": 283.53411865234375,
      "learning_rate": 9.697756267837856e-06,
      "loss": 15.2337,
      "step": 121
    },
    {
      "epoch": 0.0699478616093025,
      "grad_norm": 288.1679382324219,
      "learning_rate": 9.691358682701927e-06,
      "loss": 15.1838,
      "step": 122
    },
    {
      "epoch": 0.0705212047372476,
      "grad_norm": 275.3481750488281,
      "learning_rate": 9.684896251375784e-06,
      "loss": 15.214,
      "step": 123
    },
    {
      "epoch": 0.0710945478651927,
      "grad_norm": 271.927490234375,
      "learning_rate": 9.678369063186224e-06,
      "loss": 15.2439,
      "step": 124
    },
    {
      "epoch": 0.0716678909931378,
      "grad_norm": 280.0527648925781,
      "learning_rate": 9.671777208355146e-06,
      "loss": 15.2833,
      "step": 125
    },
    {
      "epoch": 0.0722412341210829,
      "grad_norm": 286.959228515625,
      "learning_rate": 9.665120777998303e-06,
      "loss": 15.3076,
      "step": 126
    },
    {
      "epoch": 0.072814577249028,
      "grad_norm": 268.98553466796875,
      "learning_rate": 9.658399864124037e-06,
      "loss": 15.3274,
      "step": 127
    },
    {
      "epoch": 0.0733879203769731,
      "grad_norm": 261.5675964355469,
      "learning_rate": 9.65161455963202e-06,
      "loss": 15.2216,
      "step": 128
    },
    {
      "epoch": 0.0739612635049182,
      "grad_norm": 272.29425048828125,
      "learning_rate": 9.64476495831195e-06,
      "loss": 15.2385,
      "step": 129
    },
    {
      "epoch": 0.0745346066328633,
      "grad_norm": 282.3617248535156,
      "learning_rate": 9.637851154842279e-06,
      "loss": 15.2864,
      "step": 130
    },
    {
      "epoch": 0.07510794976080841,
      "grad_norm": 260.4862976074219,
      "learning_rate": 9.630873244788884e-06,
      "loss": 15.3039,
      "step": 131
    },
    {
      "epoch": 0.07568129288875351,
      "grad_norm": 268.15582275390625,
      "learning_rate": 9.623831324603755e-06,
      "loss": 15.2402,
      "step": 132
    },
    {
      "epoch": 0.07625463601669862,
      "grad_norm": 275.54180908203125,
      "learning_rate": 9.61672549162366e-06,
      "loss": 15.2216,
      "step": 133
    },
    {
      "epoch": 0.07682797914464372,
      "grad_norm": 274.50299072265625,
      "learning_rate": 9.6095558440688e-06,
      "loss": 15.2265,
      "step": 134
    },
    {
      "epoch": 0.07740132227258882,
      "grad_norm": 274.8090515136719,
      "learning_rate": 9.602322481041457e-06,
      "loss": 15.2518,
      "step": 135
    },
    {
      "epoch": 0.07797466540053392,
      "grad_norm": 264.6287841796875,
      "learning_rate": 9.595025502524609e-06,
      "loss": 15.2621,
      "step": 136
    },
    {
      "epoch": 0.07854800852847903,
      "grad_norm": 261.9557189941406,
      "learning_rate": 9.587665009380565e-06,
      "loss": 15.2255,
      "step": 137
    },
    {
      "epoch": 0.07912135165642413,
      "grad_norm": 264.4668273925781,
      "learning_rate": 9.580241103349562e-06,
      "loss": 15.1974,
      "step": 138
    },
    {
      "epoch": 0.07969469478436923,
      "grad_norm": 268.053955078125,
      "learning_rate": 9.572753887048353e-06,
      "loss": 15.1732,
      "step": 139
    },
    {
      "epoch": 0.08026803791231434,
      "grad_norm": 275.5241394042969,
      "learning_rate": 9.565203463968808e-06,
      "loss": 15.2277,
      "step": 140
    },
    {
      "epoch": 0.08084138104025944,
      "grad_norm": 270.20001220703125,
      "learning_rate": 9.557589938476462e-06,
      "loss": 15.2393,
      "step": 141
    },
    {
      "epoch": 0.08141472416820454,
      "grad_norm": 274.71453857421875,
      "learning_rate": 9.549913415809084e-06,
      "loss": 15.1832,
      "step": 142
    },
    {
      "epoch": 0.08198806729614964,
      "grad_norm": 266.2647399902344,
      "learning_rate": 9.542174002075221e-06,
      "loss": 15.1934,
      "step": 143
    },
    {
      "epoch": 0.08256141042409475,
      "grad_norm": 270.1286315917969,
      "learning_rate": 9.534371804252727e-06,
      "loss": 15.1652,
      "step": 144
    },
    {
      "epoch": 0.08313475355203985,
      "grad_norm": 253.12673950195312,
      "learning_rate": 9.526506930187294e-06,
      "loss": 15.2471,
      "step": 145
    },
    {
      "epoch": 0.08370809667998495,
      "grad_norm": 266.5976867675781,
      "learning_rate": 9.518579488590947e-06,
      "loss": 15.26,
      "step": 146
    },
    {
      "epoch": 0.08428143980793006,
      "grad_norm": 264.99481201171875,
      "learning_rate": 9.510589589040554e-06,
      "loss": 15.1794,
      "step": 147
    },
    {
      "epoch": 0.08485478293587516,
      "grad_norm": 255.4492950439453,
      "learning_rate": 9.502537341976305e-06,
      "loss": 15.2214,
      "step": 148
    },
    {
      "epoch": 0.08542812606382026,
      "grad_norm": 264.4046325683594,
      "learning_rate": 9.494422858700188e-06,
      "loss": 15.1397,
      "step": 149
    },
    {
      "epoch": 0.08600146919176536,
      "grad_norm": 276.0195007324219,
      "learning_rate": 9.48624625137445e-06,
      "loss": 15.2514,
      "step": 150
    },
    {
      "epoch": 0.08657481231971047,
      "grad_norm": 261.25848388671875,
      "learning_rate": 9.478007633020043e-06,
      "loss": 15.1633,
      "step": 151
    },
    {
      "epoch": 0.08714815544765557,
      "grad_norm": 273.81439208984375,
      "learning_rate": 9.469707117515068e-06,
      "loss": 15.3146,
      "step": 152
    },
    {
      "epoch": 0.08772149857560067,
      "grad_norm": 278.4958801269531,
      "learning_rate": 9.461344819593194e-06,
      "loss": 15.2173,
      "step": 153
    },
    {
      "epoch": 0.08829484170354576,
      "grad_norm": 270.7554931640625,
      "learning_rate": 9.452920854842085e-06,
      "loss": 15.2049,
      "step": 154
    },
    {
      "epoch": 0.08886818483149087,
      "grad_norm": 277.895751953125,
      "learning_rate": 9.44443533970178e-06,
      "loss": 15.2012,
      "step": 155
    },
    {
      "epoch": 0.08944152795943597,
      "grad_norm": 260.6186828613281,
      "learning_rate": 9.435888391463108e-06,
      "loss": 15.1519,
      "step": 156
    },
    {
      "epoch": 0.09001487108738107,
      "grad_norm": 266.2400817871094,
      "learning_rate": 9.427280128266049e-06,
      "loss": 15.1982,
      "step": 157
    },
    {
      "epoch": 0.09058821421532617,
      "grad_norm": 258.3689270019531,
      "learning_rate": 9.418610669098114e-06,
      "loss": 15.2358,
      "step": 158
    },
    {
      "epoch": 0.09116155734327128,
      "grad_norm": 255.73751831054688,
      "learning_rate": 9.409880133792684e-06,
      "loss": 15.2167,
      "step": 159
    },
    {
      "epoch": 0.09173490047121638,
      "grad_norm": 257.6156311035156,
      "learning_rate": 9.40108864302737e-06,
      "loss": 15.1499,
      "step": 160
    },
    {
      "epoch": 0.09230824359916148,
      "grad_norm": 259.1768493652344,
      "learning_rate": 9.392236318322339e-06,
      "loss": 15.1413,
      "step": 161
    },
    {
      "epoch": 0.09288158672710659,
      "grad_norm": 259.98583984375,
      "learning_rate": 9.383323282038632e-06,
      "loss": 15.2688,
      "step": 162
    },
    {
      "epoch": 0.09345492985505169,
      "grad_norm": 270.8675537109375,
      "learning_rate": 9.374349657376473e-06,
      "loss": 15.19,
      "step": 163
    },
    {
      "epoch": 0.09402827298299679,
      "grad_norm": 252.3112030029297,
      "learning_rate": 9.365315568373569e-06,
      "loss": 15.1946,
      "step": 164
    },
    {
      "epoch": 0.0946016161109419,
      "grad_norm": 271.8454284667969,
      "learning_rate": 9.356221139903395e-06,
      "loss": 15.1801,
      "step": 165
    },
    {
      "epoch": 0.095174959238887,
      "grad_norm": 252.07545471191406,
      "learning_rate": 9.347066497673462e-06,
      "loss": 15.169,
      "step": 166
    },
    {
      "epoch": 0.0957483023668321,
      "grad_norm": 274.141357421875,
      "learning_rate": 9.337851768223589e-06,
      "loss": 15.2279,
      "step": 167
    },
    {
      "epoch": 0.0963216454947772,
      "grad_norm": 257.8874206542969,
      "learning_rate": 9.328577078924151e-06,
      "loss": 15.2368,
      "step": 168
    },
    {
      "epoch": 0.0968949886227223,
      "grad_norm": 259.5989990234375,
      "learning_rate": 9.319242557974306e-06,
      "loss": 15.1261,
      "step": 169
    },
    {
      "epoch": 0.09746833175066741,
      "grad_norm": 268.7466735839844,
      "learning_rate": 9.309848334400247e-06,
      "loss": 15.1956,
      "step": 170
    },
    {
      "epoch": 0.09804167487861251,
      "grad_norm": 250.24107360839844,
      "learning_rate": 9.300394538053395e-06,
      "loss": 15.2186,
      "step": 171
    },
    {
      "epoch": 0.09861501800655761,
      "grad_norm": 242.27389526367188,
      "learning_rate": 9.29088129960862e-06,
      "loss": 15.208,
      "step": 172
    },
    {
      "epoch": 0.09918836113450272,
      "grad_norm": 257.0928649902344,
      "learning_rate": 9.281308750562426e-06,
      "loss": 15.2165,
      "step": 173
    },
    {
      "epoch": 0.09976170426244782,
      "grad_norm": 252.54974365234375,
      "learning_rate": 9.271677023231137e-06,
      "loss": 15.2131,
      "step": 174
    },
    {
      "epoch": 0.10033504739039292,
      "grad_norm": 257.41192626953125,
      "learning_rate": 9.261986250749068e-06,
      "loss": 15.1474,
      "step": 175
    },
    {
      "epoch": 0.10090839051833803,
      "grad_norm": 260.325439453125,
      "learning_rate": 9.252236567066686e-06,
      "loss": 15.1335,
      "step": 176
    },
    {
      "epoch": 0.10148173364628313,
      "grad_norm": 265.9437561035156,
      "learning_rate": 9.242428106948748e-06,
      "loss": 15.2201,
      "step": 177
    },
    {
      "epoch": 0.10205507677422823,
      "grad_norm": 255.51026916503906,
      "learning_rate": 9.23256100597246e-06,
      "loss": 15.167,
      "step": 178
    },
    {
      "epoch": 0.10262841990217333,
      "grad_norm": 254.357666015625,
      "learning_rate": 9.22263540052558e-06,
      "loss": 15.2428,
      "step": 179
    },
    {
      "epoch": 0.10320176303011844,
      "grad_norm": 253.48025512695312,
      "learning_rate": 9.212651427804544e-06,
      "loss": 15.0791,
      "step": 180
    },
    {
      "epoch": 0.10377510615806354,
      "grad_norm": 258.47149658203125,
      "learning_rate": 9.202609225812572e-06,
      "loss": 15.2475,
      "step": 181
    },
    {
      "epoch": 0.10434844928600863,
      "grad_norm": 257.2544860839844,
      "learning_rate": 9.192508933357753e-06,
      "loss": 15.1288,
      "step": 182
    },
    {
      "epoch": 0.10492179241395373,
      "grad_norm": 250.79588317871094,
      "learning_rate": 9.182350690051134e-06,
      "loss": 15.1739,
      "step": 183
    },
    {
      "epoch": 0.10549513554189884,
      "grad_norm": 275.7869873046875,
      "learning_rate": 9.172134636304783e-06,
      "loss": 15.1487,
      "step": 184
    },
    {
      "epoch": 0.10606847866984394,
      "grad_norm": 256.7626647949219,
      "learning_rate": 9.16186091332985e-06,
      "loss": 15.1919,
      "step": 185
    },
    {
      "epoch": 0.10664182179778904,
      "grad_norm": 255.94090270996094,
      "learning_rate": 9.15152966313462e-06,
      "loss": 15.1635,
      "step": 186
    },
    {
      "epoch": 0.10721516492573414,
      "grad_norm": 267.16448974609375,
      "learning_rate": 9.141141028522544e-06,
      "loss": 15.1597,
      "step": 187
    },
    {
      "epoch": 0.10778850805367925,
      "grad_norm": 258.8427734375,
      "learning_rate": 9.130695153090272e-06,
      "loss": 15.1459,
      "step": 188
    },
    {
      "epoch": 0.10836185118162435,
      "grad_norm": 253.86849975585938,
      "learning_rate": 9.120192181225658e-06,
      "loss": 15.1216,
      "step": 189
    },
    {
      "epoch": 0.10893519430956945,
      "grad_norm": 265.7057189941406,
      "learning_rate": 9.109632258105771e-06,
      "loss": 15.1723,
      "step": 190
    },
    {
      "epoch": 0.10950853743751456,
      "grad_norm": 250.55398559570312,
      "learning_rate": 9.099015529694894e-06,
      "loss": 15.026,
      "step": 191
    },
    {
      "epoch": 0.11008188056545966,
      "grad_norm": 255.69390869140625,
      "learning_rate": 9.088342142742493e-06,
      "loss": 15.1254,
      "step": 192
    },
    {
      "epoch": 0.11065522369340476,
      "grad_norm": 254.11236572265625,
      "learning_rate": 9.077612244781196e-06,
      "loss": 15.079,
      "step": 193
    },
    {
      "epoch": 0.11122856682134986,
      "grad_norm": 247.76478576660156,
      "learning_rate": 9.066825984124751e-06,
      "loss": 15.1122,
      "step": 194
    },
    {
      "epoch": 0.11180190994929497,
      "grad_norm": 265.3432922363281,
      "learning_rate": 9.055983509865988e-06,
      "loss": 15.305,
      "step": 195
    },
    {
      "epoch": 0.11237525307724007,
      "grad_norm": 244.3975067138672,
      "learning_rate": 9.045084971874738e-06,
      "loss": 15.1207,
      "step": 196
    },
    {
      "epoch": 0.11294859620518517,
      "grad_norm": 245.3219757080078,
      "learning_rate": 9.034130520795774e-06,
      "loss": 15.2254,
      "step": 197
    },
    {
      "epoch": 0.11352193933313028,
      "grad_norm": 248.05052185058594,
      "learning_rate": 9.023120308046726e-06,
      "loss": 15.0549,
      "step": 198
    },
    {
      "epoch": 0.11409528246107538,
      "grad_norm": 249.66659545898438,
      "learning_rate": 9.012054485815995e-06,
      "loss": 15.0402,
      "step": 199
    },
    {
      "epoch": 0.11466862558902048,
      "grad_norm": 247.83876037597656,
      "learning_rate": 9.00093320706063e-06,
      "loss": 15.1167,
      "step": 200
    },
    {
      "epoch": 0.11524196871696558,
      "grad_norm": 241.92027282714844,
      "learning_rate": 8.989756625504237e-06,
      "loss": 15.0883,
      "step": 201
    },
    {
      "epoch": 0.11581531184491069,
      "grad_norm": 247.57127380371094,
      "learning_rate": 8.978524895634842e-06,
      "loss": 15.0762,
      "step": 202
    },
    {
      "epoch": 0.11638865497285579,
      "grad_norm": 260.8078918457031,
      "learning_rate": 8.967238172702754e-06,
      "loss": 15.1708,
      "step": 203
    },
    {
      "epoch": 0.11696199810080089,
      "grad_norm": 234.99139404296875,
      "learning_rate": 8.95589661271842e-06,
      "loss": 15.0437,
      "step": 204
    },
    {
      "epoch": 0.117535341228746,
      "grad_norm": 252.7474822998047,
      "learning_rate": 8.94450037245028e-06,
      "loss": 15.1181,
      "step": 205
    },
    {
      "epoch": 0.1181086843566911,
      "grad_norm": 254.7908477783203,
      "learning_rate": 8.933049609422582e-06,
      "loss": 15.053,
      "step": 206
    },
    {
      "epoch": 0.1186820274846362,
      "grad_norm": 249.38302612304688,
      "learning_rate": 8.921544481913218e-06,
      "loss": 15.1128,
      "step": 207
    },
    {
      "epoch": 0.1192553706125813,
      "grad_norm": 244.7653350830078,
      "learning_rate": 8.909985148951528e-06,
      "loss": 15.0565,
      "step": 208
    },
    {
      "epoch": 0.11982871374052641,
      "grad_norm": 246.28976440429688,
      "learning_rate": 8.898371770316113e-06,
      "loss": 14.9964,
      "step": 209
    },
    {
      "epoch": 0.1204020568684715,
      "grad_norm": 260.9649658203125,
      "learning_rate": 8.886704506532611e-06,
      "loss": 15.0536,
      "step": 210
    },
    {
      "epoch": 0.1209753999964166,
      "grad_norm": 254.28854370117188,
      "learning_rate": 8.874983518871488e-06,
      "loss": 15.1222,
      "step": 211
    },
    {
      "epoch": 0.1215487431243617,
      "grad_norm": 233.388427734375,
      "learning_rate": 8.86320896934581e-06,
      "loss": 15.1175,
      "step": 212
    },
    {
      "epoch": 0.1221220862523068,
      "grad_norm": 264.84063720703125,
      "learning_rate": 8.851381020709e-06,
      "loss": 15.0966,
      "step": 213
    },
    {
      "epoch": 0.12269542938025191,
      "grad_norm": 238.38485717773438,
      "learning_rate": 8.839499836452584e-06,
      "loss": 15.0013,
      "step": 214
    },
    {
      "epoch": 0.12326877250819701,
      "grad_norm": 251.5662384033203,
      "learning_rate": 8.827565580803944e-06,
      "loss": 15.1437,
      "step": 215
    },
    {
      "epoch": 0.12384211563614211,
      "grad_norm": 250.67286682128906,
      "learning_rate": 8.815578418724031e-06,
      "loss": 15.0635,
      "step": 216
    },
    {
      "epoch": 0.12441545876408722,
      "grad_norm": 262.172607421875,
      "learning_rate": 8.803538515905102e-06,
      "loss": 15.1516,
      "step": 217
    },
    {
      "epoch": 0.12498880189203232,
      "grad_norm": 241.4354705810547,
      "learning_rate": 8.791446038768416e-06,
      "loss": 15.1259,
      "step": 218
    },
    {
      "epoch": 0.12556214501997742,
      "grad_norm": 247.70347595214844,
      "learning_rate": 8.779301154461945e-06,
      "loss": 15.1325,
      "step": 219
    },
    {
      "epoch": 0.12613548814792253,
      "grad_norm": 234.08982849121094,
      "learning_rate": 8.76710403085805e-06,
      "loss": 15.01,
      "step": 220
    },
    {
      "epoch": 0.12670883127586763,
      "grad_norm": 245.60804748535156,
      "learning_rate": 8.754854836551174e-06,
      "loss": 15.0905,
      "step": 221
    },
    {
      "epoch": 0.12728217440381273,
      "grad_norm": 254.1485137939453,
      "learning_rate": 8.742553740855507e-06,
      "loss": 15.1127,
      "step": 222
    },
    {
      "epoch": 0.12785551753175783,
      "grad_norm": 238.38563537597656,
      "learning_rate": 8.730200913802638e-06,
      "loss": 15.0614,
      "step": 223
    },
    {
      "epoch": 0.12842886065970294,
      "grad_norm": 248.24403381347656,
      "learning_rate": 8.717796526139218e-06,
      "loss": 15.0618,
      "step": 224
    },
    {
      "epoch": 0.12900220378764804,
      "grad_norm": 246.24209594726562,
      "learning_rate": 8.70534074932459e-06,
      "loss": 15.0455,
      "step": 225
    },
    {
      "epoch": 0.12957554691559314,
      "grad_norm": 237.25454711914062,
      "learning_rate": 8.692833755528426e-06,
      "loss": 15.0558,
      "step": 226
    },
    {
      "epoch": 0.13014889004353825,
      "grad_norm": 246.07095336914062,
      "learning_rate": 8.680275717628336e-06,
      "loss": 15.0205,
      "step": 227
    },
    {
      "epoch": 0.13072223317148335,
      "grad_norm": 242.9619903564453,
      "learning_rate": 8.667666809207495e-06,
      "loss": 15.142,
      "step": 228
    },
    {
      "epoch": 0.13129557629942845,
      "grad_norm": 242.89532470703125,
      "learning_rate": 8.655007204552228e-06,
      "loss": 15.0199,
      "step": 229
    },
    {
      "epoch": 0.13186891942737355,
      "grad_norm": 254.67239379882812,
      "learning_rate": 8.64229707864961e-06,
      "loss": 15.088,
      "step": 230
    },
    {
      "epoch": 0.13244226255531866,
      "grad_norm": 240.30972290039062,
      "learning_rate": 8.629536607185042e-06,
      "loss": 15.1037,
      "step": 231
    },
    {
      "epoch": 0.13301560568326376,
      "grad_norm": 250.13949584960938,
      "learning_rate": 8.616725966539831e-06,
      "loss": 15.0717,
      "step": 232
    },
    {
      "epoch": 0.13358894881120886,
      "grad_norm": 237.8465576171875,
      "learning_rate": 8.60386533378874e-06,
      "loss": 15.05,
      "step": 233
    },
    {
      "epoch": 0.13416229193915397,
      "grad_norm": 244.82315063476562,
      "learning_rate": 8.590954886697554e-06,
      "loss": 15.101,
      "step": 234
    },
    {
      "epoch": 0.13473563506709907,
      "grad_norm": 237.0764923095703,
      "learning_rate": 8.577994803720605e-06,
      "loss": 15.0211,
      "step": 235
    },
    {
      "epoch": 0.13530897819504417,
      "grad_norm": 241.53424072265625,
      "learning_rate": 8.564985263998327e-06,
      "loss": 15.0495,
      "step": 236
    },
    {
      "epoch": 0.13588232132298927,
      "grad_norm": 232.84251403808594,
      "learning_rate": 8.551926447354759e-06,
      "loss": 14.9438,
      "step": 237
    },
    {
      "epoch": 0.13645566445093438,
      "grad_norm": 242.9515838623047,
      "learning_rate": 8.538818534295076e-06,
      "loss": 15.028,
      "step": 238
    },
    {
      "epoch": 0.13702900757887948,
      "grad_norm": 248.1451416015625,
      "learning_rate": 8.525661706003083e-06,
      "loss": 15.0705,
      "step": 239
    },
    {
      "epoch": 0.13760235070682458,
      "grad_norm": 253.95338439941406,
      "learning_rate": 8.512456144338717e-06,
      "loss": 15.097,
      "step": 240
    },
    {
      "epoch": 0.1381756938347697,
      "grad_norm": 243.39439392089844,
      "learning_rate": 8.499202031835532e-06,
      "loss": 15.0549,
      "step": 241
    },
    {
      "epoch": 0.1387490369627148,
      "grad_norm": 247.52191162109375,
      "learning_rate": 8.485899551698166e-06,
      "loss": 15.1328,
      "step": 242
    },
    {
      "epoch": 0.1393223800906599,
      "grad_norm": 236.9805908203125,
      "learning_rate": 8.472548887799833e-06,
      "loss": 15.0222,
      "step": 243
    },
    {
      "epoch": 0.139895723218605,
      "grad_norm": 239.95289611816406,
      "learning_rate": 8.45915022467975e-06,
      "loss": 15.0937,
      "step": 244
    },
    {
      "epoch": 0.1404690663465501,
      "grad_norm": 254.6737060546875,
      "learning_rate": 8.445703747540614e-06,
      "loss": 15.06,
      "step": 245
    },
    {
      "epoch": 0.1410424094744952,
      "grad_norm": 247.96080017089844,
      "learning_rate": 8.43220964224602e-06,
      "loss": 15.0793,
      "step": 246
    },
    {
      "epoch": 0.1416157526024403,
      "grad_norm": 241.89292907714844,
      "learning_rate": 8.418668095317912e-06,
      "loss": 15.0339,
      "step": 247
    },
    {
      "epoch": 0.1421890957303854,
      "grad_norm": 245.7707061767578,
      "learning_rate": 8.405079293933986e-06,
      "loss": 15.0187,
      "step": 248
    },
    {
      "epoch": 0.1427624388583305,
      "grad_norm": 244.69918823242188,
      "learning_rate": 8.391443425925118e-06,
      "loss": 14.9716,
      "step": 249
    },
    {
      "epoch": 0.1433357819862756,
      "grad_norm": 247.9059295654297,
      "learning_rate": 8.37776067977276e-06,
      "loss": 15.0733,
      "step": 250
    },
    {
      "epoch": 0.14390912511422072,
      "grad_norm": 238.36126708984375,
      "learning_rate": 8.36403124460633e-06,
      "loss": 14.9511,
      "step": 251
    },
    {
      "epoch": 0.1444824682421658,
      "grad_norm": 239.73057556152344,
      "learning_rate": 8.350255310200611e-06,
      "loss": 15.0428,
      "step": 252
    },
    {
      "epoch": 0.1450558113701109,
      "grad_norm": 230.3163299560547,
      "learning_rate": 8.336433066973122e-06,
      "loss": 14.997,
      "step": 253
    },
    {
      "epoch": 0.145629154498056,
      "grad_norm": 237.23446655273438,
      "learning_rate": 8.322564705981476e-06,
      "loss": 14.973,
      "step": 254
    },
    {
      "epoch": 0.1462024976260011,
      "grad_norm": 230.16468811035156,
      "learning_rate": 8.308650418920751e-06,
      "loss": 15.0256,
      "step": 255
    },
    {
      "epoch": 0.1467758407539462,
      "grad_norm": 233.07260131835938,
      "learning_rate": 8.294690398120843e-06,
      "loss": 14.945,
      "step": 256
    },
    {
      "epoch": 0.1473491838818913,
      "grad_norm": 240.12940979003906,
      "learning_rate": 8.280684836543794e-06,
      "loss": 14.9974,
      "step": 257
    },
    {
      "epoch": 0.1479225270098364,
      "grad_norm": 243.80523681640625,
      "learning_rate": 8.266633927781135e-06,
      "loss": 15.0705,
      "step": 258
    },
    {
      "epoch": 0.1484958701377815,
      "grad_norm": 244.0867462158203,
      "learning_rate": 8.25253786605121e-06,
      "loss": 15.0141,
      "step": 259
    },
    {
      "epoch": 0.1490692132657266,
      "grad_norm": 247.33151245117188,
      "learning_rate": 8.238396846196483e-06,
      "loss": 15.0344,
      "step": 260
    },
    {
      "epoch": 0.14964255639367172,
      "grad_norm": 250.08273315429688,
      "learning_rate": 8.224211063680854e-06,
      "loss": 14.9305,
      "step": 261
    },
    {
      "epoch": 0.15021589952161682,
      "grad_norm": 257.2216491699219,
      "learning_rate": 8.209980714586955e-06,
      "loss": 14.9938,
      "step": 262
    },
    {
      "epoch": 0.15078924264956192,
      "grad_norm": 238.5064239501953,
      "learning_rate": 8.195705995613436e-06,
      "loss": 15.0064,
      "step": 263
    },
    {
      "epoch": 0.15136258577750702,
      "grad_norm": 232.31155395507812,
      "learning_rate": 8.181387104072252e-06,
      "loss": 14.9449,
      "step": 264
    },
    {
      "epoch": 0.15193592890545213,
      "grad_norm": 227.94029235839844,
      "learning_rate": 8.167024237885927e-06,
      "loss": 14.8337,
      "step": 265
    },
    {
      "epoch": 0.15250927203339723,
      "grad_norm": 240.96424865722656,
      "learning_rate": 8.152617595584827e-06,
      "loss": 15.0939,
      "step": 266
    },
    {
      "epoch": 0.15308261516134233,
      "grad_norm": 237.70541381835938,
      "learning_rate": 8.138167376304411e-06,
      "loss": 14.909,
      "step": 267
    },
    {
      "epoch": 0.15365595828928744,
      "grad_norm": 233.10304260253906,
      "learning_rate": 8.123673779782481e-06,
      "loss": 14.9505,
      "step": 268
    },
    {
      "epoch": 0.15422930141723254,
      "grad_norm": 240.28123474121094,
      "learning_rate": 8.10913700635642e-06,
      "loss": 14.9045,
      "step": 269
    },
    {
      "epoch": 0.15480264454517764,
      "grad_norm": 233.11627197265625,
      "learning_rate": 8.094557256960419e-06,
      "loss": 14.9225,
      "step": 270
    },
    {
      "epoch": 0.15537598767312275,
      "grad_norm": 244.76693725585938,
      "learning_rate": 8.079934733122708e-06,
      "loss": 14.9717,
      "step": 271
    },
    {
      "epoch": 0.15594933080106785,
      "grad_norm": 240.1745147705078,
      "learning_rate": 8.065269636962765e-06,
      "loss": 15.0261,
      "step": 272
    },
    {
      "epoch": 0.15652267392901295,
      "grad_norm": 246.17298889160156,
      "learning_rate": 8.05056217118852e-06,
      "loss": 14.9933,
      "step": 273
    },
    {
      "epoch": 0.15709601705695805,
      "grad_norm": 244.8893585205078,
      "learning_rate": 8.035812539093557e-06,
      "loss": 15.0351,
      "step": 274
    },
    {
      "epoch": 0.15766936018490316,
      "grad_norm": 244.82302856445312,
      "learning_rate": 8.021020944554305e-06,
      "loss": 14.9442,
      "step": 275
    },
    {
      "epoch": 0.15824270331284826,
      "grad_norm": 243.9514923095703,
      "learning_rate": 8.006187592027215e-06,
      "loss": 14.9621,
      "step": 276
    },
    {
      "epoch": 0.15881604644079336,
      "grad_norm": 230.46597290039062,
      "learning_rate": 7.991312686545939e-06,
      "loss": 14.8903,
      "step": 277
    },
    {
      "epoch": 0.15938938956873847,
      "grad_norm": 249.49838256835938,
      "learning_rate": 7.976396433718492e-06,
      "loss": 14.9777,
      "step": 278
    },
    {
      "epoch": 0.15996273269668357,
      "grad_norm": 243.70870971679688,
      "learning_rate": 7.961439039724413e-06,
      "loss": 15.0312,
      "step": 279
    },
    {
      "epoch": 0.16053607582462867,
      "grad_norm": 230.47183227539062,
      "learning_rate": 7.946440711311913e-06,
      "loss": 14.9198,
      "step": 280
    },
    {
      "epoch": 0.16110941895257377,
      "grad_norm": 236.70082092285156,
      "learning_rate": 7.931401655795021e-06,
      "loss": 14.9223,
      "step": 281
    },
    {
      "epoch": 0.16168276208051888,
      "grad_norm": 234.71527099609375,
      "learning_rate": 7.916322081050708e-06,
      "loss": 14.9188,
      "step": 282
    },
    {
      "epoch": 0.16225610520846398,
      "grad_norm": 235.15675354003906,
      "learning_rate": 7.90120219551603e-06,
      "loss": 14.9309,
      "step": 283
    },
    {
      "epoch": 0.16282944833640908,
      "grad_norm": 229.10137939453125,
      "learning_rate": 7.88604220818523e-06,
      "loss": 14.8877,
      "step": 284
    },
    {
      "epoch": 0.16340279146435419,
      "grad_norm": 237.02072143554688,
      "learning_rate": 7.870842328606863e-06,
      "loss": 15.0099,
      "step": 285
    },
    {
      "epoch": 0.1639761345922993,
      "grad_norm": 236.75343322753906,
      "learning_rate": 7.85560276688089e-06,
      "loss": 14.8486,
      "step": 286
    },
    {
      "epoch": 0.1645494777202444,
      "grad_norm": 233.91934204101562,
      "learning_rate": 7.84032373365578e-06,
      "loss": 14.897,
      "step": 287
    },
    {
      "epoch": 0.1651228208481895,
      "grad_norm": 230.60330200195312,
      "learning_rate": 7.825005440125595e-06,
      "loss": 14.9105,
      "step": 288
    },
    {
      "epoch": 0.1656961639761346,
      "grad_norm": 235.03897094726562,
      "learning_rate": 7.809648098027067e-06,
      "loss": 14.994,
      "step": 289
    },
    {
      "epoch": 0.1662695071040797,
      "grad_norm": 233.12936401367188,
      "learning_rate": 7.794251919636687e-06,
      "loss": 14.9753,
      "step": 290
    },
    {
      "epoch": 0.1668428502320248,
      "grad_norm": 231.44244384765625,
      "learning_rate": 7.778817117767748e-06,
      "loss": 14.994,
      "step": 291
    },
    {
      "epoch": 0.1674161933599699,
      "grad_norm": 228.026611328125,
      "learning_rate": 7.76334390576742e-06,
      "loss": 14.9458,
      "step": 292
    },
    {
      "epoch": 0.167989536487915,
      "grad_norm": 231.06951904296875,
      "learning_rate": 7.747832497513797e-06,
      "loss": 14.9729,
      "step": 293
    },
    {
      "epoch": 0.1685628796158601,
      "grad_norm": 239.63568115234375,
      "learning_rate": 7.732283107412938e-06,
      "loss": 14.9274,
      "step": 294
    },
    {
      "epoch": 0.16913622274380521,
      "grad_norm": 220.87551879882812,
      "learning_rate": 7.71669595039591e-06,
      "loss": 14.9327,
      "step": 295
    },
    {
      "epoch": 0.16970956587175032,
      "grad_norm": 214.35519409179688,
      "learning_rate": 7.701071241915804e-06,
      "loss": 14.8955,
      "step": 296
    },
    {
      "epoch": 0.17028290899969542,
      "grad_norm": 229.36508178710938,
      "learning_rate": 7.685409197944768e-06,
      "loss": 14.903,
      "step": 297
    },
    {
      "epoch": 0.17085625212764052,
      "grad_norm": 224.3822021484375,
      "learning_rate": 7.669710034971025e-06,
      "loss": 14.9543,
      "step": 298
    },
    {
      "epoch": 0.17142959525558563,
      "grad_norm": 228.7742462158203,
      "learning_rate": 7.653973969995866e-06,
      "loss": 14.9022,
      "step": 299
    },
    {
      "epoch": 0.17200293838353073,
      "grad_norm": 228.00148010253906,
      "learning_rate": 7.638201220530664e-06,
      "loss": 14.8216,
      "step": 300
    },
    {
      "epoch": 0.17257628151147583,
      "grad_norm": 216.36854553222656,
      "learning_rate": 7.622392004593862e-06,
      "loss": 14.8582,
      "step": 301
    },
    {
      "epoch": 0.17314962463942093,
      "grad_norm": 221.77157592773438,
      "learning_rate": 7.60654654070796e-06,
      "loss": 14.9161,
      "step": 302
    },
    {
      "epoch": 0.17372296776736604,
      "grad_norm": 223.14935302734375,
      "learning_rate": 7.59066504789649e-06,
      "loss": 14.9057,
      "step": 303
    },
    {
      "epoch": 0.17429631089531114,
      "grad_norm": 219.07955932617188,
      "learning_rate": 7.574747745681e-06,
      "loss": 14.8669,
      "step": 304
    },
    {
      "epoch": 0.17486965402325624,
      "grad_norm": 226.2716827392578,
      "learning_rate": 7.558794854078006e-06,
      "loss": 14.8365,
      "step": 305
    },
    {
      "epoch": 0.17544299715120135,
      "grad_norm": 243.78469848632812,
      "learning_rate": 7.542806593595963e-06,
      "loss": 14.9013,
      "step": 306
    },
    {
      "epoch": 0.17601634027914642,
      "grad_norm": 214.9324188232422,
      "learning_rate": 7.526783185232208e-06,
      "loss": 14.7971,
      "step": 307
    },
    {
      "epoch": 0.17658968340709152,
      "grad_norm": 217.00315856933594,
      "learning_rate": 7.51072485046991e-06,
      "loss": 14.8198,
      "step": 308
    },
    {
      "epoch": 0.17716302653503663,
      "grad_norm": 230.4095001220703,
      "learning_rate": 7.494631811275008e-06,
      "loss": 14.8371,
      "step": 309
    },
    {
      "epoch": 0.17773636966298173,
      "grad_norm": 236.96478271484375,
      "learning_rate": 7.478504290093138e-06,
      "loss": 14.8929,
      "step": 310
    },
    {
      "epoch": 0.17830971279092683,
      "grad_norm": 222.3997344970703,
      "learning_rate": 7.462342509846571e-06,
      "loss": 14.9166,
      "step": 311
    },
    {
      "epoch": 0.17888305591887194,
      "grad_norm": 230.09429931640625,
      "learning_rate": 7.446146693931111e-06,
      "loss": 14.8528,
      "step": 312
    },
    {
      "epoch": 0.17945639904681704,
      "grad_norm": 222.97035217285156,
      "learning_rate": 7.42991706621303e-06,
      "loss": 14.8732,
      "step": 313
    },
    {
      "epoch": 0.18002974217476214,
      "grad_norm": 226.1836700439453,
      "learning_rate": 7.413653851025959e-06,
      "loss": 14.7586,
      "step": 314
    },
    {
      "epoch": 0.18060308530270724,
      "grad_norm": 222.79554748535156,
      "learning_rate": 7.397357273167789e-06,
      "loss": 14.8905,
      "step": 315
    },
    {
      "epoch": 0.18117642843065235,
      "grad_norm": 230.41497802734375,
      "learning_rate": 7.381027557897568e-06,
      "loss": 14.7686,
      "step": 316
    },
    {
      "epoch": 0.18174977155859745,
      "grad_norm": 207.27145385742188,
      "learning_rate": 7.364664930932385e-06,
      "loss": 14.8313,
      "step": 317
    },
    {
      "epoch": 0.18232311468654255,
      "grad_norm": 224.7344207763672,
      "learning_rate": 7.348269618444248e-06,
      "loss": 14.7949,
      "step": 318
    },
    {
      "epoch": 0.18289645781448766,
      "grad_norm": 227.63766479492188,
      "learning_rate": 7.331841847056962e-06,
      "loss": 14.7235,
      "step": 319
    },
    {
      "epoch": 0.18346980094243276,
      "grad_norm": 214.8011932373047,
      "learning_rate": 7.315381843842995e-06,
      "loss": 14.7835,
      "step": 320
    },
    {
      "epoch": 0.18404314407037786,
      "grad_norm": 217.45916748046875,
      "learning_rate": 7.298889836320334e-06,
      "loss": 14.8223,
      "step": 321
    },
    {
      "epoch": 0.18461648719832296,
      "grad_norm": 221.9704132080078,
      "learning_rate": 7.282366052449351e-06,
      "loss": 14.871,
      "step": 322
    },
    {
      "epoch": 0.18518983032626807,
      "grad_norm": 222.32537841796875,
      "learning_rate": 7.265810720629643e-06,
      "loss": 14.8007,
      "step": 323
    },
    {
      "epoch": 0.18576317345421317,
      "grad_norm": 227.74884033203125,
      "learning_rate": 7.249224069696876e-06,
      "loss": 14.8103,
      "step": 324
    },
    {
      "epoch": 0.18633651658215827,
      "grad_norm": 219.51748657226562,
      "learning_rate": 7.232606328919627e-06,
      "loss": 14.7732,
      "step": 325
    },
    {
      "epoch": 0.18690985971010338,
      "grad_norm": 217.20773315429688,
      "learning_rate": 7.215957727996208e-06,
      "loss": 14.7552,
      "step": 326
    },
    {
      "epoch": 0.18748320283804848,
      "grad_norm": 209.55203247070312,
      "learning_rate": 7.199278497051498e-06,
      "loss": 14.7018,
      "step": 327
    },
    {
      "epoch": 0.18805654596599358,
      "grad_norm": 214.1074676513672,
      "learning_rate": 7.182568866633757e-06,
      "loss": 14.7702,
      "step": 328
    },
    {
      "epoch": 0.18862988909393869,
      "grad_norm": 229.8917236328125,
      "learning_rate": 7.16582906771144e-06,
      "loss": 14.7891,
      "step": 329
    },
    {
      "epoch": 0.1892032322218838,
      "grad_norm": 217.26866149902344,
      "learning_rate": 7.149059331670009e-06,
      "loss": 14.7741,
      "step": 330
    },
    {
      "epoch": 0.1897765753498289,
      "grad_norm": 210.88253784179688,
      "learning_rate": 7.132259890308726e-06,
      "loss": 14.715,
      "step": 331
    },
    {
      "epoch": 0.190349918477774,
      "grad_norm": 231.31787109375,
      "learning_rate": 7.115430975837457e-06,
      "loss": 14.7906,
      "step": 332
    },
    {
      "epoch": 0.1909232616057191,
      "grad_norm": 224.2241973876953,
      "learning_rate": 7.098572820873461e-06,
      "loss": 14.7868,
      "step": 333
    },
    {
      "epoch": 0.1914966047336642,
      "grad_norm": 220.03028869628906,
      "learning_rate": 7.081685658438173e-06,
      "loss": 14.7613,
      "step": 334
    },
    {
      "epoch": 0.1920699478616093,
      "grad_norm": 213.73609924316406,
      "learning_rate": 7.064769721953975e-06,
      "loss": 14.7319,
      "step": 335
    },
    {
      "epoch": 0.1926432909895544,
      "grad_norm": 223.67706298828125,
      "learning_rate": 7.047825245240989e-06,
      "loss": 14.8181,
      "step": 336
    },
    {
      "epoch": 0.1932166341174995,
      "grad_norm": 207.2647705078125,
      "learning_rate": 7.030852462513827e-06,
      "loss": 14.7896,
      "step": 337
    },
    {
      "epoch": 0.1937899772454446,
      "grad_norm": 213.09942626953125,
      "learning_rate": 7.013851608378359e-06,
      "loss": 14.727,
      "step": 338
    },
    {
      "epoch": 0.19436332037338971,
      "grad_norm": 229.02037048339844,
      "learning_rate": 6.9968229178284775e-06,
      "loss": 14.7458,
      "step": 339
    },
    {
      "epoch": 0.19493666350133482,
      "grad_norm": 222.83213806152344,
      "learning_rate": 6.979766626242839e-06,
      "loss": 14.7459,
      "step": 340
    },
    {
      "epoch": 0.19551000662927992,
      "grad_norm": 220.72726440429688,
      "learning_rate": 6.9626829693816135e-06,
      "loss": 14.7011,
      "step": 341
    },
    {
      "epoch": 0.19608334975722502,
      "grad_norm": 214.8241424560547,
      "learning_rate": 6.945572183383229e-06,
      "loss": 14.7731,
      "step": 342
    },
    {
      "epoch": 0.19665669288517013,
      "grad_norm": 222.2461700439453,
      "learning_rate": 6.928434504761106e-06,
      "loss": 14.681,
      "step": 343
    },
    {
      "epoch": 0.19723003601311523,
      "grad_norm": 223.89845275878906,
      "learning_rate": 6.911270170400385e-06,
      "loss": 14.7092,
      "step": 344
    },
    {
      "epoch": 0.19780337914106033,
      "grad_norm": 219.92869567871094,
      "learning_rate": 6.894079417554657e-06,
      "loss": 14.8403,
      "step": 345
    },
    {
      "epoch": 0.19837672226900543,
      "grad_norm": 219.98406982421875,
      "learning_rate": 6.8768624838426815e-06,
      "loss": 14.7576,
      "step": 346
    },
    {
      "epoch": 0.19895006539695054,
      "grad_norm": 207.61367797851562,
      "learning_rate": 6.859619607245102e-06,
      "loss": 14.7059,
      "step": 347
    },
    {
      "epoch": 0.19952340852489564,
      "grad_norm": 206.98719787597656,
      "learning_rate": 6.842351026101155e-06,
      "loss": 14.6511,
      "step": 348
    },
    {
      "epoch": 0.20009675165284074,
      "grad_norm": 210.80372619628906,
      "learning_rate": 6.825056979105382e-06,
      "loss": 14.7222,
      "step": 349
    },
    {
      "epoch": 0.20067009478078585,
      "grad_norm": 213.69117736816406,
      "learning_rate": 6.807737705304324e-06,
      "loss": 14.7251,
      "step": 350
    },
    {
      "epoch": 0.20124343790873095,
      "grad_norm": 219.47328186035156,
      "learning_rate": 6.790393444093214e-06,
      "loss": 14.7487,
      "step": 351
    },
    {
      "epoch": 0.20181678103667605,
      "grad_norm": 214.07040405273438,
      "learning_rate": 6.773024435212678e-06,
      "loss": 14.6365,
      "step": 352
    },
    {
      "epoch": 0.20239012416462115,
      "grad_norm": 214.93496704101562,
      "learning_rate": 6.7556309187454185e-06,
      "loss": 14.6673,
      "step": 353
    },
    {
      "epoch": 0.20296346729256626,
      "grad_norm": 206.5713348388672,
      "learning_rate": 6.738213135112884e-06,
      "loss": 14.7522,
      "step": 354
    },
    {
      "epoch": 0.20353681042051136,
      "grad_norm": 210.60606384277344,
      "learning_rate": 6.720771325071965e-06,
      "loss": 14.6979,
      "step": 355
    },
    {
      "epoch": 0.20411015354845646,
      "grad_norm": 212.65887451171875,
      "learning_rate": 6.703305729711653e-06,
      "loss": 14.7409,
      "step": 356
    },
    {
      "epoch": 0.20468349667640157,
      "grad_norm": 216.2197723388672,
      "learning_rate": 6.685816590449708e-06,
      "loss": 14.7433,
      "step": 357
    },
    {
      "epoch": 0.20525683980434667,
      "grad_norm": 210.51260375976562,
      "learning_rate": 6.668304149029331e-06,
      "loss": 14.7338,
      "step": 358
    },
    {
      "epoch": 0.20583018293229177,
      "grad_norm": 210.6771697998047,
      "learning_rate": 6.650768647515813e-06,
      "loss": 14.7397,
      "step": 359
    },
    {
      "epoch": 0.20640352606023687,
      "grad_norm": 216.00897216796875,
      "learning_rate": 6.63321032829319e-06,
      "loss": 14.8058,
      "step": 360
    },
{ |
|
"epoch": 0.20697686918818198, |
|
"grad_norm": 206.54159545898438, |
|
"learning_rate": 6.615629434060903e-06, |
|
"loss": 14.6842, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.20755021231612708, |
|
"grad_norm": 213.61300659179688, |
|
"learning_rate": 6.598026207830428e-06, |
|
"loss": 14.6042, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.20812355544407216, |
|
"grad_norm": 217.9312744140625, |
|
"learning_rate": 6.5804008929219284e-06, |
|
"loss": 14.7647, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.20869689857201726, |
|
"grad_norm": 220.873291015625, |
|
"learning_rate": 6.562753732960887e-06, |
|
"loss": 14.7314, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.20927024169996236, |
|
"grad_norm": 223.9777069091797, |
|
"learning_rate": 6.545084971874738e-06, |
|
"loss": 14.7555, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.20984358482790746, |
|
"grad_norm": 217.828125, |
|
"learning_rate": 6.527394853889499e-06, |
|
"loss": 14.7245, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.21041692795585257, |
|
"grad_norm": 224.16778564453125, |
|
"learning_rate": 6.5096836235263904e-06, |
|
"loss": 14.7414, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.21099027108379767, |
|
"grad_norm": 216.91224670410156, |
|
"learning_rate": 6.491951525598461e-06, |
|
"loss": 14.6045, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.21156361421174277, |
|
"grad_norm": 209.5393829345703, |
|
"learning_rate": 6.4741988052071965e-06, |
|
"loss": 14.6805, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.21213695733968788, |
|
"grad_norm": 222.77627563476562, |
|
"learning_rate": 6.45642570773914e-06, |
|
"loss": 14.746, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.21271030046763298, |
|
"grad_norm": 216.05712890625, |
|
"learning_rate": 6.438632478862495e-06, |
|
"loss": 14.6645, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.21328364359557808, |
|
"grad_norm": 206.27911376953125, |
|
"learning_rate": 6.4208193645237314e-06, |
|
"loss": 14.6834, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.21385698672352318, |
|
"grad_norm": 215.7952880859375, |
|
"learning_rate": 6.402986610944183e-06, |
|
"loss": 14.7863, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2144303298514683, |
|
"grad_norm": 212.9938201904297, |
|
"learning_rate": 6.385134464616649e-06, |
|
"loss": 14.7525, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.2150036729794134, |
|
"grad_norm": 200.97154235839844, |
|
"learning_rate": 6.367263172301985e-06, |
|
"loss": 14.649, |
|
"step": 375 |
|
}, |
|
{
"epoch": 0.2155770161073585,
"grad_norm": 222.55943298339844,
"learning_rate": 6.3493729810256895e-06,
"loss": 14.7005,
"step": 376
},
{
"epoch": 0.2161503592353036,
"grad_norm": 220.4983367919922,
"learning_rate": 6.331464138074493e-06,
"loss": 14.7608,
"step": 377
},
{
"epoch": 0.2167237023632487,
"grad_norm": 213.09095764160156,
"learning_rate": 6.313536890992935e-06,
"loss": 14.5953,
"step": 378
},
{
"epoch": 0.2172970454911938,
"grad_norm": 211.12828063964844,
"learning_rate": 6.29559148757995e-06,
"loss": 14.6474,
"step": 379
},
{
"epoch": 0.2178703886191389,
"grad_norm": 222.33969116210938,
"learning_rate": 6.277628175885437e-06,
"loss": 14.7324,
"step": 380
},
{
"epoch": 0.218443731747084,
"grad_norm": 209.89747619628906,
"learning_rate": 6.2596472042068275e-06,
"loss": 14.622,
"step": 381
},
{
"epoch": 0.2190170748750291,
"grad_norm": 219.60342407226562,
"learning_rate": 6.241648821085666e-06,
"loss": 14.6497,
"step": 382
},
{
"epoch": 0.2195904180029742,
"grad_norm": 221.1376953125,
"learning_rate": 6.223633275304157e-06,
"loss": 14.7248,
"step": 383
},
{
"epoch": 0.22016376113091932,
"grad_norm": 217.87611389160156,
"learning_rate": 6.205600815881741e-06,
"loss": 14.7175,
"step": 384
},
{
"epoch": 0.22073710425886442,
"grad_norm": 210.81985473632812,
"learning_rate": 6.187551692071648e-06,
"loss": 14.7288,
"step": 385
},
{
"epoch": 0.22131044738680952,
"grad_norm": 218.46176147460938,
"learning_rate": 6.1694861533574445e-06,
"loss": 14.6473,
"step": 386
},
{
"epoch": 0.22188379051475463,
"grad_norm": 211.04080200195312,
"learning_rate": 6.1514044494496e-06,
"loss": 14.728,
"step": 387
},
{
"epoch": 0.22245713364269973,
"grad_norm": 214.88522338867188,
"learning_rate": 6.133306830282021e-06,
"loss": 14.5944,
"step": 388
},
{
"epoch": 0.22303047677064483,
"grad_norm": 214.91293334960938,
"learning_rate": 6.115193546008602e-06,
"loss": 14.6812,
"step": 389
},
{
"epoch": 0.22360381989858993,
"grad_norm": 218.2246856689453,
"learning_rate": 6.097064846999774e-06,
"loss": 14.6757,
"step": 390
},
{
"epoch": 0.22417716302653504,
"grad_norm": 209.82518005371094,
"learning_rate": 6.078920983839032e-06,
"loss": 14.6697,
"step": 391
},
{
"epoch": 0.22475050615448014,
"grad_norm": 219.08514404296875,
"learning_rate": 6.060762207319479e-06,
"loss": 14.663,
"step": 392
},
{
"epoch": 0.22532384928242524,
"grad_norm": 224.61856079101562,
"learning_rate": 6.042588768440358e-06,
"loss": 14.6559,
"step": 393
},
{
"epoch": 0.22589719241037035,
"grad_norm": 216.43028259277344,
"learning_rate": 6.024400918403581e-06,
"loss": 14.6848,
"step": 394
},
{
"epoch": 0.22647053553831545,
"grad_norm": 217.51576232910156,
"learning_rate": 6.006198908610261e-06,
"loss": 14.6885,
"step": 395
},
{
"epoch": 0.22704387866626055,
"grad_norm": 194.5399627685547,
"learning_rate": 5.987982990657229e-06,
"loss": 14.589,
"step": 396
},
{
"epoch": 0.22761722179420565,
"grad_norm": 214.05809020996094,
"learning_rate": 5.9697534163335645e-06,
"loss": 14.6364,
"step": 397
},
{
"epoch": 0.22819056492215076,
"grad_norm": 212.87832641601562,
"learning_rate": 5.95151043761711e-06,
"loss": 14.7834,
"step": 398
},
{
"epoch": 0.22876390805009586,
"grad_norm": 203.37142944335938,
"learning_rate": 5.933254306670995e-06,
"loss": 14.5586,
"step": 399
},
{
"epoch": 0.22933725117804096,
"grad_norm": 217.5912322998047,
"learning_rate": 5.914985275840135e-06,
"loss": 14.7334,
"step": 400
},
{
"epoch": 0.22991059430598607,
"grad_norm": 201.1334991455078,
"learning_rate": 5.896703597647765e-06,
"loss": 14.6263,
"step": 401
},
{
"epoch": 0.23048393743393117,
"grad_norm": 206.36265563964844,
"learning_rate": 5.878409524791931e-06,
"loss": 14.6252,
"step": 402
},
{
"epoch": 0.23105728056187627,
"grad_norm": 213.31422424316406,
"learning_rate": 5.8601033101420055e-06,
"loss": 14.718,
"step": 403
},
{
"epoch": 0.23163062368982137,
"grad_norm": 213.38626098632812,
"learning_rate": 5.841785206735192e-06,
"loss": 14.5727,
"step": 404
},
{
"epoch": 0.23220396681776648,
"grad_norm": 189.9121551513672,
"learning_rate": 5.823455467773027e-06,
"loss": 14.5197,
"step": 405
},
{
"epoch": 0.23277730994571158,
"grad_norm": 198.7380828857422,
"learning_rate": 5.805114346617874e-06,
"loss": 14.5848,
"step": 406
},
{
"epoch": 0.23335065307365668,
"grad_norm": 212.24783325195312,
"learning_rate": 5.786762096789431e-06,
"loss": 14.6107,
"step": 407
},
{
"epoch": 0.23392399620160179,
"grad_norm": 219.87643432617188,
"learning_rate": 5.768398971961221e-06,
"loss": 14.7092,
"step": 408
},
{
"epoch": 0.2344973393295469,
"grad_norm": 206.90530395507812,
"learning_rate": 5.750025225957086e-06,
"loss": 14.5481,
"step": 409
},
{
"epoch": 0.235070682457492,
"grad_norm": 202.2758331298828,
"learning_rate": 5.731641112747679e-06,
"loss": 14.6385,
"step": 410
},
{
"epoch": 0.2356440255854371,
"grad_norm": 215.7546844482422,
"learning_rate": 5.713246886446954e-06,
"loss": 14.5969,
"step": 411
},
{
"epoch": 0.2362173687133822,
"grad_norm": 208.98550415039062,
"learning_rate": 5.694842801308651e-06,
"loss": 14.6304,
"step": 412
},
{
"epoch": 0.2367907118413273,
"grad_norm": 207.6781005859375,
"learning_rate": 5.676429111722786e-06,
"loss": 14.6177,
"step": 413
},
{
"epoch": 0.2373640549692724,
"grad_norm": 201.2788543701172,
"learning_rate": 5.6580060722121325e-06,
"loss": 14.5918,
"step": 414
},
{
"epoch": 0.2379373980972175,
"grad_norm": 213.871826171875,
"learning_rate": 5.639573937428699e-06,
"loss": 14.5532,
"step": 415
},
{
"epoch": 0.2385107412251626,
"grad_norm": 196.2823486328125,
"learning_rate": 5.621132962150216e-06,
"loss": 14.5558,
"step": 416
},
{
"epoch": 0.2390840843531077,
"grad_norm": 199.7825927734375,
"learning_rate": 5.6026834012766155e-06,
"loss": 14.5658,
"step": 417
},
{
"epoch": 0.23965742748105281,
"grad_norm": 192.31263732910156,
"learning_rate": 5.584225509826497e-06,
"loss": 14.5083,
"step": 418
},
{
"epoch": 0.2402307706089979,
"grad_norm": 201.0004119873047,
"learning_rate": 5.565759542933612e-06,
"loss": 14.6235,
"step": 419
},
{
"epoch": 0.240804113736943,
"grad_norm": 197.17825317382812,
"learning_rate": 5.547285755843334e-06,
"loss": 14.5237,
"step": 420
},
{
"epoch": 0.2413774568648881,
"grad_norm": 209.01620483398438,
"learning_rate": 5.5288044039091335e-06,
"loss": 14.596,
"step": 421
},
{
"epoch": 0.2419507999928332,
"grad_norm": 204.07884216308594,
"learning_rate": 5.510315742589042e-06,
"loss": 14.617,
"step": 422
},
{
"epoch": 0.2425241431207783,
"grad_norm": 208.53651428222656,
"learning_rate": 5.491820027442126e-06,
"loss": 14.6785,
"step": 423
},
{
"epoch": 0.2430974862487234,
"grad_norm": 199.32315063476562,
"learning_rate": 5.473317514124958e-06,
"loss": 14.512,
"step": 424
},
{
"epoch": 0.2436708293766685,
"grad_norm": 206.72837829589844,
"learning_rate": 5.454808458388069e-06,
"loss": 14.6038,
"step": 425
},
{
"epoch": 0.2442441725046136,
"grad_norm": 196.9921112060547,
"learning_rate": 5.436293116072431e-06,
"loss": 14.5451,
"step": 426
},
{
"epoch": 0.2448175156325587,
"grad_norm": 207.21530151367188,
"learning_rate": 5.417771743105908e-06,
"loss": 14.551,
"step": 427
},
{
"epoch": 0.24539085876050382,
"grad_norm": 201.5275115966797,
"learning_rate": 5.399244595499721e-06,
"loss": 14.5262,
"step": 428
},
{
"epoch": 0.24596420188844892,
"grad_norm": 204.6480712890625,
"learning_rate": 5.380711929344915e-06,
"loss": 14.4846,
"step": 429
},
{
"epoch": 0.24653754501639402,
"grad_norm": 194.9892120361328,
"learning_rate": 5.362174000808813e-06,
"loss": 14.5942,
"step": 430
},
{
"epoch": 0.24711088814433912,
"grad_norm": 199.96047973632812,
"learning_rate": 5.343631066131476e-06,
"loss": 14.6091,
"step": 431
},
{
"epoch": 0.24768423127228423,
"grad_norm": 212.93307495117188,
"learning_rate": 5.325083381622165e-06,
"loss": 14.5455,
"step": 432
},
{
"epoch": 0.24825757440022933,
"grad_norm": 194.9511260986328,
"learning_rate": 5.30653120365579e-06,
"loss": 14.5044,
"step": 433
},
{
"epoch": 0.24883091752817443,
"grad_norm": 200.14315795898438,
"learning_rate": 5.28797478866938e-06,
"loss": 14.6439,
"step": 434
},
{
"epoch": 0.24940426065611954,
"grad_norm": 197.60902404785156,
"learning_rate": 5.269414393158523e-06,
"loss": 14.5721,
"step": 435
},
{
"epoch": 0.24997760378406464,
"grad_norm": 192.06671142578125,
"learning_rate": 5.250850273673831e-06,
"loss": 14.5812,
"step": 436
},
{
"epoch": 0.25055094691200974,
"grad_norm": 189.84034729003906,
"learning_rate": 5.232282686817392e-06,
"loss": 14.6002,
"step": 437
},
{
"epoch": 0.25112429003995484,
"grad_norm": 195.87533569335938,
"learning_rate": 5.213711889239214e-06,
"loss": 14.4797,
"step": 438
},
{
"epoch": 0.25169763316789995,
"grad_norm": 186.12464904785156,
"learning_rate": 5.195138137633695e-06,
"loss": 14.5298,
"step": 439
},
{
"epoch": 0.25227097629584505,
"grad_norm": 189.66380310058594,
"learning_rate": 5.17656168873606e-06,
"loss": 14.4488,
"step": 440
},
{
"epoch": 0.25284431942379015,
"grad_norm": 196.0492401123047,
"learning_rate": 5.157982799318817e-06,
"loss": 14.5268,
"step": 441
},
{
"epoch": 0.25341766255173526,
"grad_norm": 192.8926239013672,
"learning_rate": 5.139401726188208e-06,
"loss": 14.555,
"step": 442
},
{
"epoch": 0.25399100567968036,
"grad_norm": 201.20632934570312,
"learning_rate": 5.120818726180662e-06,
"loss": 14.4914,
"step": 443
},
{
"epoch": 0.25456434880762546,
"grad_norm": 200.86207580566406,
"learning_rate": 5.1022340561592396e-06,
"loss": 14.5471,
"step": 444
},
{
"epoch": 0.25513769193557057,
"grad_norm": 203.37557983398438,
"learning_rate": 5.083647973010085e-06,
"loss": 14.5438,
"step": 445
},
{
"epoch": 0.25571103506351567,
"grad_norm": 193.55697631835938,
"learning_rate": 5.065060733638878e-06,
"loss": 14.4965,
"step": 446
},
{
"epoch": 0.25628437819146077,
"grad_norm": 195.2728271484375,
"learning_rate": 5.046472594967279e-06,
"loss": 14.5723,
"step": 447
},
{
"epoch": 0.2568577213194059,
"grad_norm": 197.77818298339844,
"learning_rate": 5.027883813929374e-06,
"loss": 14.4772,
"step": 448
},
{
"epoch": 0.257431064447351,
"grad_norm": 196.05238342285156,
"learning_rate": 5.009294647468137e-06,
"loss": 14.5655,
"step": 449
},
{
"epoch": 0.2580044075752961,
"grad_norm": 194.8416290283203,
"learning_rate": 4.990705352531864e-06,
"loss": 14.5701,
"step": 450
},
{
"epoch": 0.2585777507032412,
"grad_norm": 193.21575927734375,
"learning_rate": 4.972116186070626e-06,
"loss": 14.5292,
"step": 451
},
{
"epoch": 0.2591510938311863,
"grad_norm": 189.819580078125,
"learning_rate": 4.953527405032723e-06,
"loss": 14.4925,
"step": 452
},
{
"epoch": 0.2597244369591314,
"grad_norm": 194.4360809326172,
"learning_rate": 4.934939266361123e-06,
"loss": 14.4965,
"step": 453
},
{
"epoch": 0.2602977800870765,
"grad_norm": 198.99061584472656,
"learning_rate": 4.916352026989914e-06,
"loss": 14.484,
"step": 454
},
{
"epoch": 0.2608711232150216,
"grad_norm": 193.81446838378906,
"learning_rate": 4.897765943840761e-06,
"loss": 14.527,
"step": 455
},
{
"epoch": 0.2614444663429667,
"grad_norm": 189.20484924316406,
"learning_rate": 4.87918127381934e-06,
"loss": 14.4895,
"step": 456
},
{
"epoch": 0.2620178094709118,
"grad_norm": 190.6830291748047,
"learning_rate": 4.860598273811793e-06,
"loss": 14.4308,
"step": 457
},
{
"epoch": 0.2625911525988569,
"grad_norm": 189.31912231445312,
"learning_rate": 4.842017200681185e-06,
"loss": 14.5519,
"step": 458
},
{
"epoch": 0.263164495726802,
"grad_norm": 188.8474578857422,
"learning_rate": 4.823438311263943e-06,
"loss": 14.4147,
"step": 459
},
{
"epoch": 0.2637378388547471,
"grad_norm": 192.68406677246094,
"learning_rate": 4.804861862366306e-06,
"loss": 14.471,
"step": 460
},
{
"epoch": 0.2643111819826922,
"grad_norm": 188.2942657470703,
"learning_rate": 4.786288110760787e-06,
"loss": 14.5164,
"step": 461
},
{
"epoch": 0.2648845251106373,
"grad_norm": 191.98313903808594,
"learning_rate": 4.767717313182611e-06,
"loss": 14.3865,
"step": 462
},
{
"epoch": 0.2654578682385824,
"grad_norm": 197.7642364501953,
"learning_rate": 4.74914972632617e-06,
"loss": 14.6162,
"step": 463
},
{
"epoch": 0.2660312113665275,
"grad_norm": 199.40097045898438,
"learning_rate": 4.730585606841479e-06,
"loss": 14.4812,
"step": 464
},
{
"epoch": 0.2666045544944726,
"grad_norm": 191.48199462890625,
"learning_rate": 4.7120252113306216e-06,
"loss": 14.445,
"step": 465
},
{
"epoch": 0.2671778976224177,
"grad_norm": 195.9621124267578,
"learning_rate": 4.693468796344211e-06,
"loss": 14.4466,
"step": 466
},
{
"epoch": 0.26775124075036283,
"grad_norm": 193.89913940429688,
"learning_rate": 4.6749166183778375e-06,
"loss": 14.4653,
"step": 467
},
{
"epoch": 0.26832458387830793,
"grad_norm": 185.12448120117188,
"learning_rate": 4.656368933868525e-06,
"loss": 14.4962,
"step": 468
},
{
"epoch": 0.26889792700625303,
"grad_norm": 188.17173767089844,
"learning_rate": 4.637825999191189e-06,
"loss": 14.4282,
"step": 469
},
{
"epoch": 0.26947127013419814,
"grad_norm": 179.78378295898438,
"learning_rate": 4.619288070655086e-06,
"loss": 14.4112,
"step": 470
},
{
"epoch": 0.27004461326214324,
"grad_norm": 184.57598876953125,
"learning_rate": 4.600755404500281e-06,
"loss": 14.4972,
"step": 471
},
{
"epoch": 0.27061795639008834,
"grad_norm": 190.61500549316406,
"learning_rate": 4.582228256894093e-06,
"loss": 14.4585,
"step": 472
},
{
"epoch": 0.27119129951803345,
"grad_norm": 191.43365478515625,
"learning_rate": 4.56370688392757e-06,
"loss": 14.3984,
"step": 473
},
{
"epoch": 0.27176464264597855,
"grad_norm": 189.6448211669922,
"learning_rate": 4.545191541611933e-06,
"loss": 14.4596,
"step": 474
},
{
"epoch": 0.27233798577392365,
"grad_norm": 199.958740234375,
"learning_rate": 4.526682485875044e-06,
"loss": 14.5124,
"step": 475
},
{
"epoch": 0.27291132890186875,
"grad_norm": 187.1591033935547,
"learning_rate": 4.508179972557875e-06,
"loss": 14.4502,
"step": 476
},
{
"epoch": 0.27348467202981386,
"grad_norm": 212.7876739501953,
"learning_rate": 4.489684257410959e-06,
"loss": 14.4952,
"step": 477
},
{
"epoch": 0.27405801515775896,
"grad_norm": 197.2154541015625,
"learning_rate": 4.471195596090867e-06,
"loss": 14.5392,
"step": 478
},
{
"epoch": 0.27463135828570406,
"grad_norm": 193.218505859375,
"learning_rate": 4.452714244156667e-06,
"loss": 14.5221,
"step": 479
},
{
"epoch": 0.27520470141364917,
"grad_norm": 195.3530731201172,
"learning_rate": 4.434240457066388e-06,
"loss": 14.4045,
"step": 480
},
{
"epoch": 0.27577804454159427,
"grad_norm": 191.08155822753906,
"learning_rate": 4.415774490173504e-06,
"loss": 14.363,
"step": 481
},
{
"epoch": 0.2763513876695394,
"grad_norm": 205.4665985107422,
"learning_rate": 4.397316598723385e-06,
"loss": 14.5536,
"step": 482
},
{
"epoch": 0.2769247307974845,
"grad_norm": 202.93714904785156,
"learning_rate": 4.3788670378497836e-06,
"loss": 14.4253,
"step": 483
},
{
"epoch": 0.2774980739254296,
"grad_norm": 199.6490020751953,
"learning_rate": 4.360426062571303e-06,
"loss": 14.5529,
"step": 484
},
{
"epoch": 0.2780714170533747,
"grad_norm": 198.09494018554688,
"learning_rate": 4.341993927787871e-06,
"loss": 14.4701,
"step": 485
},
{
"epoch": 0.2786447601813198,
"grad_norm": 194.7907257080078,
"learning_rate": 4.323570888277215e-06,
"loss": 14.4267,
"step": 486
},
{
"epoch": 0.2792181033092649,
"grad_norm": 204.8142852783203,
"learning_rate": 4.305157198691351e-06,
"loss": 14.4313,
"step": 487
},
{
"epoch": 0.27979144643721,
"grad_norm": 199.0611572265625,
"learning_rate": 4.286753113553049e-06,
"loss": 14.4615,
"step": 488
},
{
"epoch": 0.2803647895651551,
"grad_norm": 188.00750732421875,
"learning_rate": 4.268358887252322e-06,
"loss": 14.3631,
"step": 489
},
{
"epoch": 0.2809381326931002,
"grad_norm": 191.73825073242188,
"learning_rate": 4.249974774042915e-06,
"loss": 14.4741,
"step": 490
},
{
"epoch": 0.2815114758210453,
"grad_norm": 188.29759216308594,
"learning_rate": 4.231601028038781e-06,
"loss": 14.446,
"step": 491
},
{
"epoch": 0.2820848189489904,
"grad_norm": 197.5531768798828,
"learning_rate": 4.2132379032105695e-06,
"loss": 14.4405,
"step": 492
},
{
"epoch": 0.2826581620769355,
"grad_norm": 190.16937255859375,
"learning_rate": 4.194885653382128e-06,
"loss": 14.3906,
"step": 493
},
{
"epoch": 0.2832315052048806,
"grad_norm": 188.8497772216797,
"learning_rate": 4.176544532226974e-06,
"loss": 14.4415,
"step": 494
},
{
"epoch": 0.2838048483328257,
"grad_norm": 186.59799194335938,
"learning_rate": 4.158214793264808e-06,
"loss": 14.4197,
"step": 495
},
{
"epoch": 0.2843781914607708,
"grad_norm": 184.35581970214844,
"learning_rate": 4.139896689857995e-06,
"loss": 14.3536,
"step": 496
},
{
"epoch": 0.2849515345887159,
"grad_norm": 199.46311950683594,
"learning_rate": 4.121590475208071e-06,
"loss": 14.4356,
"step": 497
},
{
"epoch": 0.285524877716661,
"grad_norm": 200.33966064453125,
"learning_rate": 4.1032964023522366e-06,
"loss": 14.4552,
"step": 498
},
{
"epoch": 0.2860982208446061,
"grad_norm": 189.87977600097656,
"learning_rate": 4.085014724159866e-06,
"loss": 14.3919,
"step": 499
},
{
"epoch": 0.2866715639725512,
"grad_norm": 196.80152893066406,
"learning_rate": 4.066745693329008e-06,
"loss": 14.5031,
"step": 500
},
{
"epoch": 0.2872449071004963,
"grad_norm": 193.42140197753906,
"learning_rate": 4.0484895623828906e-06,
"loss": 14.4403,
"step": 501
},
{
"epoch": 0.28781825022844143,
"grad_norm": 194.4940948486328,
"learning_rate": 4.030246583666437e-06,
"loss": 14.4734,
"step": 502
},
{
"epoch": 0.2883915933563865,
"grad_norm": 192.37107849121094,
"learning_rate": 4.012017009342773e-06,
"loss": 14.4512,
"step": 503
},
{
"epoch": 0.2889649364843316,
"grad_norm": 181.2819366455078,
"learning_rate": 3.99380109138974e-06,
"loss": 14.4906,
"step": 504
},
{
"epoch": 0.2895382796122767,
"grad_norm": 199.6365509033203,
"learning_rate": 3.97559908159642e-06,
"loss": 14.4517,
"step": 505
},
{
"epoch": 0.2901116227402218,
"grad_norm": 182.9588165283203,
"learning_rate": 3.9574112315596425e-06,
"loss": 14.4496,
"step": 506
},
{
"epoch": 0.2906849658681669,
"grad_norm": 183.8024139404297,
"learning_rate": 3.9392377926805226e-06,
"loss": 14.403,
"step": 507
},
{
"epoch": 0.291258308996112,
"grad_norm": 195.86257934570312,
"learning_rate": 3.92107901616097e-06,
"loss": 14.3586,
"step": 508
},
{
"epoch": 0.2918316521240571,
"grad_norm": 193.3267822265625,
"learning_rate": 3.9029351530002264e-06,
"loss": 14.4352,
"step": 509
},
{
"epoch": 0.2924049952520022,
"grad_norm": 189.76773071289062,
"learning_rate": 3.884806453991399e-06,
"loss": 14.3374,
"step": 510
},
{
"epoch": 0.2929783383799473,
"grad_norm": 190.036865234375,
"learning_rate": 3.866693169717982e-06,
"loss": 14.3719,
"step": 511
},
{
"epoch": 0.2935516815078924,
"grad_norm": 187.96229553222656,
"learning_rate": 3.848595550550401e-06,
"loss": 14.4594,
"step": 512
},
{
"epoch": 0.2941250246358375,
"grad_norm": 189.76959228515625,
"learning_rate": 3.830513846642556e-06,
"loss": 14.3997,
"step": 513
},
{
"epoch": 0.2946983677637826,
"grad_norm": 188.51016235351562,
"learning_rate": 3.8124483079283546e-06,
"loss": 14.3977,
"step": 514
},
{
"epoch": 0.2952717108917277,
"grad_norm": 182.27618408203125,
"learning_rate": 3.7943991841182586e-06,
"loss": 14.3342,
"step": 515
},
{
"epoch": 0.2958450540196728,
"grad_norm": 194.53384399414062,
"learning_rate": 3.7763667246958447e-06,
"loss": 14.3353,
"step": 516
},
{
"epoch": 0.2964183971476179,
"grad_norm": 186.60391235351562,
"learning_rate": 3.758351178914336e-06,
"loss": 14.3462,
"step": 517
},
{
"epoch": 0.296991740275563,
"grad_norm": 208.77110290527344,
"learning_rate": 3.7403527957931716e-06,
"loss": 14.4527,
"step": 518
},
{
"epoch": 0.2975650834035081,
"grad_norm": 192.8214111328125,
"learning_rate": 3.7223718241145646e-06,
"loss": 14.3971,
"step": 519
},
{
"epoch": 0.2981384265314532,
"grad_norm": 185.70005798339844,
"learning_rate": 3.7044085124200517e-06,
"loss": 14.3432,
"step": 520
},
{
"epoch": 0.29871176965939833,
"grad_norm": 196.39981079101562,
"learning_rate": 3.6864631090070656e-06,
"loss": 14.5102,
"step": 521
},
{
"epoch": 0.29928511278734343,
"grad_norm": 187.2920684814453,
"learning_rate": 3.668535861925509e-06,
"loss": 14.4782,
"step": 522
},
{
"epoch": 0.29985845591528854,
"grad_norm": 186.00146484375,
"learning_rate": 3.650627018974312e-06,
"loss": 14.4494,
"step": 523
},
{
"epoch": 0.30043179904323364,
"grad_norm": 189.43801879882812,
"learning_rate": 3.632736827698015e-06,
"loss": 14.3908,
"step": 524
},
{
"epoch": 0.30100514217117874,
"grad_norm": 201.06126403808594,
"learning_rate": 3.6148655353833518e-06,
"loss": 14.458,
"step": 525
},
{
"epoch": 0.30157848529912384,
"grad_norm": 190.3157501220703,
"learning_rate": 3.5970133890558184e-06,
"loss": 14.3939,
"step": 526
},
{
"epoch": 0.30215182842706895,
"grad_norm": 203.18019104003906,
"learning_rate": 3.5791806354762702e-06,
"loss": 14.4642,
"step": 527
},
{
"epoch": 0.30272517155501405,
"grad_norm": 186.1299285888672,
"learning_rate": 3.5613675211375066e-06,
"loss": 14.3403,
"step": 528
},
{
"epoch": 0.30329851468295915,
"grad_norm": 188.37765502929688,
"learning_rate": 3.5435742922608618e-06,
"loss": 14.3578,
"step": 529
},
{
"epoch": 0.30387185781090426,
"grad_norm": 184.9286346435547,
"learning_rate": 3.525801194792805e-06,
"loss": 14.3543,
"step": 530
},
{
"epoch": 0.30444520093884936,
"grad_norm": 193.71884155273438,
"learning_rate": 3.508048474401541e-06,
"loss": 14.3639,
"step": 531
},
{
"epoch": 0.30501854406679446,
"grad_norm": 187.72390747070312,
"learning_rate": 3.4903163764736104e-06,
"loss": 14.2493,
"step": 532
},
{
"epoch": 0.30559188719473956,
"grad_norm": 195.72886657714844,
"learning_rate": 3.4726051461105016e-06,
"loss": 14.4045,
"step": 533
},
{
"epoch": 0.30616523032268467,
"grad_norm": 185.08929443359375,
"learning_rate": 3.4549150281252635e-06,
"loss": 14.4521,
"step": 534
},
{
"epoch": 0.30673857345062977,
"grad_norm": 182.60292053222656,
"learning_rate": 3.437246267039115e-06,
"loss": 14.3866,
"step": 535
},
{
"epoch": 0.3073119165785749,
"grad_norm": 181.70509338378906,
"learning_rate": 3.419599107078073e-06,
"loss": 14.4036,
"step": 536
},
{
"epoch": 0.30788525970652,
"grad_norm": 187.29672241210938,
"learning_rate": 3.401973792169574e-06,
"loss": 14.3734,
"step": 537
},
{
"epoch": 0.3084586028344651,
"grad_norm": 187.84115600585938,
"learning_rate": 3.384370565939098e-06,
"loss": 14.4167,
"step": 538
},
{
"epoch": 0.3090319459624102,
"grad_norm": 200.47061157226562,
"learning_rate": 3.3667896717068105e-06,
"loss": 14.4517,
"step": 539
},
{
"epoch": 0.3096052890903553,
"grad_norm": 192.6443634033203,
"learning_rate": 3.34923135248419e-06,
"loss": 14.4143,
"step": 540
},
{
"epoch": 0.3101786322183004,
"grad_norm": 189.818115234375,
"learning_rate": 3.33169585097067e-06,
"loss": 14.3478,
"step": 541
},
{
"epoch": 0.3107519753462455,
"grad_norm": 185.73080444335938,
"learning_rate": 3.314183409550293e-06,
"loss": 14.3765,
"step": 542
},
{
"epoch": 0.3113253184741906,
"grad_norm": 183.9041290283203,
"learning_rate": 3.2966942702883494e-06,
"loss": 14.3506,
"step": 543
},
{
"epoch": 0.3118986616021357,
"grad_norm": 188.9761505126953,
"learning_rate": 3.279228674928035e-06,
"loss": 14.4349,
"step": 544
},
{
"epoch": 0.3124720047300808,
"grad_norm": 190.45909118652344,
"learning_rate": 3.261786864887117e-06,
"loss": 14.3562,
"step": 545
},
{
"epoch": 0.3130453478580259,
"grad_norm": 191.3506317138672,
"learning_rate": 3.244369081254585e-06,
"loss": 14.2781,
"step": 546
},
{
"epoch": 0.313618690985971,
"grad_norm": 181.74490356445312,
"learning_rate": 3.226975564787322e-06,
"loss": 14.3264,
"step": 547
},
{
"epoch": 0.3141920341139161,
"grad_norm": 186.11990356445312,
"learning_rate": 3.209606555906788e-06,
"loss": 14.3599,
"step": 548
},
{
"epoch": 0.3147653772418612,
"grad_norm": 192.1141357421875,
"learning_rate": 3.192262294695679e-06,
"loss": 14.3444,
"step": 549
},
{
"epoch": 0.3153387203698063,
"grad_norm": 193.52890014648438,
"learning_rate": 3.174943020894618e-06,
"loss": 14.4323,
"step": 550
},
{
"epoch": 0.3159120634977514,
"grad_norm": 183.9879150390625,
"learning_rate": 3.1576489738988457e-06,
"loss": 14.2539,
"step": 551
},
{
"epoch": 0.3164854066256965,
"grad_norm": 186.39529418945312,
"learning_rate": 3.140380392754901e-06,
"loss": 14.3633,
"step": 552
},
{
"epoch": 0.3170587497536416,
"grad_norm": 193.56439208984375,
"learning_rate": 3.12313751615732e-06,
"loss": 14.3256,
"step": 553
},
{
"epoch": 0.3176320928815867,
"grad_norm": 187.15281677246094,
"learning_rate": 3.1059205824453446e-06,
"loss": 14.3763,
"step": 554
},
{
"epoch": 0.31820543600953183,
"grad_norm": 188.94200134277344,
"learning_rate": 3.0887298295996183e-06,
"loss": 14.3864,
"step": 555
},
{
"epoch": 0.31877877913747693,
"grad_norm": 186.75950622558594,
"learning_rate": 3.0715654952388957e-06,
"loss": 14.3803,
"step": 556
},
{
"epoch": 0.31935212226542203,
"grad_norm": 193.4385223388672,
"learning_rate": 3.054427816616773e-06,
"loss": 14.2965,
"step": 557
},
{
"epoch": 0.31992546539336714,
"grad_norm": 188.6703338623047,
"learning_rate": 3.0373170306183885e-06,
"loss": 14.4114,
"step": 558
},
{
"epoch": 0.32049880852131224,
"grad_norm": 194.2964630126953,
"learning_rate": 3.020233373757162e-06,
"loss": 14.2351,
"step": 559
},
{
"epoch": 0.32107215164925734,
"grad_norm": 204.58041381835938,
"learning_rate": 3.0031770821715233e-06,
"loss": 14.3925,
"step": 560
},
{
"epoch": 0.32164549477720245,
"grad_norm": 182.63665771484375,
"learning_rate": 2.9861483916216404e-06,
"loss": 14.371,
"step": 561
},
{
"epoch": 0.32221883790514755,
"grad_norm": 201.17764282226562,
"learning_rate": 2.969147537486175e-06,
"loss": 14.301,
"step": 562
},
{
"epoch": 0.32279218103309265,
"grad_norm": 187.64376831054688,
"learning_rate": 2.952174754759012e-06,
"loss": 14.3037,
"step": 563
},
{
"epoch": 0.32336552416103775,
"grad_norm": 182.01651000976562,
"learning_rate": 2.935230278046025e-06,
"loss": 14.2326,
"step": 564
},
{
"epoch": 0.32393886728898286,
"grad_norm": 184.65011596679688,
"learning_rate": 2.9183143415618297e-06,
"loss": 14.3121,
"step": 565
},
{
"epoch": 0.32451221041692796,
"grad_norm": 172.1057891845703,
"learning_rate": 2.9014271791265403e-06,
"loss": 14.203,
"step": 566
},
{
"epoch": 0.32508555354487306,
"grad_norm": 178.24777221679688,
"learning_rate": 2.8845690241625437e-06,
"loss": 14.3961,
"step": 567
},
{
"epoch": 0.32565889667281817,
"grad_norm": 198.43179321289062,
"learning_rate": 2.867740109691277e-06,
"loss": 14.3644,
"step": 568
},
{
"epoch": 0.32623223980076327,
"grad_norm": 184.53721618652344,
"learning_rate": 2.850940668329996e-06,
"loss": 14.3736,
"step": 569
},
{
"epoch": 0.32680558292870837,
"grad_norm": 186.57337951660156,
"learning_rate": 2.8341709322885624e-06,
"loss": 14.2914,
"step": 570
},
{
"epoch": 0.3273789260566535,
"grad_norm": 194.31634521484375,
"learning_rate": 2.817431133366246e-06,
"loss": 14.3647,
"step": 571
},
{
"epoch": 0.3279522691845986,
"grad_norm": 189.49636840820312,
"learning_rate": 2.800721502948506e-06,
"loss": 14.4111,
"step": 572
},
{
"epoch": 0.3285256123125437,
"grad_norm": 194.70204162597656,
"learning_rate": 2.7840422720037943e-06,
"loss": 14.4538,
"step": 573
},
{
"epoch": 0.3290989554404888,
"grad_norm": 191.64688110351562,
"learning_rate": 2.767393671080376e-06,
"loss": 14.2899,
"step": 574
},
{
"epoch": 0.3296722985684339,
"grad_norm": 193.7047576904297,
"learning_rate": 2.7507759303031257e-06,
"loss": 14.3198,
"step": 575
},
{
"epoch": 0.330245641696379,
"grad_norm": 189.0587158203125,
"learning_rate": 2.7341892793703594e-06,
"loss": 14.3457,
"step": 576
},
{
"epoch": 0.3308189848243241,
"grad_norm": 188.7035675048828,
"learning_rate": 2.7176339475506515e-06,
"loss": 14.2817,
"step": 577
},
{
"epoch": 0.3313923279522692,
"grad_norm": 184.22344970703125,
"learning_rate": 2.7011101636796677e-06,
"loss": 14.3146,
"step": 578
},
{
"epoch": 0.3319656710802143,
"grad_norm": 180.2777557373047,
"learning_rate": 2.6846181561570085e-06,
"loss": 14.3799,
"step": 579
},
{
"epoch": 0.3325390142081594,
"grad_norm": 185.93838500976562,
"learning_rate": 2.668158152943039e-06,
"loss": 14.3632,
"step": 580
},
{
"epoch": 0.3331123573361045,
"grad_norm": 183.86941528320312,
"learning_rate": 2.651730381555754e-06,
"loss": 14.3327,
"step": 581
},
{
"epoch": 0.3336857004640496,
"grad_norm": 184.0933074951172,
"learning_rate": 2.635335069067617e-06,
"loss": 14.3807,
"step": 582
},
{
"epoch": 0.3342590435919947,
"grad_norm": 183.67532348632812,
"learning_rate": 2.618972442102432e-06,
"loss": 14.4402,
"step": 583
},
{
"epoch": 0.3348323867199398,
"grad_norm": 185.25009155273438,
"learning_rate": 2.602642726832212e-06,
"loss": 14.3258,
"step": 584
},
{
"epoch": 0.3354057298478849,
"grad_norm": 186.76087951660156,
"learning_rate": 2.5863461489740403e-06,
"loss": 14.2503,
"step": 585
},
{
"epoch": 0.33597907297583,
"grad_norm": 183.74209594726562,
"learning_rate": 2.57008293378697e-06,
"loss": 14.282,
"step": 586
},
{
"epoch": 0.3365524161037751,
"grad_norm": 185.21743774414062,
"learning_rate": 2.553853306068888e-06,
"loss": 14.3058,
"step": 587
},
{
"epoch": 0.3371257592317202,
"grad_norm": 180.64405822753906,
"learning_rate": 2.5376574901534303e-06,
"loss": 14.2191,
"step": 588
},
{
"epoch": 0.3376991023596653,
"grad_norm": 197.49221801757812,
"learning_rate": 2.5214957099068613e-06,
"loss": 14.2684,
"step": 589
},
{
"epoch": 0.33827244548761043,
"grad_norm": 178.35708618164062,
"learning_rate": 2.5053681887249916e-06,
"loss": 14.2358,
"step": 590
},
{
"epoch": 0.33884578861555553,
"grad_norm": 181.4188995361328,
"learning_rate": 2.4892751495300893e-06,
"loss": 14.3204,
"step": 591
},
{
"epoch": 0.33941913174350063,
"grad_norm": 178.8732452392578,
"learning_rate": 2.4732168147677927e-06,
"loss": 14.2609,
"step": 592
},
{
"epoch": 0.33999247487144574,
"grad_norm": 191.7628631591797,
"learning_rate": 2.4571934064040364e-06,
"loss": 14.2528,
"step": 593
},
{
"epoch": 0.34056581799939084,
"grad_norm": 193.52305603027344,
"learning_rate": 2.4412051459219945e-06,
"loss": 14.3341,
"step": 594
},
{
"epoch": 0.34113916112733594,
"grad_norm": 198.21897888183594,
"learning_rate": 2.425252254319002e-06,
"loss": 14.3828,
"step": 595
},
{
"epoch": 0.34171250425528105,
"grad_norm": 191.85609436035156,
"learning_rate": 2.4093349521035105e-06,
"loss": 14.3309,
"step": 596
},
{
"epoch": 0.34228584738322615,
"grad_norm": 185.22528076171875,
"learning_rate": 2.3934534592920416e-06,
"loss": 14.2623,
"step": 597
},
{
"epoch": 0.34285919051117125,
"grad_norm": 188.74754333496094,
"learning_rate": 2.3776079954061385e-06,
"loss": 14.4269,
"step": 598
},
{
"epoch": 0.34343253363911636,
"grad_norm": 178.31825256347656,
"learning_rate": 2.3617987794693358e-06,
"loss": 14.2489,
"step": 599
},
{
"epoch": 0.34400587676706146,
"grad_norm": 188.00209045410156,
"learning_rate": 2.3460260300041355e-06,
"loss": 14.3401,
"step": 600
},
{
"epoch": 0.34457921989500656,
"grad_norm": 191.75465393066406,
"learning_rate": 2.3302899650289773e-06,
"loss": 14.3273,
"step": 601
},
{
"epoch": 0.34515256302295166,
"grad_norm": 185.55166625976562,
"learning_rate": 2.314590802055232e-06,
"loss": 14.3695,
"step": 602
},
{
"epoch": 0.34572590615089677,
"grad_norm": 177.90130615234375,
"learning_rate": 2.2989287580841985e-06,
"loss": 14.3113,
"step": 603
},
{
"epoch": 0.34629924927884187,
"grad_norm": 189.20179748535156,
"learning_rate": 2.2833040496040925e-06,
"loss": 14.2244,
"step": 604
},
{
"epoch": 0.346872592406787,
"grad_norm": 192.3074493408203,
"learning_rate": 2.267716892587062e-06,
"loss": 14.28,
"step": 605
},
{
"epoch": 0.3474459355347321,
"grad_norm": 192.26055908203125,
"learning_rate": 2.252167502486205e-06,
"loss": 14.2554,
"step": 606
},
{
"epoch": 0.3480192786626772,
"grad_norm": 184.66305541992188,
"learning_rate": 2.2366560942325833e-06,
"loss": 14.3175,
"step": 607
},
{
"epoch": 0.3485926217906223,
"grad_norm": 186.08566284179688,
"learning_rate": 2.2211828822322547e-06,
"loss": 14.2586,
"step": 608
},
{
"epoch": 0.3491659649185674,
"grad_norm": 183.10336303710938,
"learning_rate": 2.205748080363316e-06,
"loss": 14.3051,
"step": 609
},
{
"epoch": 0.3497393080465125,
"grad_norm": 188.01463317871094,
"learning_rate": 2.190351901972935e-06,
"loss": 14.2597,
"step": 610
},
{
"epoch": 0.3503126511744576,
"grad_norm": 186.796630859375,
"learning_rate": 2.1749945598744076e-06,
"loss": 14.3121,
"step": 611
},
{
"epoch": 0.3508859943024027,
"grad_norm": 197.26966857910156,
"learning_rate": 2.159676266344222e-06,
"loss": 14.3272,
"step": 612
},
{
"epoch": 0.3514593374303478,
"grad_norm": 188.6767578125,
"learning_rate": 2.144397233119112e-06,
"loss": 14.2799,
"step": 613
},
{
"epoch": 0.35203268055829284,
"grad_norm": 185.7920684814453,
"learning_rate": 2.1291576713931382e-06,
"loss": 14.3654,
"step": 614
},
{
"epoch": 0.35260602368623795,
"grad_norm": 183.85186767578125,
"learning_rate": 2.1139577918147715e-06,
"loss": 14.2435,
"step": 615
},
{
"epoch": 0.35317936681418305,
"grad_norm": 188.81492614746094,
"learning_rate": 2.0987978044839707e-06,
"loss": 14.3787,
"step": 616
},
{
"epoch": 0.35375270994212815,
"grad_norm": 181.99166870117188,
"learning_rate": 2.0836779189492925e-06,
"loss": 14.3489,
"step": 617
},
{
"epoch": 0.35432605307007325,
"grad_norm": 182.6253204345703,
"learning_rate": 2.068598344204981e-06,
"loss": 14.2816,
"step": 618
},
{
"epoch": 0.35489939619801836,
"grad_norm": 178.6793975830078,
"learning_rate": 2.053559288688086e-06,
"loss": 14.2392,
"step": 619
},
{
"epoch": 0.35547273932596346,
"grad_norm": 190.26219177246094,
"learning_rate": 2.0385609602755878e-06,
"loss": 14.2875,
"step": 620
},
{
"epoch": 0.35604608245390856,
"grad_norm": 199.85971069335938,
"learning_rate": 2.02360356628151e-06,
"loss": 14.3167,
"step": 621
},
{
"epoch": 0.35661942558185367,
"grad_norm": 199.51605224609375,
"learning_rate": 2.0086873134540626e-06,
"loss": 14.336,
"step": 622
},
{
"epoch": 0.35719276870979877,
"grad_norm": 183.92247009277344,
"learning_rate": 1.9938124079727874e-06,
"loss": 14.2201,
"step": 623
},
{
"epoch": 0.35776611183774387,
"grad_norm": 193.48175048828125,
"learning_rate": 1.9789790554456977e-06,
"loss": 14.2868,
"step": 624
},
{
"epoch": 0.358339454965689,
"grad_norm": 189.4330291748047,
"learning_rate": 1.9641874609064443e-06,
"loss": 14.2538,
"step": 625
},
{
"epoch": 0.3589127980936341,
"grad_norm": 182.5979461669922,
"learning_rate": 1.9494378288114816e-06,
"loss": 14.2463,
"step": 626
},
{
"epoch": 0.3594861412215792,
"grad_norm": 177.77850341796875,
"learning_rate": 1.9347303630372373e-06,
"loss": 14.1946,
"step": 627
},
{
"epoch": 0.3600594843495243,
"grad_norm": 182.85313415527344,
"learning_rate": 1.9200652668772924e-06,
"loss": 14.2852,
"step": 628
},
{
"epoch": 0.3606328274774694,
"grad_norm": 189.149169921875,
"learning_rate": 1.9054427430395828e-06,
"loss": 14.2522,
"step": 629
},
{
"epoch": 0.3612061706054145,
"grad_norm": 186.2698211669922,
"learning_rate": 1.890862993643583e-06,
"loss": 14.2526,
"step": 630
},
{
"epoch": 0.3617795137333596,
"grad_norm": 188.8157196044922,
"learning_rate": 1.8763262202175204e-06,
"loss": 14.2772,
"step": 631
},
{
"epoch": 0.3623528568613047,
"grad_norm": 184.87147521972656,
"learning_rate": 1.8618326236955908e-06,
"loss": 14.3395,
"step": 632
},
{
"epoch": 0.3629261999892498,
"grad_norm": 185.856201171875,
"learning_rate": 1.8473824044151762e-06,
"loss": 14.2998,
"step": 633
},
{
"epoch": 0.3634995431171949,
"grad_norm": 184.26248168945312,
"learning_rate": 1.8329757621140748e-06,
"loss": 14.2654,
"step": 634
},
{
"epoch": 0.36407288624514,
"grad_norm": 186.35105895996094,
"learning_rate": 1.81861289592775e-06,
"loss": 14.2294,
"step": 635
},
{
"epoch": 0.3646462293730851,
"grad_norm": 187.1624298095703,
"learning_rate": 1.8042940043865658e-06,
"loss": 14.3037,
"step": 636
},
{
"epoch": 0.3652195725010302,
"grad_norm": 176.15463256835938,
"learning_rate": 1.7900192854130465e-06,
"loss": 14.2271,
"step": 637
},
{
"epoch": 0.3657929156289753,
"grad_norm": 188.59449768066406,
"learning_rate": 1.7757889363191484e-06,
"loss": 14.3419,
"step": 638
},
{
"epoch": 0.3663662587569204,
"grad_norm": 180.50051879882812,
"learning_rate": 1.7616031538035189e-06,
"loss": 14.2815,
"step": 639
},
{
"epoch": 0.3669396018848655,
"grad_norm": 185.34474182128906,
"learning_rate": 1.7474621339487925e-06,
"loss": 14.2534,
"step": 640
},
{
"epoch": 0.3675129450128106,
"grad_norm": 184.1910858154297,
"learning_rate": 1.7333660722188667e-06,
"loss": 14.2397,
"step": 641
},
{
"epoch": 0.3680862881407557,
"grad_norm": 185.2908477783203,
"learning_rate": 1.7193151634562071e-06,
"loss": 14.2306,
"step": 642
},
{
"epoch": 0.3686596312687008,
"grad_norm": 183.8131103515625,
"learning_rate": 1.7053096018791588e-06,
"loss": 14.2843,
"step": 643
},
{
"epoch": 0.36923297439664593,
"grad_norm": 189.00628662109375,
"learning_rate": 1.691349581079249e-06,
"loss": 14.1944,
"step": 644
},
{
"epoch": 0.36980631752459103,
"grad_norm": 189.68801879882812,
"learning_rate": 1.6774352940185269e-06,
"loss": 14.2894,
"step": 645
},
{
"epoch": 0.37037966065253614,
"grad_norm": 193.29290771484375,
"learning_rate": 1.663566933026879e-06,
"loss": 14.3125,
"step": 646
},
{
"epoch": 0.37095300378048124,
"grad_norm": 189.4978790283203,
"learning_rate": 1.6497446897993885e-06,
"loss": 14.1912,
"step": 647
},
{
"epoch": 0.37152634690842634,
"grad_norm": 187.17823791503906,
"learning_rate": 1.6359687553936714e-06,
"loss": 14.2728,
"step": 648
},
{
"epoch": 0.37209969003637144,
"grad_norm": 180.1759033203125,
"learning_rate": 1.6222393202272414e-06,
"loss": 14.2409,
"step": 649
},
{
"epoch": 0.37267303316431655,
"grad_norm": 175.7593536376953,
"learning_rate": 1.6085565740748825e-06,
"loss": 14.1765,
"step": 650
},
{
"epoch": 0.37324637629226165,
"grad_norm": 183.71810913085938,
"learning_rate": 1.5949207060660138e-06,
"loss": 14.2563,
"step": 651
},
{
"epoch": 0.37381971942020675,
"grad_norm": 185.6693572998047,
"learning_rate": 1.581331904682089e-06,
"loss": 14.3579,
"step": 652
},
{
"epoch": 0.37439306254815186,
"grad_norm": 189.27444458007812,
"learning_rate": 1.5677903577539806e-06,
"loss": 14.2853,
"step": 653
},
{
"epoch": 0.37496640567609696,
"grad_norm": 190.42837524414062,
"learning_rate": 1.5542962524593869e-06,
"loss": 14.2187,
"step": 654
},
{
"epoch": 0.37553974880404206,
"grad_norm": 177.54698181152344,
"learning_rate": 1.54084977532025e-06,
"loss": 14.1745,
"step": 655
},
{
"epoch": 0.37611309193198716,
"grad_norm": 183.06019592285156,
"learning_rate": 1.5274511122001684e-06,
"loss": 14.2742,
"step": 656
},
{
"epoch": 0.37668643505993227,
"grad_norm": 190.93809509277344,
"learning_rate": 1.5141004483018323e-06,
"loss": 14.3287,
"step": 657
},
{
"epoch": 0.37725977818787737,
"grad_norm": 195.81625366210938,
"learning_rate": 1.5007979681644696e-06,
"loss": 14.2384,
"step": 658
},
{
"epoch": 0.3778331213158225,
"grad_norm": 187.17530822753906,
"learning_rate": 1.4875438556612836e-06,
"loss": 14.25,
"step": 659
},
{
"epoch": 0.3784064644437676,
"grad_norm": 183.16397094726562,
"learning_rate": 1.474338293996917e-06,
"loss": 14.3265,
"step": 660
},
{
"epoch": 0.3789798075717127,
"grad_norm": 177.78402709960938,
"learning_rate": 1.4611814657049257e-06,
"loss": 14.1526,
"step": 661
},
{
"epoch": 0.3795531506996578,
"grad_norm": 187.61419677734375,
"learning_rate": 1.4480735526452427e-06,
"loss": 14.2041,
"step": 662
},
{
"epoch": 0.3801264938276029,
"grad_norm": 181.4232635498047,
"learning_rate": 1.4350147360016743e-06,
"loss": 14.2766,
"step": 663
},
{
"epoch": 0.380699836955548,
"grad_norm": 185.21261596679688,
"learning_rate": 1.4220051962793952e-06,
"loss": 14.216,
"step": 664
},
{
"epoch": 0.3812731800834931,
"grad_norm": 187.9059295654297,
"learning_rate": 1.4090451133024473e-06,
"loss": 14.2696,
"step": 665
},
{
"epoch": 0.3818465232114382,
"grad_norm": 184.80746459960938,
"learning_rate": 1.3961346662112585e-06,
"loss": 14.2777,
"step": 666
},
{
"epoch": 0.3824198663393833,
"grad_norm": 178.53359985351562,
"learning_rate": 1.3832740334601692e-06,
"loss": 14.2119,
"step": 667
},
{
"epoch": 0.3829932094673284,
"grad_norm": 186.9265594482422,
"learning_rate": 1.3704633928149575e-06,
"loss": 14.278,
"step": 668
},
{
"epoch": 0.3835665525952735,
"grad_norm": 181.26290893554688,
"learning_rate": 1.3577029213503911e-06,
"loss": 14.2922,
"step": 669
},
{
"epoch": 0.3841398957232186,
"grad_norm": 182.86557006835938,
"learning_rate": 1.3449927954477732e-06,
"loss": 14.2855,
"step": 670
},
{
"epoch": 0.3847132388511637,
"grad_norm": 182.475830078125,
"learning_rate": 1.3323331907925046e-06,
"loss": 14.2958,
"step": 671
},
{
"epoch": 0.3852865819791088,
"grad_norm": 189.7706756591797,
"learning_rate": 1.319724282371664e-06,
"loss": 14.2176,
"step": 672
},
{
"epoch": 0.3858599251070539,
"grad_norm": 193.93069458007812,
"learning_rate": 1.307166244471576e-06,
"loss": 14.2117,
"step": 673
},
{
"epoch": 0.386433268234999,
"grad_norm": 179.2334442138672,
"learning_rate": 1.2946592506754097e-06,
"loss": 14.3632,
"step": 674
},
{
"epoch": 0.3870066113629441,
"grad_norm": 189.32432556152344,
"learning_rate": 1.282203473860783e-06,
"loss": 14.1928,
"step": 675
},
{
"epoch": 0.3875799544908892,
"grad_norm": 182.27935791015625,
"learning_rate": 1.2697990861973635e-06,
"loss": 14.2161,
"step": 676
},
{
"epoch": 0.3881532976188343,
"grad_norm": 181.55154418945312,
"learning_rate": 1.257446259144494e-06,
"loss": 14.2658,
"step": 677
},
{
"epoch": 0.38872664074677943,
"grad_norm": 183.76902770996094,
"learning_rate": 1.2451451634488264e-06,
"loss": 14.3169,
"step": 678
},
{
"epoch": 0.38929998387472453,
"grad_norm": 179.52069091796875,
"learning_rate": 1.2328959691419517e-06,
"loss": 14.261,
"step": 679
},
{
"epoch": 0.38987332700266963,
"grad_norm": 187.97842407226562,
"learning_rate": 1.2206988455380558e-06,
"loss": 14.1935,
"step": 680
},
{
"epoch": 0.39044667013061474,
"grad_norm": 177.58485412597656,
"learning_rate": 1.2085539612315844e-06,
"loss": 14.0745,
"step": 681
},
{
"epoch": 0.39102001325855984,
"grad_norm": 178.7311248779297,
"learning_rate": 1.1964614840949002e-06,
"loss": 14.223,
"step": 682
},
{
"epoch": 0.39159335638650494,
"grad_norm": 181.48497009277344,
"learning_rate": 1.1844215812759708e-06,
"loss": 14.1863,
"step": 683
},
{
"epoch": 0.39216669951445005,
"grad_norm": 183.38412475585938,
"learning_rate": 1.1724344191960591e-06,
"loss": 14.2664,
"step": 684
},
{
"epoch": 0.39274004264239515,
"grad_norm": 190.3087921142578,
"learning_rate": 1.1605001635474183e-06,
"loss": 14.3032,
"step": 685
},
{
"epoch": 0.39331338577034025,
"grad_norm": 179.9006805419922,
"learning_rate": 1.1486189792910024e-06,
"loss": 14.2501,
"step": 686
},
{
"epoch": 0.39388672889828535,
"grad_norm": 186.22154235839844,
"learning_rate": 1.1367910306541918e-06,
"loss": 14.1971,
"step": 687
},
{
"epoch": 0.39446007202623046,
"grad_norm": 180.23377990722656,
"learning_rate": 1.1250164811285148e-06,
"loss": 14.2892,
"step": 688
},
{
"epoch": 0.39503341515417556,
"grad_norm": 177.89480590820312,
"learning_rate": 1.1132954934673911e-06,
"loss": 14.1728,
"step": 689
},
{
"epoch": 0.39560675828212066,
"grad_norm": 187.4567108154297,
"learning_rate": 1.1016282296838887e-06,
"loss": 14.2579,
"step": 690
},
{
"epoch": 0.39618010141006577,
"grad_norm": 176.47003173828125,
"learning_rate": 1.090014851048473e-06,
"loss": 14.2398,
"step": 691
},
{
"epoch": 0.39675344453801087,
"grad_norm": 182.03118896484375,
"learning_rate": 1.078455518086784e-06,
"loss": 14.2395,
"step": 692
},
{
"epoch": 0.39732678766595597,
"grad_norm": 181.1314697265625,
"learning_rate": 1.0669503905774198e-06,
"loss": 14.1643,
"step": 693
},
{
"epoch": 0.3979001307939011,
"grad_norm": 189.62818908691406,
"learning_rate": 1.055499627549722e-06,
"loss": 14.1924,
"step": 694
},
{
"epoch": 0.3984734739218462,
"grad_norm": 180.246337890625,
"learning_rate": 1.0441033872815804e-06,
"loss": 14.2148,
"step": 695
},
{
"epoch": 0.3990468170497913,
"grad_norm": 180.3937530517578,
"learning_rate": 1.0327618272972484e-06,
"loss": 14.2263,
"step": 696
},
{
"epoch": 0.3996201601777364,
"grad_norm": 189.4615478515625,
"learning_rate": 1.0214751043651582e-06,
"loss": 14.2253,
"step": 697
},
{
"epoch": 0.4001935033056815,
"grad_norm": 177.67706298828125,
"learning_rate": 1.010243374495763e-06,
"loss": 14.1809,
"step": 698
},
{
"epoch": 0.4007668464336266,
"grad_norm": 176.24996948242188,
"learning_rate": 9.990667929393715e-07,
"loss": 14.0939,
"step": 699
},
{
"epoch": 0.4013401895615717,
"grad_norm": 184.9351806640625,
"learning_rate": 9.879455141840067e-07,
"loss": 14.3538,
"step": 700
},
{
"epoch": 0.4019135326895168,
"grad_norm": 189.4423370361328,
"learning_rate": 9.768796919532742e-07,
"loss": 14.2778,
"step": 701
},
{
"epoch": 0.4024868758174619,
"grad_norm": 190.33895874023438,
"learning_rate": 9.658694792042284e-07,
"loss": 14.3299,
"step": 702
},
{
"epoch": 0.403060218945407,
"grad_norm": 183.4825897216797,
"learning_rate": 9.549150281252633e-07,
"loss": 14.1587,
"step": 703
},
{
"epoch": 0.4036335620733521,
"grad_norm": 184.19715881347656,
"learning_rate": 9.440164901340127e-07,
"loss": 14.1235,
"step": 704
},
{
"epoch": 0.4042069052012972,
"grad_norm": 191.84231567382812,
"learning_rate": 9.331740158752495e-07,
"loss": 14.1645,
"step": 705
},
{
"epoch": 0.4047802483292423,
"grad_norm": 181.87342834472656,
"learning_rate": 9.223877552188065e-07,
"loss": 14.2719,
"step": 706
},
{
"epoch": 0.4053535914571874,
"grad_norm": 183.34930419921875,
"learning_rate": 9.116578572575091e-07,
"loss": 14.2534,
"step": 707
},
{
"epoch": 0.4059269345851325,
"grad_norm": 174.75514221191406,
"learning_rate": 9.009844703051063e-07,
"loss": 14.3114,
"step": 708
},
{
"epoch": 0.4065002777130776,
"grad_norm": 176.34121704101562,
"learning_rate": 8.903677418942292e-07,
"loss": 14.2201,
"step": 709
},
{
"epoch": 0.4070736208410227,
"grad_norm": 183.08766174316406,
"learning_rate": 8.79807818774343e-07,
"loss": 14.1528,
"step": 710
},
{
"epoch": 0.4076469639689678,
"grad_norm": 189.90757751464844,
"learning_rate": 8.693048469097293e-07,
"loss": 14.2383,
"step": 711
},
{
"epoch": 0.4082203070969129,
"grad_norm": 181.50448608398438,
"learning_rate": 8.58858971477457e-07,
"loss": 14.262,
"step": 712
},
{
"epoch": 0.40879365022485803,
"grad_norm": 178.92880249023438,
"learning_rate": 8.484703368653812e-07,
"loss": 14.1923,
"step": 713
},
{
"epoch": 0.40936699335280313,
"grad_norm": 186.92608642578125,
"learning_rate": 8.381390866701517e-07,
"loss": 14.1751,
"step": 714
},
{
"epoch": 0.40994033648074824,
|
"grad_norm": 183.1122589111328, |
|
"learning_rate": 8.278653636952177e-07, |
|
"loss": 14.2072, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.41051367960869334, |
|
"grad_norm": 172.70138549804688, |
|
"learning_rate": 8.176493099488664e-07, |
|
"loss": 14.209, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.41108702273663844, |
|
"grad_norm": 193.0767822265625, |
|
"learning_rate": 8.074910666422475e-07, |
|
"loss": 14.2055, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.41166036586458354, |
|
"grad_norm": 181.7238006591797, |
|
"learning_rate": 7.973907741874287e-07, |
|
"loss": 14.2313, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.41223370899252865, |
|
"grad_norm": 196.82655334472656, |
|
"learning_rate": 7.873485721954572e-07, |
|
"loss": 14.3521, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.41280705212047375, |
|
"grad_norm": 184.25498962402344, |
|
"learning_rate": 7.773645994744222e-07, |
|
"loss": 14.2955, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.41338039524841885, |
|
"grad_norm": 179.338623046875, |
|
"learning_rate": 7.674389940275406e-07, |
|
"loss": 14.1519, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.41395373837636396, |
|
"grad_norm": 179.32083129882812, |
|
"learning_rate": 7.575718930512516e-07, |
|
"loss": 14.2179, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.41452708150430906, |
|
"grad_norm": 178.83621215820312, |
|
"learning_rate": 7.47763432933315e-07, |
|
"loss": 14.2179, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.41510042463225416, |
|
"grad_norm": 184.3859100341797, |
|
"learning_rate": 7.380137492509309e-07, |
|
"loss": 14.2816, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.41567376776019926, |
|
"grad_norm": 178.84129333496094, |
|
"learning_rate": 7.283229767688627e-07, |
|
"loss": 14.2278, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.4162471108881443, |
|
"grad_norm": 171.81666564941406, |
|
"learning_rate": 7.186912494375736e-07, |
|
"loss": 14.1466, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.4168204540160894, |
|
"grad_norm": 194.59820556640625, |
|
"learning_rate": 7.091187003913802e-07, |
|
"loss": 14.2792, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.4173937971440345, |
|
"grad_norm": 180.1846160888672, |
|
"learning_rate": 6.996054619466053e-07, |
|
"loss": 14.1733, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.4179671402719796, |
|
"grad_norm": 180.338134765625, |
|
"learning_rate": 6.901516655997536e-07, |
|
"loss": 14.1878, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.4185404833999247, |
|
"grad_norm": 182.3441162109375, |
|
"learning_rate": 6.80757442025694e-07, |
|
"loss": 14.2232, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.4191138265278698, |
|
"grad_norm": 180.3588104248047, |
|
"learning_rate": 6.714229210758516e-07, |
|
"loss": 14.2163, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.41968716965581493, |
|
"grad_norm": 181.55784606933594, |
|
"learning_rate": 6.621482317764105e-07, |
|
"loss": 14.1579, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.42026051278376003, |
|
"grad_norm": 184.411376953125, |
|
"learning_rate": 6.529335023265387e-07, |
|
"loss": 14.2631, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.42083385591170513, |
|
"grad_norm": 182.96253967285156, |
|
"learning_rate": 6.437788600966066e-07, |
|
"loss": 14.285, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.42140719903965024, |
|
"grad_norm": 192.8575897216797, |
|
"learning_rate": 6.346844316264312e-07, |
|
"loss": 14.1554, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.42198054216759534, |
|
"grad_norm": 176.40582275390625, |
|
"learning_rate": 6.256503426235277e-07, |
|
"loss": 14.2083, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.42255388529554044, |
|
"grad_norm": 183.86581420898438, |
|
"learning_rate": 6.166767179613691e-07, |
|
"loss": 14.2304, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.42312722842348555, |
|
"grad_norm": 190.2710723876953, |
|
"learning_rate": 6.077636816776611e-07, |
|
"loss": 14.2459, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.42370057155143065, |
|
"grad_norm": 183.04217529296875, |
|
"learning_rate": 5.989113569726312e-07, |
|
"loss": 14.1955, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.42427391467937575, |
|
"grad_norm": 176.5095672607422, |
|
"learning_rate": 5.901198662073188e-07, |
|
"loss": 14.2403, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.42484725780732085, |
|
"grad_norm": 175.92588806152344, |
|
"learning_rate": 5.813893309018881e-07, |
|
"loss": 14.2281, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.42542060093526596, |
|
"grad_norm": 190.41502380371094, |
|
"learning_rate": 5.727198717339511e-07, |
|
"loss": 14.239, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.42599394406321106, |
|
"grad_norm": 179.48741149902344, |
|
"learning_rate": 5.641116085368931e-07, |
|
"loss": 14.2565, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.42656728719115616, |
|
"grad_norm": 195.33184814453125, |
|
"learning_rate": 5.555646602982207e-07, |
|
"loss": 14.3216, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.42714063031910127, |
|
"grad_norm": 185.87525939941406, |
|
"learning_rate": 5.470791451579172e-07, |
|
"loss": 14.242, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.42771397344704637, |
|
"grad_norm": 188.23599243164062, |
|
"learning_rate": 5.386551804068063e-07, |
|
"loss": 14.2882, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.4282873165749915, |
|
"grad_norm": 177.58998107910156, |
|
"learning_rate": 5.302928824849335e-07, |
|
"loss": 14.2356, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.4288606597029366, |
|
"grad_norm": 186.3286895751953, |
|
"learning_rate": 5.219923669799587e-07, |
|
"loss": 14.2915, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.4294340028308817, |
|
"grad_norm": 180.6791229248047, |
|
"learning_rate": 5.137537486255517e-07, |
|
"loss": 14.2342, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.4300073459588268, |
|
"grad_norm": 194.50714111328125, |
|
"learning_rate": 5.055771412998122e-07, |
|
"loss": 14.2382, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4305806890867719, |
|
"grad_norm": 181.63011169433594, |
|
"learning_rate": 4.974626580236957e-07, |
|
"loss": 14.1548, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.431154032214717, |
|
"grad_norm": 185.96437072753906, |
|
"learning_rate": 4.894104109594466e-07, |
|
"loss": 14.2133, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.4317273753426621, |
|
"grad_norm": 177.23391723632812, |
|
"learning_rate": 4.814205114090543e-07, |
|
"loss": 14.213, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.4323007184706072, |
|
"grad_norm": 178.10658264160156, |
|
"learning_rate": 4.734930698127077e-07, |
|
"loss": 14.216, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.4328740615985523, |
|
"grad_norm": 178.41822814941406, |
|
"learning_rate": 4.6562819574727304e-07, |
|
"loss": 14.0747, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.4334474047264974, |
|
"grad_norm": 192.12301635742188, |
|
"learning_rate": 4.578259979247801e-07, |
|
"loss": 14.2543, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.4340207478544425, |
|
"grad_norm": 182.95399475097656, |
|
"learning_rate": 4.500865841909169e-07, |
|
"loss": 14.1967, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.4345940909823876, |
|
"grad_norm": 182.12098693847656, |
|
"learning_rate": 4.4241006152353885e-07, |
|
"loss": 14.233, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.4351674341103327, |
|
"grad_norm": 185.19178771972656, |
|
"learning_rate": 4.3479653603119287e-07, |
|
"loss": 14.1932, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.4357407772382778, |
|
"grad_norm": 175.16232299804688, |
|
"learning_rate": 4.2724611295164755e-07, |
|
"loss": 14.2061, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4363141203662229, |
|
"grad_norm": 171.42161560058594, |
|
"learning_rate": 4.197588966504401e-07, |
|
"loss": 14.1964, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.436887463494168, |
|
"grad_norm": 179.8773193359375, |
|
"learning_rate": 4.123349906194357e-07, |
|
"loss": 14.1541, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.4374608066221131, |
|
"grad_norm": 179.10585021972656, |
|
"learning_rate": 4.0497449747539217e-07, |
|
"loss": 14.1968, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.4380341497500582, |
|
"grad_norm": 191.01058959960938, |
|
"learning_rate": 3.9767751895854467e-07, |
|
"loss": 14.2196, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.4386074928780033, |
|
"grad_norm": 183.64254760742188, |
|
"learning_rate": 3.904441559312006e-07, |
|
"loss": 14.2129, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.4391808360059484, |
|
"grad_norm": 186.27633666992188, |
|
"learning_rate": 3.8327450837634284e-07, |
|
"loss": 14.1771, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.43975417913389353, |
|
"grad_norm": 189.0173797607422, |
|
"learning_rate": 3.7616867539624733e-07, |
|
"loss": 14.275, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.44032752226183863, |
|
"grad_norm": 187.9246368408203, |
|
"learning_rate": 3.691267552111183e-07, |
|
"loss": 14.2115, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.44090086538978374, |
|
"grad_norm": 185.96083068847656, |
|
"learning_rate": 3.621488451577221e-07, |
|
"loss": 14.1871, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.44147420851772884, |
|
"grad_norm": 180.14927673339844, |
|
"learning_rate": 3.552350416880507e-07, |
|
"loss": 14.1769, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.44204755164567394, |
|
"grad_norm": 190.77037048339844, |
|
"learning_rate": 3.483854403679832e-07, |
|
"loss": 14.159, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.44262089477361904, |
|
"grad_norm": 179.29052734375, |
|
"learning_rate": 3.416001358759635e-07, |
|
"loss": 14.2194, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.44319423790156415, |
|
"grad_norm": 187.48687744140625, |
|
"learning_rate": 3.3487922200169944e-07, |
|
"loss": 14.2782, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.44376758102950925, |
|
"grad_norm": 175.16188049316406, |
|
"learning_rate": 3.2822279164485494e-07, |
|
"loss": 14.1779, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.44434092415745435, |
|
"grad_norm": 182.10446166992188, |
|
"learning_rate": 3.2163093681377765e-07, |
|
"loss": 14.1585, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.44491426728539946, |
|
"grad_norm": 179.84536743164062, |
|
"learning_rate": 3.151037486242181e-07, |
|
"loss": 14.1605, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.44548761041334456, |
|
"grad_norm": 179.8004608154297, |
|
"learning_rate": 3.08641317298074e-07, |
|
"loss": 14.231, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.44606095354128966, |
|
"grad_norm": 190.25631713867188, |
|
"learning_rate": 3.022437321621452e-07, |
|
"loss": 14.2661, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.44663429666923476, |
|
"grad_norm": 177.27598571777344, |
|
"learning_rate": 2.959110816468935e-07, |
|
"loss": 14.3369, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.44720763979717987, |
|
"grad_norm": 180.63668823242188, |
|
"learning_rate": 2.896434532852277e-07, |
|
"loss": 14.1925, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.44778098292512497, |
|
"grad_norm": 172.8029022216797, |
|
"learning_rate": 2.834409337112842e-07, |
|
"loss": 14.2616, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.4483543260530701, |
|
"grad_norm": 182.10931396484375, |
|
"learning_rate": 2.7730360865923954e-07, |
|
"loss": 14.2489, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.4489276691810152, |
|
"grad_norm": 182.58995056152344, |
|
"learning_rate": 2.712315629621176e-07, |
|
"loss": 14.2247, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.4495010123089603, |
|
"grad_norm": 182.17227172851562, |
|
"learning_rate": 2.6522488055062076e-07, |
|
"loss": 14.251, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.4500743554369054, |
|
"grad_norm": 179.82858276367188, |
|
"learning_rate": 2.5928364445196975e-07, |
|
"loss": 14.2028, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.4506476985648505, |
|
"grad_norm": 177.07699584960938, |
|
"learning_rate": 2.534079367887549e-07, |
|
"loss": 14.1402, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.4512210416927956, |
|
"grad_norm": 174.88539123535156, |
|
"learning_rate": 2.475978387778e-07, |
|
"loss": 14.2159, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.4517943848207407, |
|
"grad_norm": 182.9810028076172, |
|
"learning_rate": 2.4185343072904376e-07, |
|
"loss": 14.2624, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.4523677279486858, |
|
"grad_norm": 180.19107055664062, |
|
"learning_rate": 2.3617479204442462e-07, |
|
"loss": 14.2149, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.4529410710766309, |
|
"grad_norm": 181.24143981933594, |
|
"learning_rate": 2.305620012167853e-07, |
|
"loss": 14.1732, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.453514414204576, |
|
"grad_norm": 174.54727172851562, |
|
"learning_rate": 2.2501513582879108e-07, |
|
"loss": 14.1911, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.4540877573325211, |
|
"grad_norm": 181.31564331054688, |
|
"learning_rate": 2.1953427255185122e-07, |
|
"loss": 14.2618, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.4546611004604662, |
|
"grad_norm": 179.88681030273438, |
|
"learning_rate": 2.1411948714506414e-07, |
|
"loss": 14.2918, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.4552344435884113, |
|
"grad_norm": 175.06451416015625, |
|
"learning_rate": 2.0877085445416889e-07, |
|
"loss": 14.1995, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.4558077867163564, |
|
"grad_norm": 181.00540161132812, |
|
"learning_rate": 2.034884484105093e-07, |
|
"loss": 14.1838, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.4563811298443015, |
|
"grad_norm": 182.90286254882812, |
|
"learning_rate": 1.98272342030012e-07, |
|
"loss": 14.2421, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.4569544729722466, |
|
"grad_norm": 188.80038452148438, |
|
"learning_rate": 1.9312260741218114e-07, |
|
"loss": 14.2287, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.4575278161001917, |
|
"grad_norm": 189.58168029785156, |
|
"learning_rate": 1.8803931573909584e-07, |
|
"loss": 14.1547, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.4581011592281368, |
|
"grad_norm": 182.40635681152344, |
|
"learning_rate": 1.8302253727443041e-07, |
|
"loss": 14.1816, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.4586745023560819, |
|
"grad_norm": 177.64756774902344, |
|
"learning_rate": 1.7807234136248296e-07, |
|
"loss": 14.0972, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.45924784548402703, |
|
"grad_norm": 179.09646606445312, |
|
"learning_rate": 1.731887964272144e-07, |
|
"loss": 14.2329, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.45982118861197213, |
|
"grad_norm": 182.92236328125, |
|
"learning_rate": 1.6837196997130434e-07, |
|
"loss": 14.129, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.46039453173991723, |
|
"grad_norm": 180.4651641845703, |
|
"learning_rate": 1.6362192857521942e-07, |
|
"loss": 14.2359, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.46096787486786234, |
|
"grad_norm": 188.5583038330078, |
|
"learning_rate": 1.5893873789628812e-07, |
|
"loss": 14.3177, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.46154121799580744, |
|
"grad_norm": 179.5811767578125, |
|
"learning_rate": 1.5432246266780083e-07, |
|
"loss": 14.1691, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46211456112375254, |
|
"grad_norm": 174.0120391845703, |
|
"learning_rate": 1.4977316669810782e-07, |
|
"loss": 14.1824, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.46268790425169765, |
|
"grad_norm": 191.203369140625, |
|
"learning_rate": 1.4529091286973994e-07, |
|
"loss": 14.1955, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.46326124737964275, |
|
"grad_norm": 183.5585479736328, |
|
"learning_rate": 1.4087576313854212e-07, |
|
"loss": 14.2568, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.46383459050758785, |
|
"grad_norm": 183.38294982910156, |
|
"learning_rate": 1.365277785328123e-07, |
|
"loss": 14.1888, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.46440793363553295, |
|
"grad_norm": 178.88182067871094, |
|
"learning_rate": 1.3224701915246053e-07, |
|
"loss": 14.1905, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.46498127676347806, |
|
"grad_norm": 176.96397399902344, |
|
"learning_rate": 1.280335441681796e-07, |
|
"loss": 14.2524, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.46555461989142316, |
|
"grad_norm": 182.11790466308594, |
|
"learning_rate": 1.2388741182062348e-07, |
|
"loss": 14.161, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.46612796301936826, |
|
"grad_norm": 178.43495178222656, |
|
"learning_rate": 1.198086794196035e-07, |
|
"loss": 14.2621, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.46670130614731337, |
|
"grad_norm": 172.70196533203125, |
|
"learning_rate": 1.1579740334330014e-07, |
|
"loss": 14.1181, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.46727464927525847, |
|
"grad_norm": 187.98484802246094, |
|
"learning_rate": 1.1185363903747748e-07, |
|
"loss": 14.269, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.46784799240320357, |
|
"grad_norm": 172.6577911376953, |
|
"learning_rate": 1.0797744101472052e-07, |
|
"loss": 14.1737, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.4684213355311487, |
|
"grad_norm": 181.676025390625, |
|
"learning_rate": 1.0416886285368188e-07, |
|
"loss": 14.2495, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.4689946786590938, |
|
"grad_norm": 176.9022216796875, |
|
"learning_rate": 1.0042795719833964e-07, |
|
"loss": 14.1739, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.4695680217870389, |
|
"grad_norm": 174.6312255859375, |
|
"learning_rate": 9.675477575726954e-08, |
|
"loss": 14.2219, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.470141364914984, |
|
"grad_norm": 176.77125549316406, |
|
"learning_rate": 9.314936930293283e-08, |
|
"loss": 14.1415, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4707147080429291, |
|
"grad_norm": 180.0824432373047, |
|
"learning_rate": 8.961178767097178e-08, |
|
"loss": 14.2163, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.4712880511708742, |
|
"grad_norm": 179.52847290039062, |
|
"learning_rate": 8.614207975952083e-08, |
|
"loss": 14.2163, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.4718613942988193, |
|
"grad_norm": 175.8388671875, |
|
"learning_rate": 8.274029352853264e-08, |
|
"loss": 14.1408, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.4724347374267644, |
|
"grad_norm": 179.85772705078125, |
|
"learning_rate": 7.940647599911477e-08, |
|
"loss": 14.2558, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.4730080805547095, |
|
"grad_norm": 176.0479278564453, |
|
"learning_rate": 7.614067325287632e-08, |
|
"loss": 14.1834, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.4735814236826546, |
|
"grad_norm": 179.95497131347656, |
|
"learning_rate": 7.294293043129785e-08, |
|
"loss": 14.2747, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.4741547668105997, |
|
"grad_norm": 187.21307373046875, |
|
"learning_rate": 6.981329173509909e-08, |
|
"loss": 14.235, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.4747281099385448, |
|
"grad_norm": 183.1041717529297, |
|
"learning_rate": 6.675180042363505e-08, |
|
"loss": 14.2802, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.4753014530664899, |
|
"grad_norm": 177.82183837890625, |
|
"learning_rate": 6.375849881429418e-08, |
|
"loss": 14.2127, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.475874796194435, |
|
"grad_norm": 185.0269775390625, |
|
"learning_rate": 6.083342828191453e-08, |
|
"loss": 14.1445, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4764481393223801, |
|
"grad_norm": 184.57952880859375, |
|
"learning_rate": 5.797662925821068e-08, |
|
"loss": 14.2531, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.4770214824503252, |
|
"grad_norm": 184.90017700195312, |
|
"learning_rate": 5.518814123121885e-08, |
|
"loss": 14.1998, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.4775948255782703, |
|
"grad_norm": 178.5499267578125, |
|
"learning_rate": 5.246800274474439e-08, |
|
"loss": 14.1822, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.4781681687062154, |
|
"grad_norm": 176.92861938476562, |
|
"learning_rate": 4.981625139783619e-08, |
|
"loss": 14.1861, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.4787415118341605, |
|
"grad_norm": 176.53111267089844, |
|
"learning_rate": 4.723292384426203e-08, |
|
"loss": 14.1773, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.47931485496210563, |
|
"grad_norm": 198.08750915527344, |
|
"learning_rate": 4.471805579200239e-08, |
|
"loss": 14.3216, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.47988819809005073, |
|
"grad_norm": 180.246826171875, |
|
"learning_rate": 4.227168200276077e-08, |
|
"loss": 14.0681, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.4804615412179958, |
|
"grad_norm": 181.2344970703125, |
|
"learning_rate": 3.989383629147747e-08, |
|
"loss": 14.239, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.4810348843459409, |
|
"grad_norm": 182.63856506347656, |
|
"learning_rate": 3.758455152586715e-08, |
|
"loss": 14.1785, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.481608227473886, |
|
"grad_norm": 176.11099243164062, |
|
"learning_rate": 3.534385962596143e-08, |
|
"loss": 14.1423, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4821815706018311, |
|
"grad_norm": 175.12725830078125, |
|
"learning_rate": 3.3171791563669785e-08, |
|
"loss": 14.2053, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.4827549137297762, |
|
"grad_norm": 185.15928649902344, |
|
"learning_rate": 3.10683773623488e-08, |
|
"loss": 14.146, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.4833282568577213, |
|
"grad_norm": 188.5362548828125, |
|
"learning_rate": 2.9033646096390255e-08, |
|
"loss": 14.2097, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.4839015999856664, |
|
"grad_norm": 187.72796630859375, |
|
"learning_rate": 2.706762589081646e-08, |
|
"loss": 14.1802, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.4844749431136115, |
|
"grad_norm": 177.17909240722656, |
|
"learning_rate": 2.517034392089446e-08, |
|
"loss": 14.1847, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.4850482862415566, |
|
"grad_norm": 174.41868591308594, |
|
"learning_rate": 2.3341826411756863e-08, |
|
"loss": 14.1541, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.4856216293695017, |
|
"grad_norm": 177.609130859375, |
|
"learning_rate": 2.158209863804217e-08, |
|
"loss": 14.2386, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.4861949724974468, |
|
"grad_norm": 182.3568115234375, |
|
"learning_rate": 1.9891184923544472e-08, |
|
"loss": 14.1531, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.4867683156253919, |
|
"grad_norm": 174.87728881835938, |
|
"learning_rate": 1.826910864087761e-08, |
|
"loss": 14.1399, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.487341658753337, |
|
"grad_norm": 183.8682403564453, |
|
"learning_rate": 1.6715892211150442e-08, |
|
"loss": 14.1189, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4879150018812821, |
|
"grad_norm": 176.34315490722656, |
|
"learning_rate": 1.5231557103658755e-08, |
|
"loss": 14.2468, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.4884883450092272, |
|
"grad_norm": 177.79586791992188, |
|
"learning_rate": 1.3816123835588835e-08, |
|
"loss": 14.2414, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.4890616881371723, |
|
"grad_norm": 173.83486938476562, |
|
"learning_rate": 1.2469611971731576e-08, |
|
"loss": 14.1864, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.4896350312651174, |
|
"grad_norm": 181.15512084960938, |
|
"learning_rate": 1.1192040124214931e-08, |
|
"loss": 14.1471, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.49020837439306253, |
|
"grad_norm": 185.8532257080078, |
|
"learning_rate": 9.983425952243552e-09, |
|
"loss": 14.2145, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.49078171752100763, |
|
"grad_norm": 172.852783203125, |
|
"learning_rate": 8.84378616185788e-09, |
|
"loss": 14.1675, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.49135506064895274, |
|
"grad_norm": 183.1013946533203, |
|
"learning_rate": 7.773136505700995e-09, |
|
"loss": 14.1541, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.49192840377689784, |
|
"grad_norm": 173.04444885253906, |
|
"learning_rate": 6.7714917828004545e-09, |
|
"loss": 14.1, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.49250174690484294, |
|
"grad_norm": 173.59991455078125, |
|
"learning_rate": 5.838865838366792e-09, |
|
"loss": 14.1511, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.49307509003278804, |
|
"grad_norm": 197.82601928710938, |
|
"learning_rate": 4.975271563599227e-09, |
|
"loss": 14.2182, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.49364843316073315, |
|
"grad_norm": 183.84568786621094, |
|
"learning_rate": 4.180720895508028e-09, |
|
"loss": 14.1797, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.49422177628867825, |
|
"grad_norm": 194.36610412597656, |
|
"learning_rate": 3.4552248167507576e-09, |
|
"loss": 14.237, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.49479511941662335, |
|
"grad_norm": 172.23765563964844, |
|
"learning_rate": 2.798793355478502e-09, |
|
"loss": 14.094, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.49536846254456846, |
|
"grad_norm": 185.76551818847656, |
|
"learning_rate": 2.2114355851993175e-09, |
|
"loss": 14.1855, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.49594180567251356, |
|
"grad_norm": 175.8227081298828, |
|
"learning_rate": 1.6931596246516636e-09, |
|
"loss": 14.2448, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.49651514880045866, |
|
"grad_norm": 174.75340270996094, |
|
"learning_rate": 1.24397263769227e-09, |
|
"loss": 14.103, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.49708849192840376, |
|
"grad_norm": 175.39210510253906, |
|
"learning_rate": 8.638808331973281e-10, |
|
"loss": 14.1831, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.49766183505634887, |
|
"grad_norm": 180.33180236816406, |
|
"learning_rate": 5.528894649758921e-10, |
|
"loss": 14.1561, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.49823517818429397, |
|
"grad_norm": 168.7253875732422, |
|
"learning_rate": 3.1100283169938074e-10, |
|
"loss": 14.2352, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.4988085213122391, |
|
"grad_norm": 179.21212768554688, |
|
"learning_rate": 1.3822427683884975e-10, |
|
"loss": 14.2388, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4993818644401842, |
|
"grad_norm": 181.83961486816406, |
|
"learning_rate": 3.4556188622802964e-11, |
|
"loss": 14.1703, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.4999552075681293, |
|
"grad_norm": 178.8787078857422, |
|
"learning_rate": 0.0, |
|
"loss": 14.2526, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.4999552075681293, |
|
"step": 872, |
|
"total_flos": 7.585435033523978e+18, |
|
"train_loss": 14.689219380737445, |
|
"train_runtime": 70676.4546, |
|
"train_samples_per_second": 3.948, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 872, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 7.585435033523978e+18, |
|
"train_batch_size": 10, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|