diff --git "a/checkpoint-2000/trainer_state.json" "b/checkpoint-2000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-2000/trainer_state.json" @@ -0,0 +1,14021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.11466903649342086, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 5.7334518246710435e-05, + "grad_norm": 0.0, + "learning_rate": 1.2658227848101266e-07, + "loss": 15.7887, + "step": 1 + }, + { + "epoch": 0.00011466903649342087, + "grad_norm": 0.0, + "learning_rate": 2.5316455696202533e-07, + "loss": 15.8118, + "step": 2 + }, + { + "epoch": 0.0001720035547401313, + "grad_norm": 0.0, + "learning_rate": 3.79746835443038e-07, + "loss": 15.5044, + "step": 3 + }, + { + "epoch": 0.00022933807298684174, + "grad_norm": 0.0, + "learning_rate": 5.063291139240507e-07, + "loss": 15.726, + "step": 4 + }, + { + "epoch": 0.00028667259123355216, + "grad_norm": 0.0, + "learning_rate": 6.329113924050634e-07, + "loss": 15.5511, + "step": 5 + }, + { + "epoch": 0.0003440071094802626, + "grad_norm": 0.0, + "learning_rate": 7.59493670886076e-07, + "loss": 15.6542, + "step": 6 + }, + { + "epoch": 0.00040134162772697304, + "grad_norm": 0.0, + "learning_rate": 8.860759493670887e-07, + "loss": 15.7995, + "step": 7 + }, + { + "epoch": 0.0004586761459736835, + "grad_norm": 0.0, + "learning_rate": 1.0126582278481013e-06, + "loss": 16.2208, + "step": 8 + }, + { + "epoch": 0.0005160106642203939, + "grad_norm": 0.0, + "learning_rate": 1.139240506329114e-06, + "loss": 15.7266, + "step": 9 + }, + { + "epoch": 0.0005733451824671043, + "grad_norm": 0.0, + "learning_rate": 1.2658227848101267e-06, + "loss": 16.0773, + "step": 10 + }, + { + "epoch": 0.0006306797007138148, + "grad_norm": 0.0, + "learning_rate": 1.3924050632911392e-06, + "loss": 16.0366, + "step": 11 + }, + { + "epoch": 0.0006880142189605252, + "grad_norm": 0.0, + "learning_rate": 1.518987341772152e-06, + "loss": 15.5359, + "step": 12 + }, + { + "epoch": 0.0007453487372072356, + "grad_norm": 0.0, + "learning_rate": 1.6455696202531647e-06, + "loss": 15.7016, + "step": 13 + }, + { + "epoch": 0.0008026832554539461, + "grad_norm": 0.0, + "learning_rate": 1.7721518987341774e-06, + "loss": 16.0417, + "step": 14 + }, + { + "epoch": 0.0008600177737006565, + "grad_norm": 0.0, + "learning_rate": 1.8987341772151901e-06, + "loss": 15.7408, + "step": 15 + }, + { + "epoch": 0.000917352291947367, + "grad_norm": 0.0, + "learning_rate": 2.0253164556962026e-06, + "loss": 16.0203, + "step": 16 + }, + { + "epoch": 0.0009746868101940773, + "grad_norm": 0.0, + "learning_rate": 2.1518987341772153e-06, + "loss": 15.6151, + "step": 17 + }, + { + "epoch": 0.0010320213284407877, + "grad_norm": 0.0, + "learning_rate": 2.278481012658228e-06, + "loss": 15.7387, + "step": 18 + }, + { + "epoch": 0.0010893558466874983, + "grad_norm": 0.0, + "learning_rate": 2.4050632911392408e-06, + "loss": 15.9719, + "step": 19 + }, + { + "epoch": 0.0011466903649342086, + "grad_norm": 0.0, + "learning_rate": 2.5316455696202535e-06, + "loss": 15.6512, + "step": 20 + }, + { + "epoch": 0.0012040248831809192, + "grad_norm": 0.0, + "learning_rate": 2.6582278481012658e-06, + "loss": 15.6905, + "step": 21 + }, + { + "epoch": 0.0012613594014276295, + "grad_norm": 0.0, + "learning_rate": 2.7848101265822785e-06, + "loss": 15.8747, + "step": 22 + }, + { + "epoch": 0.0013186939196743398, + "grad_norm": 0.0, + "learning_rate": 2.9113924050632912e-06, + "loss": 15.9172, + "step": 23 + }, + { + "epoch": 0.0013760284379210504, + "grad_norm": 0.0, + "learning_rate": 3.037974683544304e-06, + "loss": 15.4097, + "step": 24 + }, + { + "epoch": 0.0014333629561677607, + "grad_norm": 0.0, + "learning_rate": 3.164556962025317e-06, + "loss": 15.9326, + "step": 25 + }, + { + "epoch": 0.0014906974744144713, + "grad_norm": 0.0, + "learning_rate": 3.2911392405063294e-06, + "loss": 15.7093, + "step": 26 + }, + { + "epoch": 0.0015480319926611816, + "grad_norm": 0.0, + "learning_rate": 3.417721518987342e-06, + "loss": 16.1732, + "step": 27 + }, + { + "epoch": 0.0016053665109078922, + "grad_norm": 0.0, + "learning_rate": 3.544303797468355e-06, + "loss": 15.9809, + "step": 28 + }, + { + "epoch": 0.0016627010291546025, + "grad_norm": 0.0, + "learning_rate": 3.6708860759493675e-06, + "loss": 15.6426, + "step": 29 + }, + { + "epoch": 0.001720035547401313, + "grad_norm": 0.0, + "learning_rate": 3.7974683544303802e-06, + "loss": 15.5717, + "step": 30 + }, + { + "epoch": 0.0017773700656480234, + "grad_norm": 0.0, + "learning_rate": 3.924050632911393e-06, + "loss": 15.6596, + "step": 31 + }, + { + "epoch": 0.001834704583894734, + "grad_norm": 0.0, + "learning_rate": 4.050632911392405e-06, + "loss": 15.4429, + "step": 32 + }, + { + "epoch": 0.0018920391021414443, + "grad_norm": 0.0, + "learning_rate": 4.177215189873418e-06, + "loss": 15.9317, + "step": 33 + }, + { + "epoch": 0.0019493736203881546, + "grad_norm": 0.0, + "learning_rate": 4.303797468354431e-06, + "loss": 15.6599, + "step": 34 + }, + { + "epoch": 0.002006708138634865, + "grad_norm": 0.0, + "learning_rate": 4.430379746835443e-06, + "loss": 15.8037, + "step": 35 + }, + { + "epoch": 0.0020640426568815755, + "grad_norm": 0.0, + "learning_rate": 4.556962025316456e-06, + "loss": 15.976, + "step": 36 + }, + { + "epoch": 0.002121377175128286, + "grad_norm": 0.0, + "learning_rate": 4.683544303797468e-06, + "loss": 15.8437, + "step": 37 + }, + { + "epoch": 0.0021787116933749966, + "grad_norm": 0.0, + "learning_rate": 4.8101265822784815e-06, + "loss": 15.8187, + "step": 38 + }, + { + "epoch": 0.002236046211621707, + "grad_norm": 0.0, + "learning_rate": 4.936708860759495e-06, + "loss": 15.8865, + "step": 39 + }, + { + "epoch": 0.0022933807298684173, + "grad_norm": 0.0, + "learning_rate": 5.063291139240507e-06, + "loss": 15.9997, + "step": 40 + }, + { + "epoch": 0.0023507152481151276, + "grad_norm": 0.0, + "learning_rate": 5.189873417721519e-06, + "loss": 16.2616, + "step": 41 + }, + { + "epoch": 0.0024080497663618384, + "grad_norm": 0.0, + "learning_rate": 5.3164556962025316e-06, + "loss": 15.7347, + "step": 42 + }, + { + "epoch": 0.0024653842846085487, + "grad_norm": 0.0, + "learning_rate": 5.443037974683545e-06, + "loss": 15.7051, + "step": 43 + }, + { + "epoch": 0.002522718802855259, + "grad_norm": 0.0, + "learning_rate": 5.569620253164557e-06, + "loss": 15.9378, + "step": 44 + }, + { + "epoch": 0.0025800533211019694, + "grad_norm": 0.0, + "learning_rate": 5.69620253164557e-06, + "loss": 15.7706, + "step": 45 + }, + { + "epoch": 0.0026373878393486797, + "grad_norm": 0.0, + "learning_rate": 5.8227848101265824e-06, + "loss": 15.4018, + "step": 46 + }, + { + "epoch": 0.0026947223575953905, + "grad_norm": 0.0, + "learning_rate": 5.949367088607595e-06, + "loss": 15.4771, + "step": 47 + }, + { + "epoch": 0.002752056875842101, + "grad_norm": 0.0, + "learning_rate": 6.075949367088608e-06, + "loss": 15.8046, + "step": 48 + }, + { + "epoch": 0.002809391394088811, + "grad_norm": 0.0, + "learning_rate": 6.20253164556962e-06, + "loss": 16.0405, + "step": 49 + }, + { + "epoch": 0.0028667259123355215, + "grad_norm": 0.0, + "learning_rate": 6.329113924050634e-06, + "loss": 16.0027, + "step": 50 + }, + { + "epoch": 0.0029240604305822322, + "grad_norm": 0.0, + "learning_rate": 6.4556962025316464e-06, + "loss": 16.0728, + "step": 51 + }, + { + "epoch": 0.0029813949488289426, + "grad_norm": 0.0, + "learning_rate": 6.582278481012659e-06, + "loss": 15.831, + "step": 52 + }, + { + "epoch": 0.003038729467075653, + "grad_norm": 0.0, + "learning_rate": 6.708860759493672e-06, + "loss": 15.4141, + "step": 53 + }, + { + "epoch": 0.0030960639853223632, + "grad_norm": 0.0, + "learning_rate": 6.835443037974684e-06, + "loss": 15.6401, + "step": 54 + }, + { + "epoch": 0.0031533985035690736, + "grad_norm": 0.0, + "learning_rate": 6.962025316455697e-06, + "loss": 15.7223, + "step": 55 + }, + { + "epoch": 0.0032107330218157843, + "grad_norm": 0.0, + "learning_rate": 7.08860759493671e-06, + "loss": 15.805, + "step": 56 + }, + { + "epoch": 0.0032680675400624947, + "grad_norm": 0.0, + "learning_rate": 7.215189873417722e-06, + "loss": 15.6997, + "step": 57 + }, + { + "epoch": 0.003325402058309205, + "grad_norm": 0.0, + "learning_rate": 7.341772151898735e-06, + "loss": 15.8021, + "step": 58 + }, + { + "epoch": 0.0033827365765559153, + "grad_norm": 0.0, + "learning_rate": 7.468354430379747e-06, + "loss": 15.6945, + "step": 59 + }, + { + "epoch": 0.003440071094802626, + "grad_norm": 0.0, + "learning_rate": 7.5949367088607605e-06, + "loss": 15.7754, + "step": 60 + }, + { + "epoch": 0.0034974056130493364, + "grad_norm": 0.0, + "learning_rate": 7.721518987341773e-06, + "loss": 15.6585, + "step": 61 + }, + { + "epoch": 0.0035547401312960468, + "grad_norm": 0.0, + "learning_rate": 7.848101265822786e-06, + "loss": 16.1158, + "step": 62 + }, + { + "epoch": 0.003612074649542757, + "grad_norm": 0.0, + "learning_rate": 7.974683544303799e-06, + "loss": 15.8004, + "step": 63 + }, + { + "epoch": 0.003669409167789468, + "grad_norm": 0.0, + "learning_rate": 8.10126582278481e-06, + "loss": 15.3772, + "step": 64 + }, + { + "epoch": 0.003726743686036178, + "grad_norm": 0.0, + "learning_rate": 8.227848101265824e-06, + "loss": 15.7289, + "step": 65 + }, + { + "epoch": 0.0037840782042828885, + "grad_norm": 0.0, + "learning_rate": 8.354430379746837e-06, + "loss": 15.7923, + "step": 66 + }, + { + "epoch": 0.003841412722529599, + "grad_norm": 0.0, + "learning_rate": 8.481012658227848e-06, + "loss": 15.5226, + "step": 67 + }, + { + "epoch": 0.003898747240776309, + "grad_norm": 0.0, + "learning_rate": 8.607594936708861e-06, + "loss": 15.7584, + "step": 68 + }, + { + "epoch": 0.00395608175902302, + "grad_norm": 0.0, + "learning_rate": 8.734177215189874e-06, + "loss": 15.9545, + "step": 69 + }, + { + "epoch": 0.00401341627726973, + "grad_norm": 0.0, + "learning_rate": 8.860759493670886e-06, + "loss": 15.9191, + "step": 70 + }, + { + "epoch": 0.004070750795516441, + "grad_norm": 0.0, + "learning_rate": 8.987341772151899e-06, + "loss": 15.7233, + "step": 71 + }, + { + "epoch": 0.004128085313763151, + "grad_norm": 0.0, + "learning_rate": 9.113924050632912e-06, + "loss": 15.5079, + "step": 72 + }, + { + "epoch": 0.004185419832009861, + "grad_norm": 0.0, + "learning_rate": 9.240506329113925e-06, + "loss": 15.7607, + "step": 73 + }, + { + "epoch": 0.004242754350256572, + "grad_norm": 0.0, + "learning_rate": 9.367088607594937e-06, + "loss": 16.0051, + "step": 74 + }, + { + "epoch": 0.004300088868503283, + "grad_norm": 0.0, + "learning_rate": 9.49367088607595e-06, + "loss": 15.9513, + "step": 75 + }, + { + "epoch": 0.004357423386749993, + "grad_norm": 0.0, + "learning_rate": 9.620253164556963e-06, + "loss": 15.9781, + "step": 76 + }, + { + "epoch": 0.0044147579049967035, + "grad_norm": 0.0, + "learning_rate": 9.746835443037975e-06, + "loss": 16.0359, + "step": 77 + }, + { + "epoch": 0.004472092423243414, + "grad_norm": 0.0, + "learning_rate": 9.87341772151899e-06, + "loss": 15.8052, + "step": 78 + }, + { + "epoch": 0.004529426941490124, + "grad_norm": 0.0, + "learning_rate": 1e-05, + "loss": 15.4073, + "step": 79 + }, + { + "epoch": 0.0045867614597368345, + "grad_norm": 0.0, + "learning_rate": 9.999996169491213e-06, + "loss": 15.8073, + "step": 80 + }, + { + "epoch": 0.004644095977983545, + "grad_norm": 0.0, + "learning_rate": 9.999984677970716e-06, + "loss": 15.9295, + "step": 81 + }, + { + "epoch": 0.004701430496230255, + "grad_norm": 0.0, + "learning_rate": 9.99996552545612e-06, + "loss": 15.5126, + "step": 82 + }, + { + "epoch": 0.0047587650144769655, + "grad_norm": 0.0, + "learning_rate": 9.999938711976769e-06, + "loss": 15.5249, + "step": 83 + }, + { + "epoch": 0.004816099532723677, + "grad_norm": 0.0, + "learning_rate": 9.999904237573746e-06, + "loss": 15.7576, + "step": 84 + }, + { + "epoch": 0.004873434050970387, + "grad_norm": 0.0, + "learning_rate": 9.999862102299874e-06, + "loss": 16.1043, + "step": 85 + }, + { + "epoch": 0.004930768569217097, + "grad_norm": 0.0, + "learning_rate": 9.999812306219712e-06, + "loss": 15.7214, + "step": 86 + }, + { + "epoch": 0.004988103087463808, + "grad_norm": 0.0, + "learning_rate": 9.999754849409559e-06, + "loss": 16.0242, + "step": 87 + }, + { + "epoch": 0.005045437605710518, + "grad_norm": 0.0, + "learning_rate": 9.99968973195745e-06, + "loss": 15.825, + "step": 88 + }, + { + "epoch": 0.005102772123957228, + "grad_norm": 0.0, + "learning_rate": 9.999616953963156e-06, + "loss": 15.7653, + "step": 89 + }, + { + "epoch": 0.005160106642203939, + "grad_norm": 0.0, + "learning_rate": 9.99953651553819e-06, + "loss": 15.7074, + "step": 90 + }, + { + "epoch": 0.005217441160450649, + "grad_norm": 0.0, + "learning_rate": 9.999448416805802e-06, + "loss": 16.1031, + "step": 91 + }, + { + "epoch": 0.005274775678697359, + "grad_norm": 0.0, + "learning_rate": 9.999352657900973e-06, + "loss": 15.7045, + "step": 92 + }, + { + "epoch": 0.005332110196944071, + "grad_norm": 0.0, + "learning_rate": 9.999249238970427e-06, + "loss": 16.0837, + "step": 93 + }, + { + "epoch": 0.005389444715190781, + "grad_norm": 0.0, + "learning_rate": 9.999138160172624e-06, + "loss": 15.6533, + "step": 94 + }, + { + "epoch": 0.005446779233437491, + "grad_norm": 0.0, + "learning_rate": 9.999019421677755e-06, + "loss": 15.8828, + "step": 95 + }, + { + "epoch": 0.005504113751684202, + "grad_norm": 0.0, + "learning_rate": 9.998893023667758e-06, + "loss": 15.9728, + "step": 96 + }, + { + "epoch": 0.005561448269930912, + "grad_norm": 0.0, + "learning_rate": 9.998758966336296e-06, + "loss": 15.9939, + "step": 97 + }, + { + "epoch": 0.005618782788177622, + "grad_norm": 0.0, + "learning_rate": 9.998617249888773e-06, + "loss": 15.896, + "step": 98 + }, + { + "epoch": 0.005676117306424333, + "grad_norm": 0.0, + "learning_rate": 9.998467874542328e-06, + "loss": 15.8819, + "step": 99 + }, + { + "epoch": 0.005733451824671043, + "grad_norm": 0.0, + "learning_rate": 9.998310840525835e-06, + "loss": 15.7407, + "step": 100 + }, + { + "epoch": 0.005790786342917753, + "grad_norm": 0.0, + "learning_rate": 9.9981461480799e-06, + "loss": 15.5921, + "step": 101 + }, + { + "epoch": 0.0058481208611644644, + "grad_norm": 0.0, + "learning_rate": 9.997973797456867e-06, + "loss": 16.1465, + "step": 102 + }, + { + "epoch": 0.005905455379411175, + "grad_norm": 0.0, + "learning_rate": 9.99779378892081e-06, + "loss": 15.5944, + "step": 103 + }, + { + "epoch": 0.005962789897657885, + "grad_norm": 0.0, + "learning_rate": 9.997606122747543e-06, + "loss": 15.6872, + "step": 104 + }, + { + "epoch": 0.0060201244159045954, + "grad_norm": 0.0, + "learning_rate": 9.997410799224604e-06, + "loss": 15.5546, + "step": 105 + }, + { + "epoch": 0.006077458934151306, + "grad_norm": 0.0, + "learning_rate": 9.997207818651273e-06, + "loss": 15.9227, + "step": 106 + }, + { + "epoch": 0.006134793452398016, + "grad_norm": 0.0, + "learning_rate": 9.996997181338554e-06, + "loss": 15.7962, + "step": 107 + }, + { + "epoch": 0.0061921279706447264, + "grad_norm": 0.0, + "learning_rate": 9.996778887609189e-06, + "loss": 15.4476, + "step": 108 + }, + { + "epoch": 0.006249462488891437, + "grad_norm": 0.0, + "learning_rate": 9.996552937797646e-06, + "loss": 15.6365, + "step": 109 + }, + { + "epoch": 0.006306797007138147, + "grad_norm": 0.0, + "learning_rate": 9.996319332250127e-06, + "loss": 15.935, + "step": 110 + }, + { + "epoch": 0.006364131525384858, + "grad_norm": 0.0, + "learning_rate": 9.996078071324562e-06, + "loss": 15.8665, + "step": 111 + }, + { + "epoch": 0.006421466043631569, + "grad_norm": 0.0, + "learning_rate": 9.995829155390613e-06, + "loss": 15.5091, + "step": 112 + }, + { + "epoch": 0.006478800561878279, + "grad_norm": 0.0, + "learning_rate": 9.99557258482967e-06, + "loss": 15.586, + "step": 113 + }, + { + "epoch": 0.006536135080124989, + "grad_norm": 0.0, + "learning_rate": 9.995308360034852e-06, + "loss": 15.6547, + "step": 114 + }, + { + "epoch": 0.0065934695983717, + "grad_norm": 0.0, + "learning_rate": 9.995036481411005e-06, + "loss": 15.3795, + "step": 115 + }, + { + "epoch": 0.00665080411661841, + "grad_norm": 0.0, + "learning_rate": 9.9947569493747e-06, + "loss": 15.7044, + "step": 116 + }, + { + "epoch": 0.00670813863486512, + "grad_norm": 0.0, + "learning_rate": 9.99446976435424e-06, + "loss": 15.8559, + "step": 117 + }, + { + "epoch": 0.006765473153111831, + "grad_norm": 0.0, + "learning_rate": 9.994174926789648e-06, + "loss": 15.6059, + "step": 118 + }, + { + "epoch": 0.006822807671358541, + "grad_norm": 0.0, + "learning_rate": 9.993872437132678e-06, + "loss": 16.1036, + "step": 119 + }, + { + "epoch": 0.006880142189605252, + "grad_norm": 0.0, + "learning_rate": 9.993562295846806e-06, + "loss": 15.5074, + "step": 120 + }, + { + "epoch": 0.0069374767078519625, + "grad_norm": 0.0, + "learning_rate": 9.993244503407227e-06, + "loss": 15.5059, + "step": 121 + }, + { + "epoch": 0.006994811226098673, + "grad_norm": 0.0, + "learning_rate": 9.99291906030087e-06, + "loss": 15.5546, + "step": 122 + }, + { + "epoch": 0.007052145744345383, + "grad_norm": 0.0, + "learning_rate": 9.992585967026374e-06, + "loss": 15.8098, + "step": 123 + }, + { + "epoch": 0.0071094802625920935, + "grad_norm": 0.0, + "learning_rate": 9.99224522409411e-06, + "loss": 15.9037, + "step": 124 + }, + { + "epoch": 0.007166814780838804, + "grad_norm": 0.0, + "learning_rate": 9.991896832026162e-06, + "loss": 16.0274, + "step": 125 + }, + { + "epoch": 0.007224149299085514, + "grad_norm": 0.0, + "learning_rate": 9.991540791356342e-06, + "loss": 15.5932, + "step": 126 + }, + { + "epoch": 0.0072814838173322245, + "grad_norm": 0.0, + "learning_rate": 9.991177102630173e-06, + "loss": 15.7162, + "step": 127 + }, + { + "epoch": 0.007338818335578936, + "grad_norm": 0.0, + "learning_rate": 9.990805766404902e-06, + "loss": 15.2172, + "step": 128 + }, + { + "epoch": 0.007396152853825646, + "grad_norm": 0.0, + "learning_rate": 9.990426783249492e-06, + "loss": 15.444, + "step": 129 + }, + { + "epoch": 0.007453487372072356, + "grad_norm": 0.0, + "learning_rate": 9.99004015374462e-06, + "loss": 15.4409, + "step": 130 + }, + { + "epoch": 0.007510821890319067, + "grad_norm": 0.0, + "learning_rate": 9.989645878482684e-06, + "loss": 15.941, + "step": 131 + }, + { + "epoch": 0.007568156408565777, + "grad_norm": 0.0, + "learning_rate": 9.989243958067791e-06, + "loss": 16.0396, + "step": 132 + }, + { + "epoch": 0.007625490926812487, + "grad_norm": 0.0, + "learning_rate": 9.988834393115768e-06, + "loss": 15.4225, + "step": 133 + }, + { + "epoch": 0.007682825445059198, + "grad_norm": 0.0, + "learning_rate": 9.988417184254148e-06, + "loss": 15.82, + "step": 134 + }, + { + "epoch": 0.007740159963305908, + "grad_norm": 0.0, + "learning_rate": 9.987992332122182e-06, + "loss": 15.7309, + "step": 135 + }, + { + "epoch": 0.007797494481552618, + "grad_norm": 0.0, + "learning_rate": 9.987559837370832e-06, + "loss": 15.7048, + "step": 136 + }, + { + "epoch": 0.00785482899979933, + "grad_norm": 0.0, + "learning_rate": 9.987119700662766e-06, + "loss": 15.3024, + "step": 137 + }, + { + "epoch": 0.00791216351804604, + "grad_norm": 0.0, + "learning_rate": 9.986671922672362e-06, + "loss": 15.7417, + "step": 138 + }, + { + "epoch": 0.00796949803629275, + "grad_norm": 0.0, + "learning_rate": 9.986216504085709e-06, + "loss": 15.8964, + "step": 139 + }, + { + "epoch": 0.00802683255453946, + "grad_norm": 0.0, + "learning_rate": 9.9857534456006e-06, + "loss": 15.7391, + "step": 140 + }, + { + "epoch": 0.008084167072786171, + "grad_norm": 0.0, + "learning_rate": 9.985282747926535e-06, + "loss": 15.558, + "step": 141 + }, + { + "epoch": 0.008141501591032881, + "grad_norm": 0.0, + "learning_rate": 9.984804411784717e-06, + "loss": 15.2734, + "step": 142 + }, + { + "epoch": 0.008198836109279592, + "grad_norm": 0.0, + "learning_rate": 9.984318437908056e-06, + "loss": 15.6669, + "step": 143 + }, + { + "epoch": 0.008256170627526302, + "grad_norm": 0.0, + "learning_rate": 9.983824827041164e-06, + "loss": 15.8501, + "step": 144 + }, + { + "epoch": 0.008313505145773012, + "grad_norm": 0.0, + "learning_rate": 9.983323579940351e-06, + "loss": 15.7229, + "step": 145 + }, + { + "epoch": 0.008370839664019723, + "grad_norm": 0.0, + "learning_rate": 9.98281469737363e-06, + "loss": 15.7334, + "step": 146 + }, + { + "epoch": 0.008428174182266433, + "grad_norm": 0.0, + "learning_rate": 9.982298180120715e-06, + "loss": 15.3766, + "step": 147 + }, + { + "epoch": 0.008485508700513143, + "grad_norm": 0.0, + "learning_rate": 9.981774028973013e-06, + "loss": 15.6116, + "step": 148 + }, + { + "epoch": 0.008542843218759854, + "grad_norm": 0.0, + "learning_rate": 9.981242244733631e-06, + "loss": 15.4852, + "step": 149 + }, + { + "epoch": 0.008600177737006566, + "grad_norm": 0.0, + "learning_rate": 9.98070282821737e-06, + "loss": 15.7986, + "step": 150 + }, + { + "epoch": 0.008657512255253276, + "grad_norm": 0.0, + "learning_rate": 9.980155780250728e-06, + "loss": 15.6175, + "step": 151 + }, + { + "epoch": 0.008714846773499986, + "grad_norm": 0.0, + "learning_rate": 9.97960110167189e-06, + "loss": 15.7265, + "step": 152 + }, + { + "epoch": 0.008772181291746697, + "grad_norm": 0.0, + "learning_rate": 9.979038793330743e-06, + "loss": 15.3184, + "step": 153 + }, + { + "epoch": 0.008829515809993407, + "grad_norm": 0.0, + "learning_rate": 9.97846885608885e-06, + "loss": 15.6656, + "step": 154 + }, + { + "epoch": 0.008886850328240117, + "grad_norm": 0.0, + "learning_rate": 9.977891290819474e-06, + "loss": 15.5521, + "step": 155 + }, + { + "epoch": 0.008944184846486828, + "grad_norm": 0.0, + "learning_rate": 9.977306098407566e-06, + "loss": 15.6549, + "step": 156 + }, + { + "epoch": 0.009001519364733538, + "grad_norm": 0.0, + "learning_rate": 9.976713279749754e-06, + "loss": 15.719, + "step": 157 + }, + { + "epoch": 0.009058853882980248, + "grad_norm": 0.0, + "learning_rate": 9.976112835754362e-06, + "loss": 15.5373, + "step": 158 + }, + { + "epoch": 0.009116188401226959, + "grad_norm": 0.0, + "learning_rate": 9.975504767341388e-06, + "loss": 15.9543, + "step": 159 + }, + { + "epoch": 0.009173522919473669, + "grad_norm": 0.0, + "learning_rate": 9.97488907544252e-06, + "loss": 15.6243, + "step": 160 + }, + { + "epoch": 0.00923085743772038, + "grad_norm": 0.0, + "learning_rate": 9.974265761001123e-06, + "loss": 15.4797, + "step": 161 + }, + { + "epoch": 0.00928819195596709, + "grad_norm": 0.0, + "learning_rate": 9.97363482497224e-06, + "loss": 15.9338, + "step": 162 + }, + { + "epoch": 0.0093455264742138, + "grad_norm": 0.0, + "learning_rate": 9.972996268322594e-06, + "loss": 15.7102, + "step": 163 + }, + { + "epoch": 0.00940286099246051, + "grad_norm": 0.0, + "learning_rate": 9.972350092030583e-06, + "loss": 15.4883, + "step": 164 + }, + { + "epoch": 0.00946019551070722, + "grad_norm": 0.0, + "learning_rate": 9.971696297086282e-06, + "loss": 15.7564, + "step": 165 + }, + { + "epoch": 0.009517530028953931, + "grad_norm": 0.0, + "learning_rate": 9.971034884491436e-06, + "loss": 15.8265, + "step": 166 + }, + { + "epoch": 0.009574864547200641, + "grad_norm": 0.0, + "learning_rate": 9.970365855259465e-06, + "loss": 15.7952, + "step": 167 + }, + { + "epoch": 0.009632199065447353, + "grad_norm": 0.0, + "learning_rate": 9.96968921041546e-06, + "loss": 15.3301, + "step": 168 + }, + { + "epoch": 0.009689533583694064, + "grad_norm": 0.0, + "learning_rate": 9.969004950996175e-06, + "loss": 15.6668, + "step": 169 + }, + { + "epoch": 0.009746868101940774, + "grad_norm": 0.0, + "learning_rate": 9.968313078050035e-06, + "loss": 15.7378, + "step": 170 + }, + { + "epoch": 0.009804202620187484, + "grad_norm": 0.0, + "learning_rate": 9.967613592637133e-06, + "loss": 15.3782, + "step": 171 + }, + { + "epoch": 0.009861537138434195, + "grad_norm": 0.0, + "learning_rate": 9.96690649582922e-06, + "loss": 15.5379, + "step": 172 + }, + { + "epoch": 0.009918871656680905, + "grad_norm": 0.0, + "learning_rate": 9.966191788709716e-06, + "loss": 15.4814, + "step": 173 + }, + { + "epoch": 0.009976206174927615, + "grad_norm": 0.0, + "learning_rate": 9.965469472373693e-06, + "loss": 15.6, + "step": 174 + }, + { + "epoch": 0.010033540693174326, + "grad_norm": 0.0, + "learning_rate": 9.964739547927892e-06, + "loss": 15.8427, + "step": 175 + }, + { + "epoch": 0.010090875211421036, + "grad_norm": 0.0, + "learning_rate": 9.964002016490698e-06, + "loss": 15.403, + "step": 176 + }, + { + "epoch": 0.010148209729667746, + "grad_norm": 0.0, + "learning_rate": 9.963256879192167e-06, + "loss": 15.7499, + "step": 177 + }, + { + "epoch": 0.010205544247914457, + "grad_norm": 0.0, + "learning_rate": 9.962504137173997e-06, + "loss": 15.9071, + "step": 178 + }, + { + "epoch": 0.010262878766161167, + "grad_norm": 0.0, + "learning_rate": 9.961743791589544e-06, + "loss": 15.852, + "step": 179 + }, + { + "epoch": 0.010320213284407877, + "grad_norm": 0.0, + "learning_rate": 9.96097584360381e-06, + "loss": 15.6302, + "step": 180 + }, + { + "epoch": 0.010377547802654588, + "grad_norm": 0.0, + "learning_rate": 9.96020029439345e-06, + "loss": 15.6271, + "step": 181 + }, + { + "epoch": 0.010434882320901298, + "grad_norm": 0.0, + "learning_rate": 9.959417145146761e-06, + "loss": 15.8202, + "step": 182 + }, + { + "epoch": 0.010492216839148008, + "grad_norm": 0.0, + "learning_rate": 9.958626397063688e-06, + "loss": 15.4802, + "step": 183 + }, + { + "epoch": 0.010549551357394719, + "grad_norm": 0.0, + "learning_rate": 9.957828051355817e-06, + "loss": 15.7718, + "step": 184 + }, + { + "epoch": 0.010606885875641429, + "grad_norm": 0.0, + "learning_rate": 9.95702210924638e-06, + "loss": 15.905, + "step": 185 + }, + { + "epoch": 0.010664220393888141, + "grad_norm": 0.0, + "learning_rate": 9.956208571970238e-06, + "loss": 15.4488, + "step": 186 + }, + { + "epoch": 0.010721554912134851, + "grad_norm": 0.0, + "learning_rate": 9.955387440773902e-06, + "loss": 15.6706, + "step": 187 + }, + { + "epoch": 0.010778889430381562, + "grad_norm": 0.0, + "learning_rate": 9.954558716915508e-06, + "loss": 15.7807, + "step": 188 + }, + { + "epoch": 0.010836223948628272, + "grad_norm": 0.0, + "learning_rate": 9.953722401664829e-06, + "loss": 15.9079, + "step": 189 + }, + { + "epoch": 0.010893558466874982, + "grad_norm": 0.0, + "learning_rate": 9.952878496303274e-06, + "loss": 15.8757, + "step": 190 + }, + { + "epoch": 0.010950892985121693, + "grad_norm": 0.0, + "learning_rate": 9.952027002123877e-06, + "loss": 15.8773, + "step": 191 + }, + { + "epoch": 0.011008227503368403, + "grad_norm": 0.0, + "learning_rate": 9.951167920431297e-06, + "loss": 15.3914, + "step": 192 + }, + { + "epoch": 0.011065562021615113, + "grad_norm": 0.0, + "learning_rate": 9.950301252541824e-06, + "loss": 15.5592, + "step": 193 + }, + { + "epoch": 0.011122896539861824, + "grad_norm": 0.0, + "learning_rate": 9.94942699978337e-06, + "loss": 15.9349, + "step": 194 + }, + { + "epoch": 0.011180231058108534, + "grad_norm": 0.0, + "learning_rate": 9.94854516349547e-06, + "loss": 15.3999, + "step": 195 + }, + { + "epoch": 0.011237565576355244, + "grad_norm": 0.0, + "learning_rate": 9.94765574502927e-06, + "loss": 15.7352, + "step": 196 + }, + { + "epoch": 0.011294900094601955, + "grad_norm": 0.0, + "learning_rate": 9.946758745747549e-06, + "loss": 16.1242, + "step": 197 + }, + { + "epoch": 0.011352234612848665, + "grad_norm": 0.0, + "learning_rate": 9.945854167024685e-06, + "loss": 15.617, + "step": 198 + }, + { + "epoch": 0.011409569131095375, + "grad_norm": 0.0, + "learning_rate": 9.944942010246681e-06, + "loss": 15.5772, + "step": 199 + }, + { + "epoch": 0.011466903649342086, + "grad_norm": 0.0, + "learning_rate": 9.944022276811147e-06, + "loss": 15.8159, + "step": 200 + }, + { + "epoch": 0.011524238167588796, + "grad_norm": 0.0, + "learning_rate": 9.943094968127298e-06, + "loss": 15.7496, + "step": 201 + }, + { + "epoch": 0.011581572685835506, + "grad_norm": 0.0, + "learning_rate": 9.942160085615963e-06, + "loss": 15.694, + "step": 202 + }, + { + "epoch": 0.011638907204082219, + "grad_norm": 0.0, + "learning_rate": 9.941217630709571e-06, + "loss": 15.2256, + "step": 203 + }, + { + "epoch": 0.011696241722328929, + "grad_norm": 0.0, + "learning_rate": 9.940267604852155e-06, + "loss": 15.7162, + "step": 204 + }, + { + "epoch": 0.01175357624057564, + "grad_norm": 0.0, + "learning_rate": 9.939310009499348e-06, + "loss": 16.0539, + "step": 205 + }, + { + "epoch": 0.01181091075882235, + "grad_norm": 0.0, + "learning_rate": 9.938344846118382e-06, + "loss": 15.3012, + "step": 206 + }, + { + "epoch": 0.01186824527706906, + "grad_norm": 0.0, + "learning_rate": 9.937372116188081e-06, + "loss": 15.5806, + "step": 207 + }, + { + "epoch": 0.01192557979531577, + "grad_norm": 0.0, + "learning_rate": 9.936391821198868e-06, + "loss": 15.522, + "step": 208 + }, + { + "epoch": 0.01198291431356248, + "grad_norm": 0.0, + "learning_rate": 9.935403962652753e-06, + "loss": 15.522, + "step": 209 + }, + { + "epoch": 0.012040248831809191, + "grad_norm": 0.0, + "learning_rate": 9.934408542063337e-06, + "loss": 16.0803, + "step": 210 + }, + { + "epoch": 0.012097583350055901, + "grad_norm": 0.0, + "learning_rate": 9.933405560955805e-06, + "loss": 15.6953, + "step": 211 + }, + { + "epoch": 0.012154917868302612, + "grad_norm": 0.0, + "learning_rate": 9.932395020866929e-06, + "loss": 15.6063, + "step": 212 + }, + { + "epoch": 0.012212252386549322, + "grad_norm": 0.0, + "learning_rate": 9.931376923345067e-06, + "loss": 15.7092, + "step": 213 + }, + { + "epoch": 0.012269586904796032, + "grad_norm": 0.0, + "learning_rate": 9.930351269950144e-06, + "loss": 15.7337, + "step": 214 + }, + { + "epoch": 0.012326921423042743, + "grad_norm": 0.0, + "learning_rate": 9.929318062253673e-06, + "loss": 15.5571, + "step": 215 + }, + { + "epoch": 0.012384255941289453, + "grad_norm": 0.0, + "learning_rate": 9.92827730183874e-06, + "loss": 16.0815, + "step": 216 + }, + { + "epoch": 0.012441590459536163, + "grad_norm": 0.0, + "learning_rate": 9.9272289903e-06, + "loss": 15.7825, + "step": 217 + }, + { + "epoch": 0.012498924977782874, + "grad_norm": 0.0, + "learning_rate": 9.92617312924368e-06, + "loss": 15.7294, + "step": 218 + }, + { + "epoch": 0.012556259496029584, + "grad_norm": 0.0, + "learning_rate": 9.925109720287574e-06, + "loss": 15.7288, + "step": 219 + }, + { + "epoch": 0.012613594014276294, + "grad_norm": 0.0, + "learning_rate": 9.924038765061042e-06, + "loss": 15.5877, + "step": 220 + }, + { + "epoch": 0.012670928532523006, + "grad_norm": 0.0, + "learning_rate": 9.922960265205001e-06, + "loss": 15.5727, + "step": 221 + }, + { + "epoch": 0.012728263050769717, + "grad_norm": 0.0, + "learning_rate": 9.921874222371939e-06, + "loss": 15.5752, + "step": 222 + }, + { + "epoch": 0.012785597569016427, + "grad_norm": 0.0, + "learning_rate": 9.92078063822589e-06, + "loss": 15.5827, + "step": 223 + }, + { + "epoch": 0.012842932087263137, + "grad_norm": 0.0, + "learning_rate": 9.919679514442449e-06, + "loss": 15.451, + "step": 224 + }, + { + "epoch": 0.012900266605509848, + "grad_norm": 0.0, + "learning_rate": 9.918570852708762e-06, + "loss": 15.7206, + "step": 225 + }, + { + "epoch": 0.012957601123756558, + "grad_norm": 0.0, + "learning_rate": 9.917454654723522e-06, + "loss": 16.032, + "step": 226 + }, + { + "epoch": 0.013014935642003268, + "grad_norm": 0.0, + "learning_rate": 9.916330922196975e-06, + "loss": 15.7988, + "step": 227 + }, + { + "epoch": 0.013072270160249979, + "grad_norm": 0.0, + "learning_rate": 9.915199656850906e-06, + "loss": 15.6503, + "step": 228 + }, + { + "epoch": 0.013129604678496689, + "grad_norm": 0.0, + "learning_rate": 9.914060860418644e-06, + "loss": 15.5872, + "step": 229 + }, + { + "epoch": 0.0131869391967434, + "grad_norm": 0.0, + "learning_rate": 9.912914534645056e-06, + "loss": 15.89, + "step": 230 + }, + { + "epoch": 0.01324427371499011, + "grad_norm": 0.0, + "learning_rate": 9.91176068128655e-06, + "loss": 15.5145, + "step": 231 + }, + { + "epoch": 0.01330160823323682, + "grad_norm": 0.0, + "learning_rate": 9.910599302111057e-06, + "loss": 15.885, + "step": 232 + }, + { + "epoch": 0.01335894275148353, + "grad_norm": 0.0, + "learning_rate": 9.909430398898053e-06, + "loss": 15.4595, + "step": 233 + }, + { + "epoch": 0.01341627726973024, + "grad_norm": 0.0, + "learning_rate": 9.908253973438533e-06, + "loss": 15.66, + "step": 234 + }, + { + "epoch": 0.013473611787976951, + "grad_norm": 0.0, + "learning_rate": 9.907070027535022e-06, + "loss": 15.8289, + "step": 235 + }, + { + "epoch": 0.013530946306223661, + "grad_norm": 0.0, + "learning_rate": 9.905878563001563e-06, + "loss": 15.5208, + "step": 236 + }, + { + "epoch": 0.013588280824470372, + "grad_norm": 0.0, + "learning_rate": 9.904679581663725e-06, + "loss": 15.7992, + "step": 237 + }, + { + "epoch": 0.013645615342717082, + "grad_norm": 0.0, + "learning_rate": 9.903473085358589e-06, + "loss": 15.7632, + "step": 238 + }, + { + "epoch": 0.013702949860963794, + "grad_norm": 0.0, + "learning_rate": 9.902259075934755e-06, + "loss": 15.5836, + "step": 239 + }, + { + "epoch": 0.013760284379210504, + "grad_norm": 0.0, + "learning_rate": 9.90103755525233e-06, + "loss": 15.3104, + "step": 240 + }, + { + "epoch": 0.013817618897457215, + "grad_norm": 0.0, + "learning_rate": 9.899808525182935e-06, + "loss": 15.7533, + "step": 241 + }, + { + "epoch": 0.013874953415703925, + "grad_norm": 0.0, + "learning_rate": 9.898571987609692e-06, + "loss": 15.4214, + "step": 242 + }, + { + "epoch": 0.013932287933950635, + "grad_norm": 0.0, + "learning_rate": 9.897327944427231e-06, + "loss": 15.9372, + "step": 243 + }, + { + "epoch": 0.013989622452197346, + "grad_norm": 0.0, + "learning_rate": 9.896076397541676e-06, + "loss": 15.4472, + "step": 244 + }, + { + "epoch": 0.014046956970444056, + "grad_norm": 0.0, + "learning_rate": 9.894817348870654e-06, + "loss": 15.308, + "step": 245 + }, + { + "epoch": 0.014104291488690766, + "grad_norm": 0.0, + "learning_rate": 9.893550800343283e-06, + "loss": 15.892, + "step": 246 + }, + { + "epoch": 0.014161626006937477, + "grad_norm": 0.0, + "learning_rate": 9.892276753900173e-06, + "loss": 15.5055, + "step": 247 + }, + { + "epoch": 0.014218960525184187, + "grad_norm": 0.0, + "learning_rate": 9.890995211493422e-06, + "loss": 15.7145, + "step": 248 + }, + { + "epoch": 0.014276295043430897, + "grad_norm": 0.0, + "learning_rate": 9.889706175086615e-06, + "loss": 15.7733, + "step": 249 + }, + { + "epoch": 0.014333629561677608, + "grad_norm": 0.0, + "learning_rate": 9.888409646654818e-06, + "loss": 15.4903, + "step": 250 + }, + { + "epoch": 0.014390964079924318, + "grad_norm": 0.0, + "learning_rate": 9.887105628184575e-06, + "loss": 15.7896, + "step": 251 + }, + { + "epoch": 0.014448298598171028, + "grad_norm": 0.0, + "learning_rate": 9.885794121673907e-06, + "loss": 15.5118, + "step": 252 + }, + { + "epoch": 0.014505633116417739, + "grad_norm": 0.0, + "learning_rate": 9.884475129132312e-06, + "loss": 15.8531, + "step": 253 + }, + { + "epoch": 0.014562967634664449, + "grad_norm": 0.0, + "learning_rate": 9.883148652580752e-06, + "loss": 15.7605, + "step": 254 + }, + { + "epoch": 0.01462030215291116, + "grad_norm": 0.0, + "learning_rate": 9.881814694051662e-06, + "loss": 15.462, + "step": 255 + }, + { + "epoch": 0.014677636671157871, + "grad_norm": 0.0, + "learning_rate": 9.880473255588937e-06, + "loss": 15.7006, + "step": 256 + }, + { + "epoch": 0.014734971189404582, + "grad_norm": 0.0, + "learning_rate": 9.879124339247931e-06, + "loss": 15.4329, + "step": 257 + }, + { + "epoch": 0.014792305707651292, + "grad_norm": 0.0, + "learning_rate": 9.877767947095462e-06, + "loss": 15.7845, + "step": 258 + }, + { + "epoch": 0.014849640225898002, + "grad_norm": 0.0, + "learning_rate": 9.876404081209796e-06, + "loss": 15.8761, + "step": 259 + }, + { + "epoch": 0.014906974744144713, + "grad_norm": 0.0, + "learning_rate": 9.875032743680656e-06, + "loss": 15.4054, + "step": 260 + }, + { + "epoch": 0.014964309262391423, + "grad_norm": 0.0, + "learning_rate": 9.873653936609207e-06, + "loss": 15.3106, + "step": 261 + }, + { + "epoch": 0.015021643780638133, + "grad_norm": 0.0, + "learning_rate": 9.872267662108064e-06, + "loss": 15.5101, + "step": 262 + }, + { + "epoch": 0.015078978298884844, + "grad_norm": 0.0, + "learning_rate": 9.870873922301281e-06, + "loss": 16.1312, + "step": 263 + }, + { + "epoch": 0.015136312817131554, + "grad_norm": 0.0, + "learning_rate": 9.869472719324351e-06, + "loss": 16.0715, + "step": 264 + }, + { + "epoch": 0.015193647335378264, + "grad_norm": 0.0, + "learning_rate": 9.868064055324204e-06, + "loss": 15.661, + "step": 265 + }, + { + "epoch": 0.015250981853624975, + "grad_norm": 0.0, + "learning_rate": 9.866647932459196e-06, + "loss": 15.7833, + "step": 266 + }, + { + "epoch": 0.015308316371871685, + "grad_norm": 0.0, + "learning_rate": 9.86522435289912e-06, + "loss": 15.845, + "step": 267 + }, + { + "epoch": 0.015365650890118395, + "grad_norm": 0.0, + "learning_rate": 9.863793318825186e-06, + "loss": 15.9774, + "step": 268 + }, + { + "epoch": 0.015422985408365106, + "grad_norm": 0.0, + "learning_rate": 9.862354832430033e-06, + "loss": 16.1399, + "step": 269 + }, + { + "epoch": 0.015480319926611816, + "grad_norm": 0.0, + "learning_rate": 9.86090889591771e-06, + "loss": 15.7273, + "step": 270 + }, + { + "epoch": 0.015537654444858526, + "grad_norm": 0.0, + "learning_rate": 9.859455511503691e-06, + "loss": 15.6735, + "step": 271 + }, + { + "epoch": 0.015594988963105237, + "grad_norm": 0.0, + "learning_rate": 9.857994681414853e-06, + "loss": 15.7031, + "step": 272 + }, + { + "epoch": 0.015652323481351947, + "grad_norm": 0.0, + "learning_rate": 9.856526407889486e-06, + "loss": 15.4029, + "step": 273 + }, + { + "epoch": 0.01570965799959866, + "grad_norm": 0.0, + "learning_rate": 9.855050693177286e-06, + "loss": 15.4254, + "step": 274 + }, + { + "epoch": 0.015766992517845368, + "grad_norm": 0.0, + "learning_rate": 9.853567539539345e-06, + "loss": 15.9419, + "step": 275 + }, + { + "epoch": 0.01582432703609208, + "grad_norm": 0.0, + "learning_rate": 9.85207694924816e-06, + "loss": 15.5074, + "step": 276 + }, + { + "epoch": 0.01588166155433879, + "grad_norm": 0.0, + "learning_rate": 9.850578924587614e-06, + "loss": 15.6744, + "step": 277 + }, + { + "epoch": 0.0159389960725855, + "grad_norm": 0.0, + "learning_rate": 9.849073467852988e-06, + "loss": 15.56, + "step": 278 + }, + { + "epoch": 0.01599633059083221, + "grad_norm": 0.0, + "learning_rate": 9.84756058135095e-06, + "loss": 15.5879, + "step": 279 + }, + { + "epoch": 0.01605366510907892, + "grad_norm": 0.0, + "learning_rate": 9.846040267399548e-06, + "loss": 15.8704, + "step": 280 + }, + { + "epoch": 0.01611099962732563, + "grad_norm": 0.0, + "learning_rate": 9.844512528328212e-06, + "loss": 15.7365, + "step": 281 + }, + { + "epoch": 0.016168334145572342, + "grad_norm": 0.0, + "learning_rate": 9.84297736647775e-06, + "loss": 15.4265, + "step": 282 + }, + { + "epoch": 0.016225668663819054, + "grad_norm": 0.0, + "learning_rate": 9.841434784200341e-06, + "loss": 15.3965, + "step": 283 + }, + { + "epoch": 0.016283003182065763, + "grad_norm": 0.0, + "learning_rate": 9.83988478385954e-06, + "loss": 15.5008, + "step": 284 + }, + { + "epoch": 0.016340337700312475, + "grad_norm": 0.0, + "learning_rate": 9.838327367830257e-06, + "loss": 15.6644, + "step": 285 + }, + { + "epoch": 0.016397672218559183, + "grad_norm": 0.0, + "learning_rate": 9.83676253849877e-06, + "loss": 15.5689, + "step": 286 + }, + { + "epoch": 0.016455006736805895, + "grad_norm": 0.0, + "learning_rate": 9.835190298262721e-06, + "loss": 15.2807, + "step": 287 + }, + { + "epoch": 0.016512341255052604, + "grad_norm": 0.0, + "learning_rate": 9.833610649531099e-06, + "loss": 15.4072, + "step": 288 + }, + { + "epoch": 0.016569675773299316, + "grad_norm": 0.0, + "learning_rate": 9.832023594724248e-06, + "loss": 15.7064, + "step": 289 + }, + { + "epoch": 0.016627010291546025, + "grad_norm": 0.0, + "learning_rate": 9.830429136273858e-06, + "loss": 15.4726, + "step": 290 + }, + { + "epoch": 0.016684344809792737, + "grad_norm": 0.0, + "learning_rate": 9.828827276622965e-06, + "loss": 15.932, + "step": 291 + }, + { + "epoch": 0.016741679328039445, + "grad_norm": 0.0, + "learning_rate": 9.827218018225944e-06, + "loss": 15.5607, + "step": 292 + }, + { + "epoch": 0.016799013846286157, + "grad_norm": 0.0, + "learning_rate": 9.825601363548507e-06, + "loss": 15.568, + "step": 293 + }, + { + "epoch": 0.016856348364532866, + "grad_norm": 0.0, + "learning_rate": 9.823977315067696e-06, + "loss": 15.6851, + "step": 294 + }, + { + "epoch": 0.016913682882779578, + "grad_norm": 0.0, + "learning_rate": 9.822345875271884e-06, + "loss": 15.5914, + "step": 295 + }, + { + "epoch": 0.016971017401026287, + "grad_norm": 0.0, + "learning_rate": 9.82070704666077e-06, + "loss": 15.9541, + "step": 296 + }, + { + "epoch": 0.017028351919273, + "grad_norm": 0.0, + "learning_rate": 9.819060831745373e-06, + "loss": 15.3636, + "step": 297 + }, + { + "epoch": 0.017085686437519707, + "grad_norm": 0.0, + "learning_rate": 9.817407233048028e-06, + "loss": 15.6332, + "step": 298 + }, + { + "epoch": 0.01714302095576642, + "grad_norm": 0.0, + "learning_rate": 9.815746253102385e-06, + "loss": 15.562, + "step": 299 + }, + { + "epoch": 0.01720035547401313, + "grad_norm": 0.0, + "learning_rate": 9.814077894453406e-06, + "loss": 15.6414, + "step": 300 + }, + { + "epoch": 0.01725768999225984, + "grad_norm": 0.0, + "learning_rate": 9.812402159657352e-06, + "loss": 15.2265, + "step": 301 + }, + { + "epoch": 0.017315024510506552, + "grad_norm": 0.0, + "learning_rate": 9.810719051281791e-06, + "loss": 15.8043, + "step": 302 + }, + { + "epoch": 0.01737235902875326, + "grad_norm": 0.0, + "learning_rate": 9.80902857190559e-06, + "loss": 15.646, + "step": 303 + }, + { + "epoch": 0.017429693546999973, + "grad_norm": 0.0, + "learning_rate": 9.807330724118906e-06, + "loss": 15.5725, + "step": 304 + }, + { + "epoch": 0.01748702806524668, + "grad_norm": 0.0, + "learning_rate": 9.805625510523184e-06, + "loss": 15.6773, + "step": 305 + }, + { + "epoch": 0.017544362583493393, + "grad_norm": 0.0, + "learning_rate": 9.803912933731163e-06, + "loss": 15.5197, + "step": 306 + }, + { + "epoch": 0.017601697101740102, + "grad_norm": 0.0, + "learning_rate": 9.802192996366859e-06, + "loss": 15.7341, + "step": 307 + }, + { + "epoch": 0.017659031619986814, + "grad_norm": 0.0, + "learning_rate": 9.800465701065562e-06, + "loss": 15.765, + "step": 308 + }, + { + "epoch": 0.017716366138233523, + "grad_norm": 0.0, + "learning_rate": 9.798731050473843e-06, + "loss": 15.3062, + "step": 309 + }, + { + "epoch": 0.017773700656480235, + "grad_norm": 0.0, + "learning_rate": 9.796989047249539e-06, + "loss": 15.6744, + "step": 310 + }, + { + "epoch": 0.017831035174726943, + "grad_norm": 0.0, + "learning_rate": 9.795239694061754e-06, + "loss": 15.4008, + "step": 311 + }, + { + "epoch": 0.017888369692973655, + "grad_norm": 0.0, + "learning_rate": 9.793482993590853e-06, + "loss": 15.5721, + "step": 312 + }, + { + "epoch": 0.017945704211220364, + "grad_norm": 0.0, + "learning_rate": 9.791718948528457e-06, + "loss": 15.8716, + "step": 313 + }, + { + "epoch": 0.018003038729467076, + "grad_norm": 0.0, + "learning_rate": 9.789947561577445e-06, + "loss": 15.5011, + "step": 314 + }, + { + "epoch": 0.018060373247713785, + "grad_norm": 0.0, + "learning_rate": 9.78816883545194e-06, + "loss": 15.3945, + "step": 315 + }, + { + "epoch": 0.018117707765960497, + "grad_norm": 0.0, + "learning_rate": 9.786382772877312e-06, + "loss": 15.6252, + "step": 316 + }, + { + "epoch": 0.018175042284207205, + "grad_norm": 0.0, + "learning_rate": 9.784589376590175e-06, + "loss": 15.1961, + "step": 317 + }, + { + "epoch": 0.018232376802453917, + "grad_norm": 0.0, + "learning_rate": 9.782788649338376e-06, + "loss": 15.7459, + "step": 318 + }, + { + "epoch": 0.01828971132070063, + "grad_norm": 0.0, + "learning_rate": 9.780980593880993e-06, + "loss": 15.658, + "step": 319 + }, + { + "epoch": 0.018347045838947338, + "grad_norm": 0.0, + "learning_rate": 9.779165212988339e-06, + "loss": 15.5801, + "step": 320 + }, + { + "epoch": 0.01840438035719405, + "grad_norm": 0.0, + "learning_rate": 9.777342509441946e-06, + "loss": 15.3656, + "step": 321 + }, + { + "epoch": 0.01846171487544076, + "grad_norm": 0.0, + "learning_rate": 9.775512486034564e-06, + "loss": 15.6884, + "step": 322 + }, + { + "epoch": 0.01851904939368747, + "grad_norm": 0.0, + "learning_rate": 9.773675145570163e-06, + "loss": 15.7698, + "step": 323 + }, + { + "epoch": 0.01857638391193418, + "grad_norm": 0.0, + "learning_rate": 9.771830490863923e-06, + "loss": 15.5413, + "step": 324 + }, + { + "epoch": 0.01863371843018089, + "grad_norm": 0.0, + "learning_rate": 9.76997852474223e-06, + "loss": 15.227, + "step": 325 + }, + { + "epoch": 0.0186910529484276, + "grad_norm": 0.0, + "learning_rate": 9.768119250042673e-06, + "loss": 16.0092, + "step": 326 + }, + { + "epoch": 0.018748387466674312, + "grad_norm": 0.0, + "learning_rate": 9.76625266961404e-06, + "loss": 15.5157, + "step": 327 + }, + { + "epoch": 0.01880572198492102, + "grad_norm": 0.0, + "learning_rate": 9.76437878631631e-06, + "loss": 15.4693, + "step": 328 + }, + { + "epoch": 0.018863056503167733, + "grad_norm": 0.0, + "learning_rate": 9.762497603020658e-06, + "loss": 15.336, + "step": 329 + }, + { + "epoch": 0.01892039102141444, + "grad_norm": 0.0, + "learning_rate": 9.760609122609434e-06, + "loss": 15.4514, + "step": 330 + }, + { + "epoch": 0.018977725539661153, + "grad_norm": 0.0, + "learning_rate": 9.758713347976179e-06, + "loss": 15.8285, + "step": 331 + }, + { + "epoch": 0.019035060057907862, + "grad_norm": 0.0, + "learning_rate": 9.756810282025602e-06, + "loss": 16.0446, + "step": 332 + }, + { + "epoch": 0.019092394576154574, + "grad_norm": 0.0, + "learning_rate": 9.754899927673588e-06, + "loss": 15.9323, + "step": 333 + }, + { + "epoch": 0.019149729094401283, + "grad_norm": 0.0, + "learning_rate": 9.752982287847193e-06, + "loss": 15.4573, + "step": 334 + }, + { + "epoch": 0.019207063612647995, + "grad_norm": 0.0, + "learning_rate": 9.751057365484625e-06, + "loss": 15.6772, + "step": 335 + }, + { + "epoch": 0.019264398130894707, + "grad_norm": 0.0, + "learning_rate": 9.74912516353526e-06, + "loss": 15.7258, + "step": 336 + }, + { + "epoch": 0.019321732649141415, + "grad_norm": 0.0, + "learning_rate": 9.747185684959626e-06, + "loss": 15.716, + "step": 337 + }, + { + "epoch": 0.019379067167388127, + "grad_norm": 0.0, + "learning_rate": 9.745238932729397e-06, + "loss": 15.6332, + "step": 338 + }, + { + "epoch": 0.019436401685634836, + "grad_norm": 0.0, + "learning_rate": 9.743284909827393e-06, + "loss": 15.0069, + "step": 339 + }, + { + "epoch": 0.019493736203881548, + "grad_norm": 0.0, + "learning_rate": 9.741323619247575e-06, + "loss": 15.6709, + "step": 340 + }, + { + "epoch": 0.019551070722128257, + "grad_norm": 0.0, + "learning_rate": 9.739355063995042e-06, + "loss": 15.6234, + "step": 341 + }, + { + "epoch": 0.01960840524037497, + "grad_norm": 0.0, + "learning_rate": 9.73737924708602e-06, + "loss": 15.7564, + "step": 342 + }, + { + "epoch": 0.019665739758621677, + "grad_norm": 0.0, + "learning_rate": 9.735396171547859e-06, + "loss": 15.582, + "step": 343 + }, + { + "epoch": 0.01972307427686839, + "grad_norm": 0.0, + "learning_rate": 9.73340584041904e-06, + "loss": 15.6952, + "step": 344 + }, + { + "epoch": 0.019780408795115098, + "grad_norm": 0.0, + "learning_rate": 9.73140825674915e-06, + "loss": 15.4585, + "step": 345 + }, + { + "epoch": 0.01983774331336181, + "grad_norm": 0.0, + "learning_rate": 9.7294034235989e-06, + "loss": 15.6929, + "step": 346 + }, + { + "epoch": 0.01989507783160852, + "grad_norm": 0.0, + "learning_rate": 9.727391344040095e-06, + "loss": 15.7319, + "step": 347 + }, + { + "epoch": 0.01995241234985523, + "grad_norm": 0.0, + "learning_rate": 9.725372021155656e-06, + "loss": 15.6427, + "step": 348 + }, + { + "epoch": 0.02000974686810194, + "grad_norm": 0.0, + "learning_rate": 9.723345458039595e-06, + "loss": 15.3811, + "step": 349 + }, + { + "epoch": 0.02006708138634865, + "grad_norm": 0.0, + "learning_rate": 9.721311657797018e-06, + "loss": 15.6416, + "step": 350 + }, + { + "epoch": 0.02012441590459536, + "grad_norm": 0.0, + "learning_rate": 9.719270623544122e-06, + "loss": 15.5834, + "step": 351 + }, + { + "epoch": 0.020181750422842072, + "grad_norm": 0.0, + "learning_rate": 9.717222358408188e-06, + "loss": 15.8382, + "step": 352 + }, + { + "epoch": 0.020239084941088784, + "grad_norm": 0.0, + "learning_rate": 9.71516686552757e-06, + "loss": 15.8291, + "step": 353 + }, + { + "epoch": 0.020296419459335493, + "grad_norm": 0.0, + "learning_rate": 9.71310414805171e-06, + "loss": 15.6789, + "step": 354 + }, + { + "epoch": 0.020353753977582205, + "grad_norm": 0.0, + "learning_rate": 9.711034209141102e-06, + "loss": 15.5076, + "step": 355 + }, + { + "epoch": 0.020411088495828913, + "grad_norm": 0.0, + "learning_rate": 9.708957051967318e-06, + "loss": 15.5615, + "step": 356 + }, + { + "epoch": 0.020468423014075626, + "grad_norm": 0.0, + "learning_rate": 9.706872679712986e-06, + "loss": 15.6241, + "step": 357 + }, + { + "epoch": 0.020525757532322334, + "grad_norm": 0.0, + "learning_rate": 9.704781095571788e-06, + "loss": 15.4126, + "step": 358 + }, + { + "epoch": 0.020583092050569046, + "grad_norm": 0.0, + "learning_rate": 9.702682302748456e-06, + "loss": 15.6394, + "step": 359 + }, + { + "epoch": 0.020640426568815755, + "grad_norm": 0.0, + "learning_rate": 9.700576304458769e-06, + "loss": 15.7437, + "step": 360 + }, + { + "epoch": 0.020697761087062467, + "grad_norm": 0.0, + "learning_rate": 9.698463103929542e-06, + "loss": 15.425, + "step": 361 + }, + { + "epoch": 0.020755095605309175, + "grad_norm": 0.0, + "learning_rate": 9.696342704398632e-06, + "loss": 15.5889, + "step": 362 + }, + { + "epoch": 0.020812430123555888, + "grad_norm": 0.0, + "learning_rate": 9.69421510911492e-06, + "loss": 15.4247, + "step": 363 + }, + { + "epoch": 0.020869764641802596, + "grad_norm": 0.0, + "learning_rate": 9.692080321338317e-06, + "loss": 15.572, + "step": 364 + }, + { + "epoch": 0.020927099160049308, + "grad_norm": 0.0, + "learning_rate": 9.689938344339751e-06, + "loss": 15.6727, + "step": 365 + }, + { + "epoch": 0.020984433678296017, + "grad_norm": 0.0, + "learning_rate": 9.687789181401166e-06, + "loss": 15.5547, + "step": 366 + }, + { + "epoch": 0.02104176819654273, + "grad_norm": 0.0, + "learning_rate": 9.685632835815519e-06, + "loss": 15.5774, + "step": 367 + }, + { + "epoch": 0.021099102714789437, + "grad_norm": 0.0, + "learning_rate": 9.683469310886769e-06, + "loss": 15.5133, + "step": 368 + }, + { + "epoch": 0.02115643723303615, + "grad_norm": 0.0, + "learning_rate": 9.681298609929875e-06, + "loss": 15.7577, + "step": 369 + }, + { + "epoch": 0.021213771751282858, + "grad_norm": 0.0, + "learning_rate": 9.679120736270796e-06, + "loss": 15.8414, + "step": 370 + }, + { + "epoch": 0.02127110626952957, + "grad_norm": 0.0, + "learning_rate": 9.676935693246475e-06, + "loss": 15.5358, + "step": 371 + }, + { + "epoch": 0.021328440787776282, + "grad_norm": 0.0, + "learning_rate": 9.674743484204844e-06, + "loss": 15.4978, + "step": 372 + }, + { + "epoch": 0.02138577530602299, + "grad_norm": 0.0, + "learning_rate": 9.672544112504813e-06, + "loss": 15.9646, + "step": 373 + }, + { + "epoch": 0.021443109824269703, + "grad_norm": 0.0, + "learning_rate": 9.670337581516268e-06, + "loss": 15.6926, + "step": 374 + }, + { + "epoch": 0.02150044434251641, + "grad_norm": 0.0, + "learning_rate": 9.668123894620062e-06, + "loss": 15.8415, + "step": 375 + }, + { + "epoch": 0.021557778860763124, + "grad_norm": 0.0, + "learning_rate": 9.665903055208013e-06, + "loss": 15.7397, + "step": 376 + }, + { + "epoch": 0.021615113379009832, + "grad_norm": 0.0, + "learning_rate": 9.663675066682903e-06, + "loss": 15.8171, + "step": 377 + }, + { + "epoch": 0.021672447897256544, + "grad_norm": 0.0, + "learning_rate": 9.66143993245846e-06, + "loss": 15.5251, + "step": 378 + }, + { + "epoch": 0.021729782415503253, + "grad_norm": 0.0, + "learning_rate": 9.659197655959364e-06, + "loss": 15.9786, + "step": 379 + }, + { + "epoch": 0.021787116933749965, + "grad_norm": 0.0, + "learning_rate": 9.656948240621244e-06, + "loss": 15.2645, + "step": 380 + }, + { + "epoch": 0.021844451451996674, + "grad_norm": 0.0, + "learning_rate": 9.654691689890656e-06, + "loss": 15.4899, + "step": 381 + }, + { + "epoch": 0.021901785970243386, + "grad_norm": 0.0, + "learning_rate": 9.6524280072251e-06, + "loss": 15.5523, + "step": 382 + }, + { + "epoch": 0.021959120488490094, + "grad_norm": 0.0, + "learning_rate": 9.650157196092995e-06, + "loss": 16.0337, + "step": 383 + }, + { + "epoch": 0.022016455006736806, + "grad_norm": 0.0, + "learning_rate": 9.647879259973687e-06, + "loss": 15.6746, + "step": 384 + }, + { + "epoch": 0.022073789524983515, + "grad_norm": 0.0, + "learning_rate": 9.645594202357438e-06, + "loss": 15.5839, + "step": 385 + }, + { + "epoch": 0.022131124043230227, + "grad_norm": 0.0, + "learning_rate": 9.643302026745423e-06, + "loss": 15.6965, + "step": 386 + }, + { + "epoch": 0.022188458561476936, + "grad_norm": 0.0, + "learning_rate": 9.641002736649718e-06, + "loss": 15.6181, + "step": 387 + }, + { + "epoch": 0.022245793079723648, + "grad_norm": 0.0, + "learning_rate": 9.638696335593304e-06, + "loss": 15.6196, + "step": 388 + }, + { + "epoch": 0.02230312759797036, + "grad_norm": 0.0, + "learning_rate": 9.636382827110059e-06, + "loss": 15.7294, + "step": 389 + }, + { + "epoch": 0.02236046211621707, + "grad_norm": 0.0, + "learning_rate": 9.634062214744749e-06, + "loss": 15.3191, + "step": 390 + }, + { + "epoch": 0.02241779663446378, + "grad_norm": 0.0, + "learning_rate": 9.63173450205302e-06, + "loss": 15.4134, + "step": 391 + }, + { + "epoch": 0.02247513115271049, + "grad_norm": 0.0, + "learning_rate": 9.629399692601406e-06, + "loss": 15.3963, + "step": 392 + }, + { + "epoch": 0.0225324656709572, + "grad_norm": 0.0, + "learning_rate": 9.62705778996731e-06, + "loss": 15.6359, + "step": 393 + }, + { + "epoch": 0.02258980018920391, + "grad_norm": 0.0, + "learning_rate": 9.624708797739002e-06, + "loss": 15.721, + "step": 394 + }, + { + "epoch": 0.02264713470745062, + "grad_norm": 0.0, + "learning_rate": 9.622352719515615e-06, + "loss": 15.2731, + "step": 395 + }, + { + "epoch": 0.02270446922569733, + "grad_norm": 0.0, + "learning_rate": 9.619989558907144e-06, + "loss": 15.5629, + "step": 396 + }, + { + "epoch": 0.022761803743944042, + "grad_norm": 0.0, + "learning_rate": 9.617619319534427e-06, + "loss": 15.9466, + "step": 397 + }, + { + "epoch": 0.02281913826219075, + "grad_norm": 0.0, + "learning_rate": 9.615242005029159e-06, + "loss": 15.4187, + "step": 398 + }, + { + "epoch": 0.022876472780437463, + "grad_norm": 0.0, + "learning_rate": 9.612857619033865e-06, + "loss": 15.7401, + "step": 399 + }, + { + "epoch": 0.02293380729868417, + "grad_norm": 0.0, + "learning_rate": 9.610466165201912e-06, + "loss": 15.6079, + "step": 400 + }, + { + "epoch": 0.022991141816930884, + "grad_norm": 0.0, + "learning_rate": 9.608067647197492e-06, + "loss": 15.53, + "step": 401 + }, + { + "epoch": 0.023048476335177592, + "grad_norm": 0.0, + "learning_rate": 9.605662068695625e-06, + "loss": 15.7122, + "step": 402 + }, + { + "epoch": 0.023105810853424304, + "grad_norm": 0.0, + "learning_rate": 9.603249433382145e-06, + "loss": 15.3243, + "step": 403 + }, + { + "epoch": 0.023163145371671013, + "grad_norm": 0.0, + "learning_rate": 9.6008297449537e-06, + "loss": 15.6436, + "step": 404 + }, + { + "epoch": 0.023220479889917725, + "grad_norm": 0.0, + "learning_rate": 9.598403007117748e-06, + "loss": 15.6834, + "step": 405 + }, + { + "epoch": 0.023277814408164437, + "grad_norm": 0.0, + "learning_rate": 9.595969223592544e-06, + "loss": 15.6098, + "step": 406 + }, + { + "epoch": 0.023335148926411146, + "grad_norm": 0.0, + "learning_rate": 9.593528398107137e-06, + "loss": 15.7385, + "step": 407 + }, + { + "epoch": 0.023392483444657858, + "grad_norm": 0.0, + "learning_rate": 9.591080534401371e-06, + "loss": 15.3407, + "step": 408 + }, + { + "epoch": 0.023449817962904566, + "grad_norm": 0.0, + "learning_rate": 9.588625636225871e-06, + "loss": 15.4649, + "step": 409 + }, + { + "epoch": 0.02350715248115128, + "grad_norm": 0.0, + "learning_rate": 9.58616370734204e-06, + "loss": 16.0059, + "step": 410 + }, + { + "epoch": 0.023564486999397987, + "grad_norm": 0.0, + "learning_rate": 9.583694751522054e-06, + "loss": 15.6544, + "step": 411 + }, + { + "epoch": 0.0236218215176447, + "grad_norm": 0.0, + "learning_rate": 9.58121877254886e-06, + "loss": 15.3054, + "step": 412 + }, + { + "epoch": 0.023679156035891408, + "grad_norm": 0.0, + "learning_rate": 9.578735774216155e-06, + "loss": 15.3782, + "step": 413 + }, + { + "epoch": 0.02373649055413812, + "grad_norm": 0.0, + "learning_rate": 9.5762457603284e-06, + "loss": 15.851, + "step": 414 + }, + { + "epoch": 0.02379382507238483, + "grad_norm": 0.0, + "learning_rate": 9.573748734700806e-06, + "loss": 15.4994, + "step": 415 + }, + { + "epoch": 0.02385115959063154, + "grad_norm": 0.0, + "learning_rate": 9.57124470115932e-06, + "loss": 15.3128, + "step": 416 + }, + { + "epoch": 0.02390849410887825, + "grad_norm": 0.0, + "learning_rate": 9.568733663540634e-06, + "loss": 15.3998, + "step": 417 + }, + { + "epoch": 0.02396582862712496, + "grad_norm": 0.0, + "learning_rate": 9.566215625692168e-06, + "loss": 15.5673, + "step": 418 + }, + { + "epoch": 0.02402316314537167, + "grad_norm": 0.0, + "learning_rate": 9.563690591472067e-06, + "loss": 15.642, + "step": 419 + }, + { + "epoch": 0.024080497663618382, + "grad_norm": 0.0, + "learning_rate": 9.561158564749202e-06, + "loss": 15.7555, + "step": 420 + }, + { + "epoch": 0.02413783218186509, + "grad_norm": 0.0, + "learning_rate": 9.558619549403148e-06, + "loss": 15.5263, + "step": 421 + }, + { + "epoch": 0.024195166700111802, + "grad_norm": 0.0, + "learning_rate": 9.556073549324195e-06, + "loss": 15.6044, + "step": 422 + }, + { + "epoch": 0.02425250121835851, + "grad_norm": 0.0, + "learning_rate": 9.553520568413335e-06, + "loss": 15.8033, + "step": 423 + }, + { + "epoch": 0.024309835736605223, + "grad_norm": 0.0, + "learning_rate": 9.550960610582251e-06, + "loss": 15.8008, + "step": 424 + }, + { + "epoch": 0.024367170254851935, + "grad_norm": 0.0, + "learning_rate": 9.548393679753321e-06, + "loss": 15.6639, + "step": 425 + }, + { + "epoch": 0.024424504773098644, + "grad_norm": 0.0, + "learning_rate": 9.545819779859607e-06, + "loss": 15.2476, + "step": 426 + }, + { + "epoch": 0.024481839291345356, + "grad_norm": 0.0, + "learning_rate": 9.543238914844844e-06, + "loss": 15.331, + "step": 427 + }, + { + "epoch": 0.024539173809592064, + "grad_norm": 0.0, + "learning_rate": 9.540651088663446e-06, + "loss": 15.4785, + "step": 428 + }, + { + "epoch": 0.024596508327838777, + "grad_norm": 0.0, + "learning_rate": 9.538056305280487e-06, + "loss": 15.6987, + "step": 429 + }, + { + "epoch": 0.024653842846085485, + "grad_norm": 0.0, + "learning_rate": 9.535454568671705e-06, + "loss": 15.2466, + "step": 430 + }, + { + "epoch": 0.024711177364332197, + "grad_norm": 0.0, + "learning_rate": 9.532845882823489e-06, + "loss": 15.8279, + "step": 431 + }, + { + "epoch": 0.024768511882578906, + "grad_norm": 0.0, + "learning_rate": 9.530230251732875e-06, + "loss": 15.8509, + "step": 432 + }, + { + "epoch": 0.024825846400825618, + "grad_norm": 0.0, + "learning_rate": 9.527607679407545e-06, + "loss": 15.6834, + "step": 433 + }, + { + "epoch": 0.024883180919072326, + "grad_norm": 0.0, + "learning_rate": 9.524978169865813e-06, + "loss": 15.2951, + "step": 434 + }, + { + "epoch": 0.02494051543731904, + "grad_norm": 0.0, + "learning_rate": 9.522341727136622e-06, + "loss": 15.8398, + "step": 435 + }, + { + "epoch": 0.024997849955565747, + "grad_norm": 0.0, + "learning_rate": 9.519698355259537e-06, + "loss": 15.735, + "step": 436 + }, + { + "epoch": 0.02505518447381246, + "grad_norm": 0.0, + "learning_rate": 9.517048058284746e-06, + "loss": 15.2163, + "step": 437 + }, + { + "epoch": 0.025112518992059168, + "grad_norm": 0.0, + "learning_rate": 9.51439084027304e-06, + "loss": 15.9837, + "step": 438 + }, + { + "epoch": 0.02516985351030588, + "grad_norm": 0.0, + "learning_rate": 9.51172670529582e-06, + "loss": 15.5941, + "step": 439 + }, + { + "epoch": 0.02522718802855259, + "grad_norm": 0.0, + "learning_rate": 9.50905565743508e-06, + "loss": 15.6597, + "step": 440 + }, + { + "epoch": 0.0252845225467993, + "grad_norm": 0.0, + "learning_rate": 9.506377700783412e-06, + "loss": 15.6818, + "step": 441 + }, + { + "epoch": 0.025341857065046013, + "grad_norm": 0.0, + "learning_rate": 9.503692839443988e-06, + "loss": 16.0401, + "step": 442 + }, + { + "epoch": 0.02539919158329272, + "grad_norm": 0.0, + "learning_rate": 9.501001077530563e-06, + "loss": 15.2495, + "step": 443 + }, + { + "epoch": 0.025456526101539433, + "grad_norm": 0.0, + "learning_rate": 9.498302419167465e-06, + "loss": 15.6474, + "step": 444 + }, + { + "epoch": 0.025513860619786142, + "grad_norm": 0.0, + "learning_rate": 9.495596868489588e-06, + "loss": 15.633, + "step": 445 + }, + { + "epoch": 0.025571195138032854, + "grad_norm": 0.0, + "learning_rate": 9.492884429642383e-06, + "loss": 15.1942, + "step": 446 + }, + { + "epoch": 0.025628529656279563, + "grad_norm": 0.0, + "learning_rate": 9.490165106781863e-06, + "loss": 15.7698, + "step": 447 + }, + { + "epoch": 0.025685864174526275, + "grad_norm": 0.0, + "learning_rate": 9.487438904074581e-06, + "loss": 15.7203, + "step": 448 + }, + { + "epoch": 0.025743198692772983, + "grad_norm": 0.0, + "learning_rate": 9.484705825697635e-06, + "loss": 15.8956, + "step": 449 + }, + { + "epoch": 0.025800533211019695, + "grad_norm": 0.0, + "learning_rate": 9.481965875838657e-06, + "loss": 15.7252, + "step": 450 + }, + { + "epoch": 0.025857867729266404, + "grad_norm": 0.0, + "learning_rate": 9.47921905869581e-06, + "loss": 15.5917, + "step": 451 + }, + { + "epoch": 0.025915202247513116, + "grad_norm": 0.0, + "learning_rate": 9.476465378477773e-06, + "loss": 15.7833, + "step": 452 + }, + { + "epoch": 0.025972536765759825, + "grad_norm": 0.0, + "learning_rate": 9.473704839403748e-06, + "loss": 15.8857, + "step": 453 + }, + { + "epoch": 0.026029871284006537, + "grad_norm": 0.0, + "learning_rate": 9.47093744570344e-06, + "loss": 15.9607, + "step": 454 + }, + { + "epoch": 0.026087205802253245, + "grad_norm": 0.0, + "learning_rate": 9.468163201617063e-06, + "loss": 15.846, + "step": 455 + }, + { + "epoch": 0.026144540320499957, + "grad_norm": 0.0, + "learning_rate": 9.465382111395319e-06, + "loss": 15.6212, + "step": 456 + }, + { + "epoch": 0.026201874838746666, + "grad_norm": 0.0, + "learning_rate": 9.462594179299408e-06, + "loss": 15.5892, + "step": 457 + }, + { + "epoch": 0.026259209356993378, + "grad_norm": 0.0, + "learning_rate": 9.459799409601006e-06, + "loss": 15.3363, + "step": 458 + }, + { + "epoch": 0.02631654387524009, + "grad_norm": 0.0, + "learning_rate": 9.456997806582272e-06, + "loss": 15.6226, + "step": 459 + }, + { + "epoch": 0.0263738783934868, + "grad_norm": 0.0, + "learning_rate": 9.45418937453583e-06, + "loss": 15.5168, + "step": 460 + }, + { + "epoch": 0.02643121291173351, + "grad_norm": 0.0, + "learning_rate": 9.45137411776477e-06, + "loss": 15.5484, + "step": 461 + }, + { + "epoch": 0.02648854742998022, + "grad_norm": 0.0, + "learning_rate": 9.44855204058264e-06, + "loss": 15.8039, + "step": 462 + }, + { + "epoch": 0.02654588194822693, + "grad_norm": 0.0, + "learning_rate": 9.445723147313434e-06, + "loss": 15.3607, + "step": 463 + }, + { + "epoch": 0.02660321646647364, + "grad_norm": 0.0, + "learning_rate": 9.442887442291593e-06, + "loss": 15.6776, + "step": 464 + }, + { + "epoch": 0.026660550984720352, + "grad_norm": 0.0, + "learning_rate": 9.440044929861995e-06, + "loss": 15.5051, + "step": 465 + }, + { + "epoch": 0.02671788550296706, + "grad_norm": 0.0, + "learning_rate": 9.437195614379947e-06, + "loss": 15.5395, + "step": 466 + }, + { + "epoch": 0.026775220021213773, + "grad_norm": 0.0, + "learning_rate": 9.43433950021118e-06, + "loss": 15.0945, + "step": 467 + }, + { + "epoch": 0.02683255453946048, + "grad_norm": 0.0, + "learning_rate": 9.431476591731842e-06, + "loss": 15.5566, + "step": 468 + }, + { + "epoch": 0.026889889057707193, + "grad_norm": 0.0, + "learning_rate": 9.428606893328493e-06, + "loss": 15.6923, + "step": 469 + }, + { + "epoch": 0.026947223575953902, + "grad_norm": 0.0, + "learning_rate": 9.425730409398094e-06, + "loss": 15.7286, + "step": 470 + }, + { + "epoch": 0.027004558094200614, + "grad_norm": 0.0, + "learning_rate": 9.422847144348002e-06, + "loss": 15.5483, + "step": 471 + }, + { + "epoch": 0.027061892612447323, + "grad_norm": 0.0, + "learning_rate": 9.41995710259597e-06, + "loss": 15.5248, + "step": 472 + }, + { + "epoch": 0.027119227130694035, + "grad_norm": 0.0, + "learning_rate": 9.417060288570126e-06, + "loss": 15.9598, + "step": 473 + }, + { + "epoch": 0.027176561648940743, + "grad_norm": 0.0, + "learning_rate": 9.414156706708978e-06, + "loss": 15.779, + "step": 474 + }, + { + "epoch": 0.027233896167187455, + "grad_norm": 0.0, + "learning_rate": 9.41124636146141e-06, + "loss": 15.7317, + "step": 475 + }, + { + "epoch": 0.027291230685434164, + "grad_norm": 0.0, + "learning_rate": 9.408329257286658e-06, + "loss": 15.4797, + "step": 476 + }, + { + "epoch": 0.027348565203680876, + "grad_norm": 0.0, + "learning_rate": 9.405405398654322e-06, + "loss": 15.865, + "step": 477 + }, + { + "epoch": 0.027405899721927588, + "grad_norm": 0.0, + "learning_rate": 9.402474790044348e-06, + "loss": 15.3192, + "step": 478 + }, + { + "epoch": 0.027463234240174297, + "grad_norm": 0.0, + "learning_rate": 9.399537435947023e-06, + "loss": 15.4034, + "step": 479 + }, + { + "epoch": 0.02752056875842101, + "grad_norm": 0.0, + "learning_rate": 9.396593340862972e-06, + "loss": 15.5196, + "step": 480 + }, + { + "epoch": 0.027577903276667717, + "grad_norm": 0.0, + "learning_rate": 9.39364250930315e-06, + "loss": 16.0287, + "step": 481 + }, + { + "epoch": 0.02763523779491443, + "grad_norm": 0.0, + "learning_rate": 9.39068494578883e-06, + "loss": 15.6026, + "step": 482 + }, + { + "epoch": 0.027692572313161138, + "grad_norm": 0.0, + "learning_rate": 9.3877206548516e-06, + "loss": 15.3239, + "step": 483 + }, + { + "epoch": 0.02774990683140785, + "grad_norm": 0.0, + "learning_rate": 9.384749641033358e-06, + "loss": 15.66, + "step": 484 + }, + { + "epoch": 0.02780724134965456, + "grad_norm": 0.0, + "learning_rate": 9.381771908886303e-06, + "loss": 15.6269, + "step": 485 + }, + { + "epoch": 0.02786457586790127, + "grad_norm": 0.0, + "learning_rate": 9.378787462972925e-06, + "loss": 15.63, + "step": 486 + }, + { + "epoch": 0.02792191038614798, + "grad_norm": 0.0, + "learning_rate": 9.375796307866003e-06, + "loss": 15.5099, + "step": 487 + }, + { + "epoch": 0.02797924490439469, + "grad_norm": 0.0, + "learning_rate": 9.372798448148597e-06, + "loss": 15.456, + "step": 488 + }, + { + "epoch": 0.0280365794226414, + "grad_norm": 0.0, + "learning_rate": 9.369793888414036e-06, + "loss": 15.8082, + "step": 489 + }, + { + "epoch": 0.028093913940888112, + "grad_norm": 0.0, + "learning_rate": 9.366782633265917e-06, + "loss": 15.6783, + "step": 490 + }, + { + "epoch": 0.02815124845913482, + "grad_norm": 0.0, + "learning_rate": 9.363764687318097e-06, + "loss": 15.4574, + "step": 491 + }, + { + "epoch": 0.028208582977381533, + "grad_norm": 0.0, + "learning_rate": 9.360740055194682e-06, + "loss": 15.6102, + "step": 492 + }, + { + "epoch": 0.02826591749562824, + "grad_norm": 0.0, + "learning_rate": 9.357708741530025e-06, + "loss": 15.3994, + "step": 493 + }, + { + "epoch": 0.028323252013874953, + "grad_norm": 0.0, + "learning_rate": 9.354670750968716e-06, + "loss": 15.3506, + "step": 494 + }, + { + "epoch": 0.028380586532121665, + "grad_norm": 0.0, + "learning_rate": 9.351626088165574e-06, + "loss": 15.783, + "step": 495 + }, + { + "epoch": 0.028437921050368374, + "grad_norm": 0.0, + "learning_rate": 9.348574757785642e-06, + "loss": 15.5606, + "step": 496 + }, + { + "epoch": 0.028495255568615086, + "grad_norm": 0.0, + "learning_rate": 9.345516764504179e-06, + "loss": 15.3805, + "step": 497 + }, + { + "epoch": 0.028552590086861795, + "grad_norm": 0.0, + "learning_rate": 9.342452113006653e-06, + "loss": 15.6996, + "step": 498 + }, + { + "epoch": 0.028609924605108507, + "grad_norm": 0.0, + "learning_rate": 9.339380807988734e-06, + "loss": 15.3441, + "step": 499 + }, + { + "epoch": 0.028667259123355215, + "grad_norm": 0.0, + "learning_rate": 9.336302854156287e-06, + "loss": 15.5148, + "step": 500 + }, + { + "epoch": 0.028724593641601927, + "grad_norm": 0.0, + "learning_rate": 9.333218256225362e-06, + "loss": 15.529, + "step": 501 + }, + { + "epoch": 0.028781928159848636, + "grad_norm": 0.0, + "learning_rate": 9.330127018922195e-06, + "loss": 15.8365, + "step": 502 + }, + { + "epoch": 0.028839262678095348, + "grad_norm": 0.0, + "learning_rate": 9.327029146983184e-06, + "loss": 15.9286, + "step": 503 + }, + { + "epoch": 0.028896597196342057, + "grad_norm": 0.0, + "learning_rate": 9.323924645154906e-06, + "loss": 15.4567, + "step": 504 + }, + { + "epoch": 0.02895393171458877, + "grad_norm": 0.0, + "learning_rate": 9.320813518194084e-06, + "loss": 15.8201, + "step": 505 + }, + { + "epoch": 0.029011266232835477, + "grad_norm": 0.0, + "learning_rate": 9.317695770867601e-06, + "loss": 15.7347, + "step": 506 + }, + { + "epoch": 0.02906860075108219, + "grad_norm": 0.0, + "learning_rate": 9.31457140795248e-06, + "loss": 15.5777, + "step": 507 + }, + { + "epoch": 0.029125935269328898, + "grad_norm": 0.0, + "learning_rate": 9.311440434235879e-06, + "loss": 15.6935, + "step": 508 + }, + { + "epoch": 0.02918326978757561, + "grad_norm": 0.0, + "learning_rate": 9.30830285451509e-06, + "loss": 15.7309, + "step": 509 + }, + { + "epoch": 0.02924060430582232, + "grad_norm": 0.0, + "learning_rate": 9.30515867359752e-06, + "loss": 15.6683, + "step": 510 + }, + { + "epoch": 0.02929793882406903, + "grad_norm": 0.0, + "learning_rate": 9.302007896300697e-06, + "loss": 15.4964, + "step": 511 + }, + { + "epoch": 0.029355273342315743, + "grad_norm": 0.0, + "learning_rate": 9.298850527452253e-06, + "loss": 15.5383, + "step": 512 + }, + { + "epoch": 0.02941260786056245, + "grad_norm": 0.0, + "learning_rate": 9.295686571889919e-06, + "loss": 15.6289, + "step": 513 + }, + { + "epoch": 0.029469942378809164, + "grad_norm": 0.0, + "learning_rate": 9.292516034461517e-06, + "loss": 15.329, + "step": 514 + }, + { + "epoch": 0.029527276897055872, + "grad_norm": 0.0, + "learning_rate": 9.289338920024958e-06, + "loss": 15.5542, + "step": 515 + }, + { + "epoch": 0.029584611415302584, + "grad_norm": 0.0, + "learning_rate": 9.286155233448226e-06, + "loss": 15.3286, + "step": 516 + }, + { + "epoch": 0.029641945933549293, + "grad_norm": 0.0, + "learning_rate": 9.28296497960938e-06, + "loss": 15.4839, + "step": 517 + }, + { + "epoch": 0.029699280451796005, + "grad_norm": 0.0, + "learning_rate": 9.279768163396535e-06, + "loss": 15.6228, + "step": 518 + }, + { + "epoch": 0.029756614970042713, + "grad_norm": 0.0, + "learning_rate": 9.276564789707865e-06, + "loss": 15.5924, + "step": 519 + }, + { + "epoch": 0.029813949488289426, + "grad_norm": 0.0, + "learning_rate": 9.273354863451589e-06, + "loss": 15.719, + "step": 520 + }, + { + "epoch": 0.029871284006536134, + "grad_norm": 0.0, + "learning_rate": 9.27013838954597e-06, + "loss": 15.6843, + "step": 521 + }, + { + "epoch": 0.029928618524782846, + "grad_norm": 0.0, + "learning_rate": 9.266915372919301e-06, + "loss": 15.5317, + "step": 522 + }, + { + "epoch": 0.029985953043029555, + "grad_norm": 0.0, + "learning_rate": 9.263685818509895e-06, + "loss": 15.5594, + "step": 523 + }, + { + "epoch": 0.030043287561276267, + "grad_norm": 0.0, + "learning_rate": 9.260449731266092e-06, + "loss": 15.8769, + "step": 524 + }, + { + "epoch": 0.030100622079522975, + "grad_norm": 0.0, + "learning_rate": 9.257207116146231e-06, + "loss": 15.1793, + "step": 525 + }, + { + "epoch": 0.030157956597769688, + "grad_norm": 0.0, + "learning_rate": 9.253957978118664e-06, + "loss": 15.6719, + "step": 526 + }, + { + "epoch": 0.030215291116016396, + "grad_norm": 0.0, + "learning_rate": 9.250702322161726e-06, + "loss": 15.5782, + "step": 527 + }, + { + "epoch": 0.030272625634263108, + "grad_norm": 0.0, + "learning_rate": 9.24744015326375e-06, + "loss": 15.6119, + "step": 528 + }, + { + "epoch": 0.030329960152509817, + "grad_norm": 0.0, + "learning_rate": 9.244171476423037e-06, + "loss": 15.4868, + "step": 529 + }, + { + "epoch": 0.03038729467075653, + "grad_norm": 0.0, + "learning_rate": 9.24089629664787e-06, + "loss": 15.7071, + "step": 530 + }, + { + "epoch": 0.03044462918900324, + "grad_norm": 0.0, + "learning_rate": 9.237614618956488e-06, + "loss": 15.7531, + "step": 531 + }, + { + "epoch": 0.03050196370724995, + "grad_norm": 0.0, + "learning_rate": 9.234326448377089e-06, + "loss": 15.436, + "step": 532 + }, + { + "epoch": 0.03055929822549666, + "grad_norm": 0.0, + "learning_rate": 9.231031789947822e-06, + "loss": 15.4103, + "step": 533 + }, + { + "epoch": 0.03061663274374337, + "grad_norm": 0.0, + "learning_rate": 9.227730648716771e-06, + "loss": 15.3176, + "step": 534 + }, + { + "epoch": 0.030673967261990082, + "grad_norm": 0.0, + "learning_rate": 9.22442302974196e-06, + "loss": 15.5685, + "step": 535 + }, + { + "epoch": 0.03073130178023679, + "grad_norm": 0.0, + "learning_rate": 9.221108938091333e-06, + "loss": 15.9073, + "step": 536 + }, + { + "epoch": 0.030788636298483503, + "grad_norm": 0.0, + "learning_rate": 9.217788378842749e-06, + "loss": 15.6944, + "step": 537 + }, + { + "epoch": 0.03084597081673021, + "grad_norm": 0.0, + "learning_rate": 9.214461357083986e-06, + "loss": 15.636, + "step": 538 + }, + { + "epoch": 0.030903305334976924, + "grad_norm": 0.0, + "learning_rate": 9.211127877912715e-06, + "loss": 15.4674, + "step": 539 + }, + { + "epoch": 0.030960639853223632, + "grad_norm": 0.0, + "learning_rate": 9.207787946436509e-06, + "loss": 16.0203, + "step": 540 + }, + { + "epoch": 0.031017974371470344, + "grad_norm": 0.0, + "learning_rate": 9.204441567772817e-06, + "loss": 15.6157, + "step": 541 + }, + { + "epoch": 0.031075308889717053, + "grad_norm": 0.0, + "learning_rate": 9.201088747048974e-06, + "loss": 15.929, + "step": 542 + }, + { + "epoch": 0.031132643407963765, + "grad_norm": 0.0, + "learning_rate": 9.197729489402185e-06, + "loss": 15.4643, + "step": 543 + }, + { + "epoch": 0.031189977926210474, + "grad_norm": 0.0, + "learning_rate": 9.194363799979517e-06, + "loss": 15.5438, + "step": 544 + }, + { + "epoch": 0.031247312444457186, + "grad_norm": 0.0, + "learning_rate": 9.19099168393789e-06, + "loss": 15.5475, + "step": 545 + }, + { + "epoch": 0.031304646962703894, + "grad_norm": 0.0, + "learning_rate": 9.18761314644407e-06, + "loss": 15.1468, + "step": 546 + }, + { + "epoch": 0.03136198148095061, + "grad_norm": 0.0, + "learning_rate": 9.184228192674667e-06, + "loss": 15.3172, + "step": 547 + }, + { + "epoch": 0.03141931599919732, + "grad_norm": 0.0, + "learning_rate": 9.180836827816118e-06, + "loss": 15.5644, + "step": 548 + }, + { + "epoch": 0.03147665051744403, + "grad_norm": 0.0, + "learning_rate": 9.177439057064684e-06, + "loss": 15.5696, + "step": 549 + }, + { + "epoch": 0.031533985035690736, + "grad_norm": 0.0, + "learning_rate": 9.17403488562644e-06, + "loss": 15.3968, + "step": 550 + }, + { + "epoch": 0.03159131955393745, + "grad_norm": 0.0, + "learning_rate": 9.170624318717274e-06, + "loss": 15.6295, + "step": 551 + }, + { + "epoch": 0.03164865407218416, + "grad_norm": 0.0, + "learning_rate": 9.167207361562863e-06, + "loss": 15.5716, + "step": 552 + }, + { + "epoch": 0.03170598859043087, + "grad_norm": 0.0, + "learning_rate": 9.163784019398686e-06, + "loss": 15.6282, + "step": 553 + }, + { + "epoch": 0.03176332310867758, + "grad_norm": 0.0, + "learning_rate": 9.160354297469994e-06, + "loss": 15.2821, + "step": 554 + }, + { + "epoch": 0.03182065762692429, + "grad_norm": 0.0, + "learning_rate": 9.156918201031823e-06, + "loss": 15.4259, + "step": 555 + }, + { + "epoch": 0.031877992145171, + "grad_norm": 0.0, + "learning_rate": 9.153475735348973e-06, + "loss": 15.3835, + "step": 556 + }, + { + "epoch": 0.03193532666341771, + "grad_norm": 0.0, + "learning_rate": 9.150026905696e-06, + "loss": 15.6208, + "step": 557 + }, + { + "epoch": 0.03199266118166442, + "grad_norm": 0.0, + "learning_rate": 9.146571717357211e-06, + "loss": 15.3262, + "step": 558 + }, + { + "epoch": 0.032049995699911134, + "grad_norm": 0.0, + "learning_rate": 9.143110175626662e-06, + "loss": 15.6206, + "step": 559 + }, + { + "epoch": 0.03210733021815784, + "grad_norm": 0.0, + "learning_rate": 9.139642285808137e-06, + "loss": 15.5763, + "step": 560 + }, + { + "epoch": 0.03216466473640455, + "grad_norm": 0.0, + "learning_rate": 9.136168053215148e-06, + "loss": 15.5814, + "step": 561 + }, + { + "epoch": 0.03222199925465126, + "grad_norm": 0.0, + "learning_rate": 9.13268748317093e-06, + "loss": 15.8486, + "step": 562 + }, + { + "epoch": 0.032279333772897975, + "grad_norm": 0.0, + "learning_rate": 9.12920058100842e-06, + "loss": 15.5351, + "step": 563 + }, + { + "epoch": 0.032336668291144684, + "grad_norm": 0.0, + "learning_rate": 9.125707352070265e-06, + "loss": 15.677, + "step": 564 + }, + { + "epoch": 0.03239400280939139, + "grad_norm": 0.0, + "learning_rate": 9.122207801708802e-06, + "loss": 15.7423, + "step": 565 + }, + { + "epoch": 0.03245133732763811, + "grad_norm": 0.0, + "learning_rate": 9.118701935286054e-06, + "loss": 15.7189, + "step": 566 + }, + { + "epoch": 0.032508671845884816, + "grad_norm": 0.0, + "learning_rate": 9.115189758173721e-06, + "loss": 15.9269, + "step": 567 + }, + { + "epoch": 0.032566006364131525, + "grad_norm": 0.0, + "learning_rate": 9.111671275753175e-06, + "loss": 15.6356, + "step": 568 + }, + { + "epoch": 0.032623340882378234, + "grad_norm": 0.0, + "learning_rate": 9.108146493415448e-06, + "loss": 15.61, + "step": 569 + }, + { + "epoch": 0.03268067540062495, + "grad_norm": 0.0, + "learning_rate": 9.10461541656122e-06, + "loss": 15.6629, + "step": 570 + }, + { + "epoch": 0.03273800991887166, + "grad_norm": 0.0, + "learning_rate": 9.101078050600823e-06, + "loss": 15.712, + "step": 571 + }, + { + "epoch": 0.032795344437118366, + "grad_norm": 0.0, + "learning_rate": 9.097534400954218e-06, + "loss": 15.4088, + "step": 572 + }, + { + "epoch": 0.032852678955365075, + "grad_norm": 0.0, + "learning_rate": 9.093984473051e-06, + "loss": 15.3972, + "step": 573 + }, + { + "epoch": 0.03291001347361179, + "grad_norm": 0.0, + "learning_rate": 9.090428272330381e-06, + "loss": 15.8337, + "step": 574 + }, + { + "epoch": 0.0329673479918585, + "grad_norm": 0.0, + "learning_rate": 9.086865804241184e-06, + "loss": 15.5395, + "step": 575 + }, + { + "epoch": 0.03302468251010521, + "grad_norm": 0.0, + "learning_rate": 9.083297074241833e-06, + "loss": 15.8856, + "step": 576 + }, + { + "epoch": 0.033082017028351916, + "grad_norm": 0.0, + "learning_rate": 9.079722087800353e-06, + "loss": 15.5518, + "step": 577 + }, + { + "epoch": 0.03313935154659863, + "grad_norm": 0.0, + "learning_rate": 9.076140850394345e-06, + "loss": 15.6313, + "step": 578 + }, + { + "epoch": 0.03319668606484534, + "grad_norm": 0.0, + "learning_rate": 9.072553367511e-06, + "loss": 15.6402, + "step": 579 + }, + { + "epoch": 0.03325402058309205, + "grad_norm": 0.0, + "learning_rate": 9.06895964464707e-06, + "loss": 15.7837, + "step": 580 + }, + { + "epoch": 0.03331135510133876, + "grad_norm": 0.0, + "learning_rate": 9.065359687308865e-06, + "loss": 15.3619, + "step": 581 + }, + { + "epoch": 0.03336868961958547, + "grad_norm": 0.0, + "learning_rate": 9.061753501012257e-06, + "loss": 15.6569, + "step": 582 + }, + { + "epoch": 0.03342602413783218, + "grad_norm": 0.0, + "learning_rate": 9.058141091282656e-06, + "loss": 15.8729, + "step": 583 + }, + { + "epoch": 0.03348335865607889, + "grad_norm": 0.0, + "learning_rate": 9.054522463655008e-06, + "loss": 15.5701, + "step": 584 + }, + { + "epoch": 0.033540693174325606, + "grad_norm": 0.0, + "learning_rate": 9.050897623673791e-06, + "loss": 15.7377, + "step": 585 + }, + { + "epoch": 0.033598027692572315, + "grad_norm": 0.0, + "learning_rate": 9.047266576892993e-06, + "loss": 15.5294, + "step": 586 + }, + { + "epoch": 0.03365536221081902, + "grad_norm": 0.0, + "learning_rate": 9.043629328876117e-06, + "loss": 15.7416, + "step": 587 + }, + { + "epoch": 0.03371269672906573, + "grad_norm": 0.0, + "learning_rate": 9.039985885196171e-06, + "loss": 15.628, + "step": 588 + }, + { + "epoch": 0.03377003124731245, + "grad_norm": 0.0, + "learning_rate": 9.036336251435647e-06, + "loss": 15.6772, + "step": 589 + }, + { + "epoch": 0.033827365765559156, + "grad_norm": 0.0, + "learning_rate": 9.032680433186531e-06, + "loss": 16.1895, + "step": 590 + }, + { + "epoch": 0.033884700283805864, + "grad_norm": 0.0, + "learning_rate": 9.029018436050278e-06, + "loss": 15.6948, + "step": 591 + }, + { + "epoch": 0.03394203480205257, + "grad_norm": 0.0, + "learning_rate": 9.025350265637816e-06, + "loss": 15.3231, + "step": 592 + }, + { + "epoch": 0.03399936932029929, + "grad_norm": 0.0, + "learning_rate": 9.021675927569525e-06, + "loss": 15.5173, + "step": 593 + }, + { + "epoch": 0.034056703838546, + "grad_norm": 0.0, + "learning_rate": 9.017995427475238e-06, + "loss": 15.3488, + "step": 594 + }, + { + "epoch": 0.034114038356792706, + "grad_norm": 0.0, + "learning_rate": 9.014308770994235e-06, + "loss": 15.8426, + "step": 595 + }, + { + "epoch": 0.034171372875039414, + "grad_norm": 0.0, + "learning_rate": 9.01061596377522e-06, + "loss": 15.4642, + "step": 596 + }, + { + "epoch": 0.03422870739328613, + "grad_norm": 0.0, + "learning_rate": 9.006917011476326e-06, + "loss": 15.4795, + "step": 597 + }, + { + "epoch": 0.03428604191153284, + "grad_norm": 0.0, + "learning_rate": 9.003211919765102e-06, + "loss": 15.8165, + "step": 598 + }, + { + "epoch": 0.03434337642977955, + "grad_norm": 0.0, + "learning_rate": 8.999500694318501e-06, + "loss": 15.5455, + "step": 599 + }, + { + "epoch": 0.03440071094802626, + "grad_norm": 0.0, + "learning_rate": 8.995783340822878e-06, + "loss": 15.7814, + "step": 600 + }, + { + "epoch": 0.03445804546627297, + "grad_norm": 0.0, + "learning_rate": 8.992059864973972e-06, + "loss": 15.9455, + "step": 601 + }, + { + "epoch": 0.03451537998451968, + "grad_norm": 0.0, + "learning_rate": 8.988330272476908e-06, + "loss": 15.2725, + "step": 602 + }, + { + "epoch": 0.03457271450276639, + "grad_norm": 0.0, + "learning_rate": 8.98459456904618e-06, + "loss": 15.4554, + "step": 603 + }, + { + "epoch": 0.034630049021013104, + "grad_norm": 0.0, + "learning_rate": 8.980852760405645e-06, + "loss": 15.5684, + "step": 604 + }, + { + "epoch": 0.03468738353925981, + "grad_norm": 0.0, + "learning_rate": 8.977104852288519e-06, + "loss": 15.7634, + "step": 605 + }, + { + "epoch": 0.03474471805750652, + "grad_norm": 0.0, + "learning_rate": 8.973350850437355e-06, + "loss": 15.6467, + "step": 606 + }, + { + "epoch": 0.03480205257575323, + "grad_norm": 0.0, + "learning_rate": 8.96959076060405e-06, + "loss": 15.7511, + "step": 607 + }, + { + "epoch": 0.034859387093999945, + "grad_norm": 0.0, + "learning_rate": 8.965824588549827e-06, + "loss": 15.5423, + "step": 608 + }, + { + "epoch": 0.034916721612246654, + "grad_norm": 0.0, + "learning_rate": 8.962052340045228e-06, + "loss": 15.4482, + "step": 609 + }, + { + "epoch": 0.03497405613049336, + "grad_norm": 0.0, + "learning_rate": 8.958274020870107e-06, + "loss": 15.5465, + "step": 610 + }, + { + "epoch": 0.03503139064874007, + "grad_norm": 0.0, + "learning_rate": 8.954489636813615e-06, + "loss": 15.5435, + "step": 611 + }, + { + "epoch": 0.03508872516698679, + "grad_norm": 0.0, + "learning_rate": 8.9506991936742e-06, + "loss": 15.6975, + "step": 612 + }, + { + "epoch": 0.035146059685233495, + "grad_norm": 0.0, + "learning_rate": 8.946902697259593e-06, + "loss": 15.788, + "step": 613 + }, + { + "epoch": 0.035203394203480204, + "grad_norm": 0.0, + "learning_rate": 8.943100153386798e-06, + "loss": 15.9397, + "step": 614 + }, + { + "epoch": 0.03526072872172691, + "grad_norm": 0.0, + "learning_rate": 8.939291567882087e-06, + "loss": 15.7003, + "step": 615 + }, + { + "epoch": 0.03531806323997363, + "grad_norm": 0.0, + "learning_rate": 8.935476946580988e-06, + "loss": 15.5504, + "step": 616 + }, + { + "epoch": 0.03537539775822034, + "grad_norm": 0.0, + "learning_rate": 8.931656295328275e-06, + "loss": 15.5768, + "step": 617 + }, + { + "epoch": 0.035432732276467045, + "grad_norm": 0.0, + "learning_rate": 8.927829619977965e-06, + "loss": 15.6244, + "step": 618 + }, + { + "epoch": 0.03549006679471376, + "grad_norm": 0.0, + "learning_rate": 8.923996926393306e-06, + "loss": 15.4595, + "step": 619 + }, + { + "epoch": 0.03554740131296047, + "grad_norm": 0.0, + "learning_rate": 8.92015822044676e-06, + "loss": 15.5153, + "step": 620 + }, + { + "epoch": 0.03560473583120718, + "grad_norm": 0.0, + "learning_rate": 8.91631350802001e-06, + "loss": 15.5014, + "step": 621 + }, + { + "epoch": 0.03566207034945389, + "grad_norm": 0.0, + "learning_rate": 8.912462795003932e-06, + "loss": 15.3184, + "step": 622 + }, + { + "epoch": 0.0357194048677006, + "grad_norm": 0.0, + "learning_rate": 8.908606087298608e-06, + "loss": 15.8297, + "step": 623 + }, + { + "epoch": 0.03577673938594731, + "grad_norm": 0.0, + "learning_rate": 8.904743390813296e-06, + "loss": 15.3076, + "step": 624 + }, + { + "epoch": 0.03583407390419402, + "grad_norm": 0.0, + "learning_rate": 8.900874711466436e-06, + "loss": 15.5906, + "step": 625 + }, + { + "epoch": 0.03589140842244073, + "grad_norm": 0.0, + "learning_rate": 8.897000055185628e-06, + "loss": 15.3853, + "step": 626 + }, + { + "epoch": 0.03594874294068744, + "grad_norm": 0.0, + "learning_rate": 8.893119427907636e-06, + "loss": 15.7627, + "step": 627 + }, + { + "epoch": 0.03600607745893415, + "grad_norm": 0.0, + "learning_rate": 8.889232835578372e-06, + "loss": 15.5701, + "step": 628 + }, + { + "epoch": 0.03606341197718086, + "grad_norm": 0.0, + "learning_rate": 8.885340284152883e-06, + "loss": 15.4473, + "step": 629 + }, + { + "epoch": 0.03612074649542757, + "grad_norm": 0.0, + "learning_rate": 8.881441779595355e-06, + "loss": 15.4852, + "step": 630 + }, + { + "epoch": 0.036178081013674285, + "grad_norm": 0.0, + "learning_rate": 8.877537327879087e-06, + "loss": 15.9832, + "step": 631 + }, + { + "epoch": 0.03623541553192099, + "grad_norm": 0.0, + "learning_rate": 8.873626934986492e-06, + "loss": 15.3101, + "step": 632 + }, + { + "epoch": 0.0362927500501677, + "grad_norm": 0.0, + "learning_rate": 8.869710606909091e-06, + "loss": 15.407, + "step": 633 + }, + { + "epoch": 0.03635008456841441, + "grad_norm": 0.0, + "learning_rate": 8.865788349647496e-06, + "loss": 15.7147, + "step": 634 + }, + { + "epoch": 0.036407419086661126, + "grad_norm": 0.0, + "learning_rate": 8.8618601692114e-06, + "loss": 15.4474, + "step": 635 + }, + { + "epoch": 0.036464753604907835, + "grad_norm": 0.0, + "learning_rate": 8.857926071619578e-06, + "loss": 15.5297, + "step": 636 + }, + { + "epoch": 0.03652208812315454, + "grad_norm": 0.0, + "learning_rate": 8.853986062899869e-06, + "loss": 15.1538, + "step": 637 + }, + { + "epoch": 0.03657942264140126, + "grad_norm": 0.0, + "learning_rate": 8.850040149089164e-06, + "loss": 15.865, + "step": 638 + }, + { + "epoch": 0.03663675715964797, + "grad_norm": 0.0, + "learning_rate": 8.846088336233407e-06, + "loss": 15.3478, + "step": 639 + }, + { + "epoch": 0.036694091677894676, + "grad_norm": 0.0, + "learning_rate": 8.842130630387583e-06, + "loss": 15.6966, + "step": 640 + }, + { + "epoch": 0.036751426196141385, + "grad_norm": 0.0, + "learning_rate": 8.838167037615699e-06, + "loss": 15.8752, + "step": 641 + }, + { + "epoch": 0.0368087607143881, + "grad_norm": 0.0, + "learning_rate": 8.834197563990789e-06, + "loss": 15.4659, + "step": 642 + }, + { + "epoch": 0.03686609523263481, + "grad_norm": 0.0, + "learning_rate": 8.83022221559489e-06, + "loss": 15.51, + "step": 643 + }, + { + "epoch": 0.03692342975088152, + "grad_norm": 0.0, + "learning_rate": 8.826240998519052e-06, + "loss": 15.7284, + "step": 644 + }, + { + "epoch": 0.036980764269128226, + "grad_norm": 0.0, + "learning_rate": 8.822253918863301e-06, + "loss": 15.3795, + "step": 645 + }, + { + "epoch": 0.03703809878737494, + "grad_norm": 0.0, + "learning_rate": 8.818260982736662e-06, + "loss": 15.6749, + "step": 646 + }, + { + "epoch": 0.03709543330562165, + "grad_norm": 0.0, + "learning_rate": 8.814262196257121e-06, + "loss": 15.7075, + "step": 647 + }, + { + "epoch": 0.03715276782386836, + "grad_norm": 0.0, + "learning_rate": 8.810257565551634e-06, + "loss": 15.3014, + "step": 648 + }, + { + "epoch": 0.03721010234211507, + "grad_norm": 0.0, + "learning_rate": 8.80624709675611e-06, + "loss": 15.6637, + "step": 649 + }, + { + "epoch": 0.03726743686036178, + "grad_norm": 0.0, + "learning_rate": 8.802230796015406e-06, + "loss": 15.8715, + "step": 650 + }, + { + "epoch": 0.03732477137860849, + "grad_norm": 0.0, + "learning_rate": 8.79820866948331e-06, + "loss": 15.6308, + "step": 651 + }, + { + "epoch": 0.0373821058968552, + "grad_norm": 0.0, + "learning_rate": 8.794180723322537e-06, + "loss": 15.8427, + "step": 652 + }, + { + "epoch": 0.037439440415101916, + "grad_norm": 0.0, + "learning_rate": 8.790146963704722e-06, + "loss": 15.6036, + "step": 653 + }, + { + "epoch": 0.037496774933348624, + "grad_norm": 0.0, + "learning_rate": 8.786107396810405e-06, + "loss": 15.7593, + "step": 654 + }, + { + "epoch": 0.03755410945159533, + "grad_norm": 0.0, + "learning_rate": 8.782062028829028e-06, + "loss": 15.302, + "step": 655 + }, + { + "epoch": 0.03761144396984204, + "grad_norm": 0.0, + "learning_rate": 8.778010865958913e-06, + "loss": 15.7448, + "step": 656 + }, + { + "epoch": 0.03766877848808876, + "grad_norm": 0.0, + "learning_rate": 8.773953914407267e-06, + "loss": 15.527, + "step": 657 + }, + { + "epoch": 0.037726113006335465, + "grad_norm": 0.0, + "learning_rate": 8.769891180390168e-06, + "loss": 15.6792, + "step": 658 + }, + { + "epoch": 0.037783447524582174, + "grad_norm": 0.0, + "learning_rate": 8.765822670132549e-06, + "loss": 15.4365, + "step": 659 + }, + { + "epoch": 0.03784078204282888, + "grad_norm": 0.0, + "learning_rate": 8.761748389868197e-06, + "loss": 15.2451, + "step": 660 + }, + { + "epoch": 0.0378981165610756, + "grad_norm": 0.0, + "learning_rate": 8.757668345839739e-06, + "loss": 15.2096, + "step": 661 + }, + { + "epoch": 0.03795545107932231, + "grad_norm": 0.0, + "learning_rate": 8.75358254429863e-06, + "loss": 15.507, + "step": 662 + }, + { + "epoch": 0.038012785597569015, + "grad_norm": 0.0, + "learning_rate": 8.749490991505153e-06, + "loss": 15.5514, + "step": 663 + }, + { + "epoch": 0.038070120115815724, + "grad_norm": 0.0, + "learning_rate": 8.745393693728395e-06, + "loss": 15.6321, + "step": 664 + }, + { + "epoch": 0.03812745463406244, + "grad_norm": 0.0, + "learning_rate": 8.741290657246255e-06, + "loss": 15.6004, + "step": 665 + }, + { + "epoch": 0.03818478915230915, + "grad_norm": 0.0, + "learning_rate": 8.737181888345419e-06, + "loss": 15.2442, + "step": 666 + }, + { + "epoch": 0.03824212367055586, + "grad_norm": 0.0, + "learning_rate": 8.733067393321354e-06, + "loss": 15.4412, + "step": 667 + }, + { + "epoch": 0.038299458188802565, + "grad_norm": 0.0, + "learning_rate": 8.728947178478308e-06, + "loss": 15.5328, + "step": 668 + }, + { + "epoch": 0.03835679270704928, + "grad_norm": 0.0, + "learning_rate": 8.724821250129286e-06, + "loss": 15.3495, + "step": 669 + }, + { + "epoch": 0.03841412722529599, + "grad_norm": 0.0, + "learning_rate": 8.72068961459605e-06, + "loss": 15.3269, + "step": 670 + }, + { + "epoch": 0.0384714617435427, + "grad_norm": 0.0, + "learning_rate": 8.716552278209106e-06, + "loss": 15.337, + "step": 671 + }, + { + "epoch": 0.038528796261789414, + "grad_norm": 0.0, + "learning_rate": 8.712409247307696e-06, + "loss": 15.5897, + "step": 672 + }, + { + "epoch": 0.03858613078003612, + "grad_norm": 0.0, + "learning_rate": 8.708260528239788e-06, + "loss": 15.5198, + "step": 673 + }, + { + "epoch": 0.03864346529828283, + "grad_norm": 0.0, + "learning_rate": 8.704106127362063e-06, + "loss": 15.4092, + "step": 674 + }, + { + "epoch": 0.03870079981652954, + "grad_norm": 0.0, + "learning_rate": 8.699946051039907e-06, + "loss": 15.6915, + "step": 675 + }, + { + "epoch": 0.038758134334776255, + "grad_norm": 0.0, + "learning_rate": 8.695780305647405e-06, + "loss": 15.4175, + "step": 676 + }, + { + "epoch": 0.038815468853022964, + "grad_norm": 0.0, + "learning_rate": 8.691608897567328e-06, + "loss": 15.4632, + "step": 677 + }, + { + "epoch": 0.03887280337126967, + "grad_norm": 0.0, + "learning_rate": 8.68743183319112e-06, + "loss": 15.816, + "step": 678 + }, + { + "epoch": 0.03893013788951638, + "grad_norm": 0.0, + "learning_rate": 8.683249118918895e-06, + "loss": 15.2258, + "step": 679 + }, + { + "epoch": 0.038987472407763096, + "grad_norm": 0.0, + "learning_rate": 8.67906076115942e-06, + "loss": 15.7341, + "step": 680 + }, + { + "epoch": 0.039044806926009805, + "grad_norm": 0.0, + "learning_rate": 8.674866766330117e-06, + "loss": 15.3757, + "step": 681 + }, + { + "epoch": 0.039102141444256514, + "grad_norm": 0.0, + "learning_rate": 8.670667140857034e-06, + "loss": 15.7797, + "step": 682 + }, + { + "epoch": 0.03915947596250322, + "grad_norm": 0.0, + "learning_rate": 8.666461891174854e-06, + "loss": 15.3798, + "step": 683 + }, + { + "epoch": 0.03921681048074994, + "grad_norm": 0.0, + "learning_rate": 8.662251023726874e-06, + "loss": 15.3629, + "step": 684 + }, + { + "epoch": 0.039274144998996646, + "grad_norm": 0.0, + "learning_rate": 8.658034544965003e-06, + "loss": 15.5817, + "step": 685 + }, + { + "epoch": 0.039331479517243355, + "grad_norm": 0.0, + "learning_rate": 8.653812461349742e-06, + "loss": 15.8776, + "step": 686 + }, + { + "epoch": 0.03938881403549006, + "grad_norm": 0.0, + "learning_rate": 8.649584779350184e-06, + "loss": 15.4464, + "step": 687 + }, + { + "epoch": 0.03944614855373678, + "grad_norm": 0.0, + "learning_rate": 8.645351505443997e-06, + "loss": 15.5939, + "step": 688 + }, + { + "epoch": 0.03950348307198349, + "grad_norm": 0.0, + "learning_rate": 8.641112646117419e-06, + "loss": 15.7406, + "step": 689 + }, + { + "epoch": 0.039560817590230196, + "grad_norm": 0.0, + "learning_rate": 8.636868207865244e-06, + "loss": 15.4987, + "step": 690 + }, + { + "epoch": 0.03961815210847691, + "grad_norm": 0.0, + "learning_rate": 8.632618197190817e-06, + "loss": 15.8416, + "step": 691 + }, + { + "epoch": 0.03967548662672362, + "grad_norm": 0.0, + "learning_rate": 8.628362620606017e-06, + "loss": 15.942, + "step": 692 + }, + { + "epoch": 0.03973282114497033, + "grad_norm": 0.0, + "learning_rate": 8.624101484631255e-06, + "loss": 15.5289, + "step": 693 + }, + { + "epoch": 0.03979015566321704, + "grad_norm": 0.0, + "learning_rate": 8.619834795795458e-06, + "loss": 15.5221, + "step": 694 + }, + { + "epoch": 0.03984749018146375, + "grad_norm": 0.0, + "learning_rate": 8.615562560636063e-06, + "loss": 15.4384, + "step": 695 + }, + { + "epoch": 0.03990482469971046, + "grad_norm": 0.0, + "learning_rate": 8.611284785699001e-06, + "loss": 15.4652, + "step": 696 + }, + { + "epoch": 0.03996215921795717, + "grad_norm": 0.0, + "learning_rate": 8.607001477538697e-06, + "loss": 15.6511, + "step": 697 + }, + { + "epoch": 0.04001949373620388, + "grad_norm": 0.0, + "learning_rate": 8.602712642718047e-06, + "loss": 15.5522, + "step": 698 + }, + { + "epoch": 0.040076828254450594, + "grad_norm": 0.0, + "learning_rate": 8.598418287808424e-06, + "loss": 15.8704, + "step": 699 + }, + { + "epoch": 0.0401341627726973, + "grad_norm": 0.0, + "learning_rate": 8.594118419389648e-06, + "loss": 15.3411, + "step": 700 + }, + { + "epoch": 0.04019149729094401, + "grad_norm": 0.0, + "learning_rate": 8.589813044049995e-06, + "loss": 15.7049, + "step": 701 + }, + { + "epoch": 0.04024883180919072, + "grad_norm": 0.0, + "learning_rate": 8.585502168386177e-06, + "loss": 15.7626, + "step": 702 + }, + { + "epoch": 0.040306166327437436, + "grad_norm": 0.0, + "learning_rate": 8.581185799003334e-06, + "loss": 15.8192, + "step": 703 + }, + { + "epoch": 0.040363500845684144, + "grad_norm": 0.0, + "learning_rate": 8.576863942515019e-06, + "loss": 15.7336, + "step": 704 + }, + { + "epoch": 0.04042083536393085, + "grad_norm": 0.0, + "learning_rate": 8.572536605543197e-06, + "loss": 15.7258, + "step": 705 + }, + { + "epoch": 0.04047816988217757, + "grad_norm": 0.0, + "learning_rate": 8.568203794718228e-06, + "loss": 15.6306, + "step": 706 + }, + { + "epoch": 0.04053550440042428, + "grad_norm": 0.0, + "learning_rate": 8.563865516678863e-06, + "loss": 15.858, + "step": 707 + }, + { + "epoch": 0.040592838918670986, + "grad_norm": 0.0, + "learning_rate": 8.559521778072225e-06, + "loss": 15.4747, + "step": 708 + }, + { + "epoch": 0.040650173436917694, + "grad_norm": 0.0, + "learning_rate": 8.555172585553804e-06, + "loss": 15.491, + "step": 709 + }, + { + "epoch": 0.04070750795516441, + "grad_norm": 0.0, + "learning_rate": 8.550817945787452e-06, + "loss": 15.6943, + "step": 710 + }, + { + "epoch": 0.04076484247341112, + "grad_norm": 0.0, + "learning_rate": 8.546457865445359e-06, + "loss": 15.9682, + "step": 711 + }, + { + "epoch": 0.04082217699165783, + "grad_norm": 0.0, + "learning_rate": 8.542092351208058e-06, + "loss": 15.6275, + "step": 712 + }, + { + "epoch": 0.040879511509904536, + "grad_norm": 0.0, + "learning_rate": 8.537721409764406e-06, + "loss": 15.8521, + "step": 713 + }, + { + "epoch": 0.04093684602815125, + "grad_norm": 0.0, + "learning_rate": 8.533345047811572e-06, + "loss": 15.4229, + "step": 714 + }, + { + "epoch": 0.04099418054639796, + "grad_norm": 0.0, + "learning_rate": 8.528963272055036e-06, + "loss": 15.3714, + "step": 715 + }, + { + "epoch": 0.04105151506464467, + "grad_norm": 0.0, + "learning_rate": 8.524576089208567e-06, + "loss": 15.2679, + "step": 716 + }, + { + "epoch": 0.04110884958289138, + "grad_norm": 0.0, + "learning_rate": 8.520183505994227e-06, + "loss": 15.6539, + "step": 717 + }, + { + "epoch": 0.04116618410113809, + "grad_norm": 0.0, + "learning_rate": 8.515785529142339e-06, + "loss": 15.5492, + "step": 718 + }, + { + "epoch": 0.0412235186193848, + "grad_norm": 0.0, + "learning_rate": 8.511382165391508e-06, + "loss": 15.3739, + "step": 719 + }, + { + "epoch": 0.04128085313763151, + "grad_norm": 0.0, + "learning_rate": 8.50697342148858e-06, + "loss": 15.6877, + "step": 720 + }, + { + "epoch": 0.04133818765587822, + "grad_norm": 0.0, + "learning_rate": 8.502559304188644e-06, + "loss": 15.6241, + "step": 721 + }, + { + "epoch": 0.041395522174124934, + "grad_norm": 0.0, + "learning_rate": 8.498139820255033e-06, + "loss": 15.3811, + "step": 722 + }, + { + "epoch": 0.04145285669237164, + "grad_norm": 0.0, + "learning_rate": 8.49371497645929e-06, + "loss": 15.1943, + "step": 723 + }, + { + "epoch": 0.04151019121061835, + "grad_norm": 0.0, + "learning_rate": 8.489284779581179e-06, + "loss": 15.5301, + "step": 724 + }, + { + "epoch": 0.041567525728865067, + "grad_norm": 0.0, + "learning_rate": 8.48484923640866e-06, + "loss": 15.8323, + "step": 725 + }, + { + "epoch": 0.041624860247111775, + "grad_norm": 0.0, + "learning_rate": 8.480408353737894e-06, + "loss": 15.6009, + "step": 726 + }, + { + "epoch": 0.041682194765358484, + "grad_norm": 0.0, + "learning_rate": 8.475962138373212e-06, + "loss": 15.5931, + "step": 727 + }, + { + "epoch": 0.04173952928360519, + "grad_norm": 0.0, + "learning_rate": 8.471510597127122e-06, + "loss": 15.7055, + "step": 728 + }, + { + "epoch": 0.04179686380185191, + "grad_norm": 0.0, + "learning_rate": 8.467053736820292e-06, + "loss": 15.3792, + "step": 729 + }, + { + "epoch": 0.041854198320098616, + "grad_norm": 0.0, + "learning_rate": 8.46259156428154e-06, + "loss": 15.7116, + "step": 730 + }, + { + "epoch": 0.041911532838345325, + "grad_norm": 0.0, + "learning_rate": 8.458124086347818e-06, + "loss": 15.0395, + "step": 731 + }, + { + "epoch": 0.041968867356592034, + "grad_norm": 0.0, + "learning_rate": 8.453651309864215e-06, + "loss": 15.159, + "step": 732 + }, + { + "epoch": 0.04202620187483875, + "grad_norm": 0.0, + "learning_rate": 8.449173241683934e-06, + "loss": 15.8408, + "step": 733 + }, + { + "epoch": 0.04208353639308546, + "grad_norm": 0.0, + "learning_rate": 8.444689888668288e-06, + "loss": 15.5556, + "step": 734 + }, + { + "epoch": 0.042140870911332166, + "grad_norm": 0.0, + "learning_rate": 8.440201257686684e-06, + "loss": 15.5377, + "step": 735 + }, + { + "epoch": 0.042198205429578875, + "grad_norm": 0.0, + "learning_rate": 8.43570735561662e-06, + "loss": 15.1917, + "step": 736 + }, + { + "epoch": 0.04225553994782559, + "grad_norm": 0.0, + "learning_rate": 8.43120818934367e-06, + "loss": 15.3535, + "step": 737 + }, + { + "epoch": 0.0423128744660723, + "grad_norm": 0.0, + "learning_rate": 8.426703765761468e-06, + "loss": 15.2347, + "step": 738 + }, + { + "epoch": 0.04237020898431901, + "grad_norm": 0.0, + "learning_rate": 8.422194091771709e-06, + "loss": 15.5976, + "step": 739 + }, + { + "epoch": 0.042427543502565716, + "grad_norm": 0.0, + "learning_rate": 8.417679174284135e-06, + "loss": 15.6626, + "step": 740 + }, + { + "epoch": 0.04248487802081243, + "grad_norm": 0.0, + "learning_rate": 8.413159020216512e-06, + "loss": 15.7098, + "step": 741 + }, + { + "epoch": 0.04254221253905914, + "grad_norm": 0.0, + "learning_rate": 8.408633636494643e-06, + "loss": 15.3637, + "step": 742 + }, + { + "epoch": 0.04259954705730585, + "grad_norm": 0.0, + "learning_rate": 8.404103030052332e-06, + "loss": 15.7865, + "step": 743 + }, + { + "epoch": 0.042656881575552565, + "grad_norm": 0.0, + "learning_rate": 8.399567207831394e-06, + "loss": 16.0023, + "step": 744 + }, + { + "epoch": 0.04271421609379927, + "grad_norm": 0.0, + "learning_rate": 8.395026176781627e-06, + "loss": 15.3886, + "step": 745 + }, + { + "epoch": 0.04277155061204598, + "grad_norm": 0.0, + "learning_rate": 8.390479943860817e-06, + "loss": 15.4875, + "step": 746 + }, + { + "epoch": 0.04282888513029269, + "grad_norm": 0.0, + "learning_rate": 8.385928516034718e-06, + "loss": 15.4501, + "step": 747 + }, + { + "epoch": 0.042886219648539406, + "grad_norm": 0.0, + "learning_rate": 8.381371900277045e-06, + "loss": 15.6047, + "step": 748 + }, + { + "epoch": 0.042943554166786115, + "grad_norm": 0.0, + "learning_rate": 8.37681010356946e-06, + "loss": 15.6225, + "step": 749 + }, + { + "epoch": 0.04300088868503282, + "grad_norm": 0.0, + "learning_rate": 8.372243132901563e-06, + "loss": 15.376, + "step": 750 + }, + { + "epoch": 0.04305822320327953, + "grad_norm": 0.0, + "learning_rate": 8.367670995270883e-06, + "loss": 15.7378, + "step": 751 + }, + { + "epoch": 0.04311555772152625, + "grad_norm": 0.0, + "learning_rate": 8.363093697682865e-06, + "loss": 15.644, + "step": 752 + }, + { + "epoch": 0.043172892239772956, + "grad_norm": 0.0, + "learning_rate": 8.358511247150861e-06, + "loss": 15.7673, + "step": 753 + }, + { + "epoch": 0.043230226758019664, + "grad_norm": 0.0, + "learning_rate": 8.353923650696119e-06, + "loss": 15.1973, + "step": 754 + }, + { + "epoch": 0.04328756127626637, + "grad_norm": 0.0, + "learning_rate": 8.349330915347766e-06, + "loss": 15.6869, + "step": 755 + }, + { + "epoch": 0.04334489579451309, + "grad_norm": 0.0, + "learning_rate": 8.344733048142814e-06, + "loss": 15.4703, + "step": 756 + }, + { + "epoch": 0.0434022303127598, + "grad_norm": 0.0, + "learning_rate": 8.340130056126126e-06, + "loss": 15.4401, + "step": 757 + }, + { + "epoch": 0.043459564831006506, + "grad_norm": 0.0, + "learning_rate": 8.335521946350424e-06, + "loss": 15.6927, + "step": 758 + }, + { + "epoch": 0.04351689934925322, + "grad_norm": 0.0, + "learning_rate": 8.33090872587627e-06, + "loss": 15.32, + "step": 759 + }, + { + "epoch": 0.04357423386749993, + "grad_norm": 0.0, + "learning_rate": 8.326290401772057e-06, + "loss": 15.8423, + "step": 760 + }, + { + "epoch": 0.04363156838574664, + "grad_norm": 0.0, + "learning_rate": 8.321666981113998e-06, + "loss": 15.8161, + "step": 761 + }, + { + "epoch": 0.04368890290399335, + "grad_norm": 0.0, + "learning_rate": 8.317038470986113e-06, + "loss": 15.6991, + "step": 762 + }, + { + "epoch": 0.04374623742224006, + "grad_norm": 0.0, + "learning_rate": 8.312404878480222e-06, + "loss": 15.5216, + "step": 763 + }, + { + "epoch": 0.04380357194048677, + "grad_norm": 0.0, + "learning_rate": 8.307766210695933e-06, + "loss": 15.4135, + "step": 764 + }, + { + "epoch": 0.04386090645873348, + "grad_norm": 0.0, + "learning_rate": 8.303122474740625e-06, + "loss": 15.435, + "step": 765 + }, + { + "epoch": 0.04391824097698019, + "grad_norm": 0.0, + "learning_rate": 8.298473677729453e-06, + "loss": 15.6051, + "step": 766 + }, + { + "epoch": 0.043975575495226904, + "grad_norm": 0.0, + "learning_rate": 8.293819826785315e-06, + "loss": 15.5189, + "step": 767 + }, + { + "epoch": 0.04403291001347361, + "grad_norm": 0.0, + "learning_rate": 8.289160929038858e-06, + "loss": 15.6029, + "step": 768 + }, + { + "epoch": 0.04409024453172032, + "grad_norm": 0.0, + "learning_rate": 8.284496991628465e-06, + "loss": 15.4684, + "step": 769 + }, + { + "epoch": 0.04414757904996703, + "grad_norm": 0.0, + "learning_rate": 8.279828021700235e-06, + "loss": 15.5391, + "step": 770 + }, + { + "epoch": 0.044204913568213745, + "grad_norm": 0.0, + "learning_rate": 8.27515402640798e-06, + "loss": 15.3327, + "step": 771 + }, + { + "epoch": 0.044262248086460454, + "grad_norm": 0.0, + "learning_rate": 8.270475012913212e-06, + "loss": 15.4333, + "step": 772 + }, + { + "epoch": 0.04431958260470716, + "grad_norm": 0.0, + "learning_rate": 8.265790988385132e-06, + "loss": 15.4344, + "step": 773 + }, + { + "epoch": 0.04437691712295387, + "grad_norm": 0.0, + "learning_rate": 8.261101960000619e-06, + "loss": 15.1989, + "step": 774 + }, + { + "epoch": 0.04443425164120059, + "grad_norm": 0.0, + "learning_rate": 8.25640793494422e-06, + "loss": 15.5202, + "step": 775 + }, + { + "epoch": 0.044491586159447295, + "grad_norm": 0.0, + "learning_rate": 8.251708920408135e-06, + "loss": 15.9113, + "step": 776 + }, + { + "epoch": 0.044548920677694004, + "grad_norm": 0.0, + "learning_rate": 8.247004923592212e-06, + "loss": 15.5743, + "step": 777 + }, + { + "epoch": 0.04460625519594072, + "grad_norm": 0.0, + "learning_rate": 8.24229595170393e-06, + "loss": 15.4708, + "step": 778 + }, + { + "epoch": 0.04466358971418743, + "grad_norm": 0.0, + "learning_rate": 8.237582011958392e-06, + "loss": 15.5196, + "step": 779 + }, + { + "epoch": 0.04472092423243414, + "grad_norm": 0.0, + "learning_rate": 8.232863111578314e-06, + "loss": 15.7244, + "step": 780 + }, + { + "epoch": 0.044778258750680845, + "grad_norm": 0.0, + "learning_rate": 8.228139257794012e-06, + "loss": 15.2314, + "step": 781 + }, + { + "epoch": 0.04483559326892756, + "grad_norm": 0.0, + "learning_rate": 8.223410457843392e-06, + "loss": 15.4909, + "step": 782 + }, + { + "epoch": 0.04489292778717427, + "grad_norm": 0.0, + "learning_rate": 8.218676718971936e-06, + "loss": 15.3051, + "step": 783 + }, + { + "epoch": 0.04495026230542098, + "grad_norm": 0.0, + "learning_rate": 8.213938048432697e-06, + "loss": 15.151, + "step": 784 + }, + { + "epoch": 0.04500759682366769, + "grad_norm": 0.0, + "learning_rate": 8.209194453486283e-06, + "loss": 15.5094, + "step": 785 + }, + { + "epoch": 0.0450649313419144, + "grad_norm": 0.0, + "learning_rate": 8.204445941400844e-06, + "loss": 15.2905, + "step": 786 + }, + { + "epoch": 0.04512226586016111, + "grad_norm": 0.0, + "learning_rate": 8.19969251945207e-06, + "loss": 15.5901, + "step": 787 + }, + { + "epoch": 0.04517960037840782, + "grad_norm": 0.0, + "learning_rate": 8.194934194923167e-06, + "loss": 15.7173, + "step": 788 + }, + { + "epoch": 0.04523693489665453, + "grad_norm": 0.0, + "learning_rate": 8.190170975104862e-06, + "loss": 15.4733, + "step": 789 + }, + { + "epoch": 0.04529426941490124, + "grad_norm": 0.0, + "learning_rate": 8.185402867295373e-06, + "loss": 15.7784, + "step": 790 + }, + { + "epoch": 0.04535160393314795, + "grad_norm": 0.0, + "learning_rate": 8.180629878800413e-06, + "loss": 15.7074, + "step": 791 + }, + { + "epoch": 0.04540893845139466, + "grad_norm": 0.0, + "learning_rate": 8.175852016933172e-06, + "loss": 15.3187, + "step": 792 + }, + { + "epoch": 0.04546627296964137, + "grad_norm": 0.0, + "learning_rate": 8.171069289014307e-06, + "loss": 15.8274, + "step": 793 + }, + { + "epoch": 0.045523607487888085, + "grad_norm": 0.0, + "learning_rate": 8.166281702371929e-06, + "loss": 15.3859, + "step": 794 + }, + { + "epoch": 0.04558094200613479, + "grad_norm": 0.0, + "learning_rate": 8.161489264341596e-06, + "loss": 15.6264, + "step": 795 + }, + { + "epoch": 0.0456382765243815, + "grad_norm": 0.0, + "learning_rate": 8.156691982266299e-06, + "loss": 15.773, + "step": 796 + }, + { + "epoch": 0.04569561104262822, + "grad_norm": 0.0, + "learning_rate": 8.151889863496448e-06, + "loss": 15.4782, + "step": 797 + }, + { + "epoch": 0.045752945560874926, + "grad_norm": 0.0, + "learning_rate": 8.14708291538987e-06, + "loss": 15.7671, + "step": 798 + }, + { + "epoch": 0.045810280079121635, + "grad_norm": 0.0, + "learning_rate": 8.142271145311784e-06, + "loss": 15.0243, + "step": 799 + }, + { + "epoch": 0.04586761459736834, + "grad_norm": 0.0, + "learning_rate": 8.137454560634803e-06, + "loss": 15.6344, + "step": 800 + }, + { + "epoch": 0.04592494911561506, + "grad_norm": 0.0, + "learning_rate": 8.132633168738917e-06, + "loss": 15.5289, + "step": 801 + }, + { + "epoch": 0.04598228363386177, + "grad_norm": 0.0, + "learning_rate": 8.127806977011476e-06, + "loss": 15.4143, + "step": 802 + }, + { + "epoch": 0.046039618152108476, + "grad_norm": 0.0, + "learning_rate": 8.122975992847189e-06, + "loss": 15.4817, + "step": 803 + }, + { + "epoch": 0.046096952670355185, + "grad_norm": 0.0, + "learning_rate": 8.118140223648108e-06, + "loss": 15.7881, + "step": 804 + }, + { + "epoch": 0.0461542871886019, + "grad_norm": 0.0, + "learning_rate": 8.113299676823614e-06, + "loss": 15.3555, + "step": 805 + }, + { + "epoch": 0.04621162170684861, + "grad_norm": 0.0, + "learning_rate": 8.108454359790414e-06, + "loss": 15.3229, + "step": 806 + }, + { + "epoch": 0.04626895622509532, + "grad_norm": 0.0, + "learning_rate": 8.103604279972513e-06, + "loss": 15.589, + "step": 807 + }, + { + "epoch": 0.046326290743342026, + "grad_norm": 0.0, + "learning_rate": 8.098749444801226e-06, + "loss": 15.1841, + "step": 808 + }, + { + "epoch": 0.04638362526158874, + "grad_norm": 0.0, + "learning_rate": 8.093889861715144e-06, + "loss": 15.4068, + "step": 809 + }, + { + "epoch": 0.04644095977983545, + "grad_norm": 0.0, + "learning_rate": 8.089025538160142e-06, + "loss": 14.9371, + "step": 810 + }, + { + "epoch": 0.04649829429808216, + "grad_norm": 0.0, + "learning_rate": 8.08415648158935e-06, + "loss": 15.4922, + "step": 811 + }, + { + "epoch": 0.046555628816328874, + "grad_norm": 0.0, + "learning_rate": 8.079282699463155e-06, + "loss": 15.5137, + "step": 812 + }, + { + "epoch": 0.04661296333457558, + "grad_norm": 0.0, + "learning_rate": 8.074404199249184e-06, + "loss": 15.7668, + "step": 813 + }, + { + "epoch": 0.04667029785282229, + "grad_norm": 0.0, + "learning_rate": 8.069520988422292e-06, + "loss": 15.527, + "step": 814 + }, + { + "epoch": 0.046727632371069, + "grad_norm": 0.0, + "learning_rate": 8.064633074464548e-06, + "loss": 15.4981, + "step": 815 + }, + { + "epoch": 0.046784966889315716, + "grad_norm": 0.0, + "learning_rate": 8.059740464865237e-06, + "loss": 15.3851, + "step": 816 + }, + { + "epoch": 0.046842301407562424, + "grad_norm": 0.0, + "learning_rate": 8.054843167120827e-06, + "loss": 15.5792, + "step": 817 + }, + { + "epoch": 0.04689963592580913, + "grad_norm": 0.0, + "learning_rate": 8.04994118873498e-06, + "loss": 15.5655, + "step": 818 + }, + { + "epoch": 0.04695697044405584, + "grad_norm": 0.0, + "learning_rate": 8.04503453721852e-06, + "loss": 15.2363, + "step": 819 + }, + { + "epoch": 0.04701430496230256, + "grad_norm": 0.0, + "learning_rate": 8.040123220089437e-06, + "loss": 15.2825, + "step": 820 + }, + { + "epoch": 0.047071639480549265, + "grad_norm": 0.0, + "learning_rate": 8.035207244872871e-06, + "loss": 15.3609, + "step": 821 + }, + { + "epoch": 0.047128973998795974, + "grad_norm": 0.0, + "learning_rate": 8.030286619101094e-06, + "loss": 15.4033, + "step": 822 + }, + { + "epoch": 0.04718630851704268, + "grad_norm": 0.0, + "learning_rate": 8.025361350313506e-06, + "loss": 15.7496, + "step": 823 + }, + { + "epoch": 0.0472436430352894, + "grad_norm": 0.0, + "learning_rate": 8.020431446056622e-06, + "loss": 15.4934, + "step": 824 + }, + { + "epoch": 0.04730097755353611, + "grad_norm": 0.0, + "learning_rate": 8.01549691388406e-06, + "loss": 15.8934, + "step": 825 + }, + { + "epoch": 0.047358312071782815, + "grad_norm": 0.0, + "learning_rate": 8.010557761356523e-06, + "loss": 15.6078, + "step": 826 + }, + { + "epoch": 0.047415646590029524, + "grad_norm": 0.0, + "learning_rate": 8.005613996041803e-06, + "loss": 15.3614, + "step": 827 + }, + { + "epoch": 0.04747298110827624, + "grad_norm": 0.0, + "learning_rate": 8.000665625514752e-06, + "loss": 15.5648, + "step": 828 + }, + { + "epoch": 0.04753031562652295, + "grad_norm": 0.0, + "learning_rate": 7.99571265735728e-06, + "loss": 15.2218, + "step": 829 + }, + { + "epoch": 0.04758765014476966, + "grad_norm": 0.0, + "learning_rate": 7.990755099158346e-06, + "loss": 15.5548, + "step": 830 + }, + { + "epoch": 0.04764498466301637, + "grad_norm": 0.0, + "learning_rate": 7.985792958513932e-06, + "loss": 15.6068, + "step": 831 + }, + { + "epoch": 0.04770231918126308, + "grad_norm": 0.0, + "learning_rate": 7.980826243027052e-06, + "loss": 15.4351, + "step": 832 + }, + { + "epoch": 0.04775965369950979, + "grad_norm": 0.0, + "learning_rate": 7.975854960307724e-06, + "loss": 15.438, + "step": 833 + }, + { + "epoch": 0.0478169882177565, + "grad_norm": 0.0, + "learning_rate": 7.970879117972964e-06, + "loss": 15.4016, + "step": 834 + }, + { + "epoch": 0.047874322736003214, + "grad_norm": 0.0, + "learning_rate": 7.965898723646777e-06, + "loss": 15.4042, + "step": 835 + }, + { + "epoch": 0.04793165725424992, + "grad_norm": 0.0, + "learning_rate": 7.960913784960138e-06, + "loss": 15.2968, + "step": 836 + }, + { + "epoch": 0.04798899177249663, + "grad_norm": 0.0, + "learning_rate": 7.955924309550991e-06, + "loss": 15.1225, + "step": 837 + }, + { + "epoch": 0.04804632629074334, + "grad_norm": 0.0, + "learning_rate": 7.950930305064224e-06, + "loss": 15.3629, + "step": 838 + }, + { + "epoch": 0.048103660808990055, + "grad_norm": 0.0, + "learning_rate": 7.94593177915167e-06, + "loss": 15.3011, + "step": 839 + }, + { + "epoch": 0.048160995327236764, + "grad_norm": 0.0, + "learning_rate": 7.940928739472088e-06, + "loss": 15.3236, + "step": 840 + }, + { + "epoch": 0.04821832984548347, + "grad_norm": 0.0, + "learning_rate": 7.935921193691153e-06, + "loss": 15.7638, + "step": 841 + }, + { + "epoch": 0.04827566436373018, + "grad_norm": 0.0, + "learning_rate": 7.930909149481446e-06, + "loss": 15.2309, + "step": 842 + }, + { + "epoch": 0.048332998881976896, + "grad_norm": 0.0, + "learning_rate": 7.925892614522433e-06, + "loss": 15.6848, + "step": 843 + }, + { + "epoch": 0.048390333400223605, + "grad_norm": 0.0, + "learning_rate": 7.920871596500473e-06, + "loss": 15.4714, + "step": 844 + }, + { + "epoch": 0.048447667918470314, + "grad_norm": 0.0, + "learning_rate": 7.915846103108784e-06, + "loss": 15.3015, + "step": 845 + }, + { + "epoch": 0.04850500243671702, + "grad_norm": 0.0, + "learning_rate": 7.910816142047447e-06, + "loss": 15.3926, + "step": 846 + }, + { + "epoch": 0.04856233695496374, + "grad_norm": 0.0, + "learning_rate": 7.905781721023384e-06, + "loss": 15.4996, + "step": 847 + }, + { + "epoch": 0.048619671473210446, + "grad_norm": 0.0, + "learning_rate": 7.900742847750352e-06, + "loss": 15.3543, + "step": 848 + }, + { + "epoch": 0.048677005991457155, + "grad_norm": 0.0, + "learning_rate": 7.895699529948932e-06, + "loss": 15.3942, + "step": 849 + }, + { + "epoch": 0.04873434050970387, + "grad_norm": 0.0, + "learning_rate": 7.890651775346512e-06, + "loss": 15.5402, + "step": 850 + }, + { + "epoch": 0.04879167502795058, + "grad_norm": 0.0, + "learning_rate": 7.885599591677283e-06, + "loss": 15.9481, + "step": 851 + }, + { + "epoch": 0.04884900954619729, + "grad_norm": 0.0, + "learning_rate": 7.880542986682212e-06, + "loss": 15.3632, + "step": 852 + }, + { + "epoch": 0.048906344064443996, + "grad_norm": 0.0, + "learning_rate": 7.875481968109052e-06, + "loss": 15.7589, + "step": 853 + }, + { + "epoch": 0.04896367858269071, + "grad_norm": 0.0, + "learning_rate": 7.870416543712315e-06, + "loss": 15.648, + "step": 854 + }, + { + "epoch": 0.04902101310093742, + "grad_norm": 0.0, + "learning_rate": 7.865346721253256e-06, + "loss": 15.5267, + "step": 855 + }, + { + "epoch": 0.04907834761918413, + "grad_norm": 0.0, + "learning_rate": 7.860272508499877e-06, + "loss": 15.6383, + "step": 856 + }, + { + "epoch": 0.04913568213743084, + "grad_norm": 0.0, + "learning_rate": 7.855193913226907e-06, + "loss": 15.2878, + "step": 857 + }, + { + "epoch": 0.04919301665567755, + "grad_norm": 0.0, + "learning_rate": 7.850110943215785e-06, + "loss": 15.734, + "step": 858 + }, + { + "epoch": 0.04925035117392426, + "grad_norm": 0.0, + "learning_rate": 7.845023606254658e-06, + "loss": 15.7939, + "step": 859 + }, + { + "epoch": 0.04930768569217097, + "grad_norm": 0.0, + "learning_rate": 7.83993191013836e-06, + "loss": 15.3507, + "step": 860 + }, + { + "epoch": 0.04936502021041768, + "grad_norm": 0.0, + "learning_rate": 7.834835862668405e-06, + "loss": 15.2812, + "step": 861 + }, + { + "epoch": 0.049422354728664394, + "grad_norm": 0.0, + "learning_rate": 7.829735471652978e-06, + "loss": 15.1451, + "step": 862 + }, + { + "epoch": 0.0494796892469111, + "grad_norm": 0.0, + "learning_rate": 7.82463074490691e-06, + "loss": 15.6257, + "step": 863 + }, + { + "epoch": 0.04953702376515781, + "grad_norm": 0.0, + "learning_rate": 7.819521690251688e-06, + "loss": 15.3859, + "step": 864 + }, + { + "epoch": 0.04959435828340453, + "grad_norm": 0.0, + "learning_rate": 7.814408315515419e-06, + "loss": 15.6041, + "step": 865 + }, + { + "epoch": 0.049651692801651236, + "grad_norm": 0.0, + "learning_rate": 7.809290628532836e-06, + "loss": 15.5338, + "step": 866 + }, + { + "epoch": 0.049709027319897944, + "grad_norm": 0.0, + "learning_rate": 7.804168637145276e-06, + "loss": 15.3608, + "step": 867 + }, + { + "epoch": 0.04976636183814465, + "grad_norm": 0.0, + "learning_rate": 7.799042349200672e-06, + "loss": 15.3891, + "step": 868 + }, + { + "epoch": 0.04982369635639137, + "grad_norm": 0.0, + "learning_rate": 7.793911772553542e-06, + "loss": 15.2893, + "step": 869 + }, + { + "epoch": 0.04988103087463808, + "grad_norm": 0.0, + "learning_rate": 7.788776915064972e-06, + "loss": 15.6297, + "step": 870 + }, + { + "epoch": 0.049938365392884786, + "grad_norm": 0.0, + "learning_rate": 7.783637784602608e-06, + "loss": 15.6214, + "step": 871 + }, + { + "epoch": 0.049995699911131494, + "grad_norm": 0.0, + "learning_rate": 7.778494389040646e-06, + "loss": 15.3664, + "step": 872 + }, + { + "epoch": 0.05005303442937821, + "grad_norm": 0.0, + "learning_rate": 7.773346736259815e-06, + "loss": 15.2891, + "step": 873 + }, + { + "epoch": 0.05011036894762492, + "grad_norm": 0.0, + "learning_rate": 7.768194834147362e-06, + "loss": 15.8051, + "step": 874 + }, + { + "epoch": 0.05016770346587163, + "grad_norm": 0.0, + "learning_rate": 7.763038690597055e-06, + "loss": 15.5375, + "step": 875 + }, + { + "epoch": 0.050225037984118336, + "grad_norm": 0.0, + "learning_rate": 7.757878313509153e-06, + "loss": 15.6352, + "step": 876 + }, + { + "epoch": 0.05028237250236505, + "grad_norm": 0.0, + "learning_rate": 7.752713710790405e-06, + "loss": 15.6374, + "step": 877 + }, + { + "epoch": 0.05033970702061176, + "grad_norm": 0.0, + "learning_rate": 7.747544890354031e-06, + "loss": 15.4307, + "step": 878 + }, + { + "epoch": 0.05039704153885847, + "grad_norm": 0.0, + "learning_rate": 7.742371860119718e-06, + "loss": 15.0681, + "step": 879 + }, + { + "epoch": 0.05045437605710518, + "grad_norm": 0.0, + "learning_rate": 7.7371946280136e-06, + "loss": 15.284, + "step": 880 + }, + { + "epoch": 0.05051171057535189, + "grad_norm": 0.0, + "learning_rate": 7.73201320196825e-06, + "loss": 15.3285, + "step": 881 + }, + { + "epoch": 0.0505690450935986, + "grad_norm": 0.0, + "learning_rate": 7.72682758992267e-06, + "loss": 15.2923, + "step": 882 + }, + { + "epoch": 0.05062637961184531, + "grad_norm": 0.0, + "learning_rate": 7.721637799822269e-06, + "loss": 15.4685, + "step": 883 + }, + { + "epoch": 0.050683714130092025, + "grad_norm": 0.0, + "learning_rate": 7.716443839618863e-06, + "loss": 15.4828, + "step": 884 + }, + { + "epoch": 0.050741048648338734, + "grad_norm": 0.0, + "learning_rate": 7.711245717270659e-06, + "loss": 15.5624, + "step": 885 + }, + { + "epoch": 0.05079838316658544, + "grad_norm": 0.0, + "learning_rate": 7.706043440742235e-06, + "loss": 15.46, + "step": 886 + }, + { + "epoch": 0.05085571768483215, + "grad_norm": 0.0, + "learning_rate": 7.70083701800454e-06, + "loss": 15.5009, + "step": 887 + }, + { + "epoch": 0.05091305220307887, + "grad_norm": 0.0, + "learning_rate": 7.695626457034867e-06, + "loss": 15.2884, + "step": 888 + }, + { + "epoch": 0.050970386721325575, + "grad_norm": 0.0, + "learning_rate": 7.690411765816864e-06, + "loss": 16.1451, + "step": 889 + }, + { + "epoch": 0.051027721239572284, + "grad_norm": 0.0, + "learning_rate": 7.685192952340495e-06, + "loss": 15.5103, + "step": 890 + }, + { + "epoch": 0.05108505575781899, + "grad_norm": 0.0, + "learning_rate": 7.679970024602044e-06, + "loss": 15.2557, + "step": 891 + }, + { + "epoch": 0.05114239027606571, + "grad_norm": 0.0, + "learning_rate": 7.674742990604101e-06, + "loss": 15.7524, + "step": 892 + }, + { + "epoch": 0.051199724794312416, + "grad_norm": 0.0, + "learning_rate": 7.669511858355545e-06, + "loss": 15.4103, + "step": 893 + }, + { + "epoch": 0.051257059312559125, + "grad_norm": 0.0, + "learning_rate": 7.664276635871535e-06, + "loss": 15.8326, + "step": 894 + }, + { + "epoch": 0.051314393830805834, + "grad_norm": 0.0, + "learning_rate": 7.659037331173498e-06, + "loss": 15.4746, + "step": 895 + }, + { + "epoch": 0.05137172834905255, + "grad_norm": 0.0, + "learning_rate": 7.653793952289114e-06, + "loss": 15.2673, + "step": 896 + }, + { + "epoch": 0.05142906286729926, + "grad_norm": 0.0, + "learning_rate": 7.648546507252308e-06, + "loss": 15.5551, + "step": 897 + }, + { + "epoch": 0.051486397385545966, + "grad_norm": 0.0, + "learning_rate": 7.643295004103232e-06, + "loss": 15.3011, + "step": 898 + }, + { + "epoch": 0.051543731903792675, + "grad_norm": 0.0, + "learning_rate": 7.638039450888259e-06, + "loss": 15.2572, + "step": 899 + }, + { + "epoch": 0.05160106642203939, + "grad_norm": 0.0, + "learning_rate": 7.632779855659966e-06, + "loss": 15.536, + "step": 900 + }, + { + "epoch": 0.0516584009402861, + "grad_norm": 0.0, + "learning_rate": 7.627516226477123e-06, + "loss": 15.4528, + "step": 901 + }, + { + "epoch": 0.05171573545853281, + "grad_norm": 0.0, + "learning_rate": 7.62224857140468e-06, + "loss": 15.1914, + "step": 902 + }, + { + "epoch": 0.05177306997677952, + "grad_norm": 0.0, + "learning_rate": 7.616976898513759e-06, + "loss": 15.3216, + "step": 903 + }, + { + "epoch": 0.05183040449502623, + "grad_norm": 0.0, + "learning_rate": 7.611701215881635e-06, + "loss": 15.4943, + "step": 904 + }, + { + "epoch": 0.05188773901327294, + "grad_norm": 0.0, + "learning_rate": 7.606421531591725e-06, + "loss": 15.8347, + "step": 905 + }, + { + "epoch": 0.05194507353151965, + "grad_norm": 0.0, + "learning_rate": 7.601137853733583e-06, + "loss": 15.4142, + "step": 906 + }, + { + "epoch": 0.052002408049766365, + "grad_norm": 0.0, + "learning_rate": 7.595850190402877e-06, + "loss": 15.3806, + "step": 907 + }, + { + "epoch": 0.05205974256801307, + "grad_norm": 0.0, + "learning_rate": 7.590558549701383e-06, + "loss": 15.4033, + "step": 908 + }, + { + "epoch": 0.05211707708625978, + "grad_norm": 0.0, + "learning_rate": 7.585262939736975e-06, + "loss": 15.6882, + "step": 909 + }, + { + "epoch": 0.05217441160450649, + "grad_norm": 0.0, + "learning_rate": 7.579963368623602e-06, + "loss": 15.5859, + "step": 910 + }, + { + "epoch": 0.052231746122753206, + "grad_norm": 0.0, + "learning_rate": 7.574659844481285e-06, + "loss": 15.5471, + "step": 911 + }, + { + "epoch": 0.052289080640999915, + "grad_norm": 0.0, + "learning_rate": 7.569352375436102e-06, + "loss": 15.3833, + "step": 912 + }, + { + "epoch": 0.05234641515924662, + "grad_norm": 0.0, + "learning_rate": 7.564040969620179e-06, + "loss": 15.4828, + "step": 913 + }, + { + "epoch": 0.05240374967749333, + "grad_norm": 0.0, + "learning_rate": 7.558725635171669e-06, + "loss": 15.7124, + "step": 914 + }, + { + "epoch": 0.05246108419574005, + "grad_norm": 0.0, + "learning_rate": 7.553406380234744e-06, + "loss": 15.6596, + "step": 915 + }, + { + "epoch": 0.052518418713986756, + "grad_norm": 0.0, + "learning_rate": 7.548083212959588e-06, + "loss": 15.318, + "step": 916 + }, + { + "epoch": 0.052575753232233464, + "grad_norm": 0.0, + "learning_rate": 7.542756141502376e-06, + "loss": 15.3831, + "step": 917 + }, + { + "epoch": 0.05263308775048018, + "grad_norm": 0.0, + "learning_rate": 7.537425174025265e-06, + "loss": 15.5979, + "step": 918 + }, + { + "epoch": 0.05269042226872689, + "grad_norm": 0.0, + "learning_rate": 7.532090318696382e-06, + "loss": 15.5016, + "step": 919 + }, + { + "epoch": 0.0527477567869736, + "grad_norm": 0.0, + "learning_rate": 7.526751583689812e-06, + "loss": 15.1329, + "step": 920 + }, + { + "epoch": 0.052805091305220306, + "grad_norm": 0.0, + "learning_rate": 7.521408977185584e-06, + "loss": 15.3954, + "step": 921 + }, + { + "epoch": 0.05286242582346702, + "grad_norm": 0.0, + "learning_rate": 7.516062507369655e-06, + "loss": 15.5931, + "step": 922 + }, + { + "epoch": 0.05291976034171373, + "grad_norm": 0.0, + "learning_rate": 7.510712182433908e-06, + "loss": 15.7145, + "step": 923 + }, + { + "epoch": 0.05297709485996044, + "grad_norm": 0.0, + "learning_rate": 7.505358010576132e-06, + "loss": 15.1615, + "step": 924 + }, + { + "epoch": 0.05303442937820715, + "grad_norm": 0.0, + "learning_rate": 7.500000000000001e-06, + "loss": 15.3657, + "step": 925 + }, + { + "epoch": 0.05309176389645386, + "grad_norm": 0.0, + "learning_rate": 7.494638158915083e-06, + "loss": 15.1117, + "step": 926 + }, + { + "epoch": 0.05314909841470057, + "grad_norm": 0.0, + "learning_rate": 7.489272495536809e-06, + "loss": 15.3789, + "step": 927 + }, + { + "epoch": 0.05320643293294728, + "grad_norm": 0.0, + "learning_rate": 7.483903018086466e-06, + "loss": 15.4555, + "step": 928 + }, + { + "epoch": 0.05326376745119399, + "grad_norm": 0.0, + "learning_rate": 7.4785297347911865e-06, + "loss": 15.0974, + "step": 929 + }, + { + "epoch": 0.053321101969440704, + "grad_norm": 0.0, + "learning_rate": 7.473152653883934e-06, + "loss": 15.3593, + "step": 930 + }, + { + "epoch": 0.05337843648768741, + "grad_norm": 0.0, + "learning_rate": 7.467771783603492e-06, + "loss": 15.8678, + "step": 931 + }, + { + "epoch": 0.05343577100593412, + "grad_norm": 0.0, + "learning_rate": 7.4623871321944485e-06, + "loss": 15.7244, + "step": 932 + }, + { + "epoch": 0.05349310552418083, + "grad_norm": 0.0, + "learning_rate": 7.456998707907184e-06, + "loss": 15.1704, + "step": 933 + }, + { + "epoch": 0.053550440042427545, + "grad_norm": 0.0, + "learning_rate": 7.4516065189978625e-06, + "loss": 15.4617, + "step": 934 + }, + { + "epoch": 0.053607774560674254, + "grad_norm": 0.0, + "learning_rate": 7.446210573728414e-06, + "loss": 15.3451, + "step": 935 + }, + { + "epoch": 0.05366510907892096, + "grad_norm": 0.0, + "learning_rate": 7.440810880366524e-06, + "loss": 15.4365, + "step": 936 + }, + { + "epoch": 0.05372244359716768, + "grad_norm": 0.0, + "learning_rate": 7.435407447185623e-06, + "loss": 15.4767, + "step": 937 + }, + { + "epoch": 0.05377977811541439, + "grad_norm": 0.0, + "learning_rate": 7.430000282464872e-06, + "loss": 15.3876, + "step": 938 + }, + { + "epoch": 0.053837112633661095, + "grad_norm": 0.0, + "learning_rate": 7.424589394489145e-06, + "loss": 15.5479, + "step": 939 + }, + { + "epoch": 0.053894447151907804, + "grad_norm": 0.0, + "learning_rate": 7.419174791549023e-06, + "loss": 15.3345, + "step": 940 + }, + { + "epoch": 0.05395178167015452, + "grad_norm": 0.0, + "learning_rate": 7.413756481940783e-06, + "loss": 15.145, + "step": 941 + }, + { + "epoch": 0.05400911618840123, + "grad_norm": 0.0, + "learning_rate": 7.408334473966375e-06, + "loss": 15.428, + "step": 942 + }, + { + "epoch": 0.05406645070664794, + "grad_norm": 0.0, + "learning_rate": 7.402908775933419e-06, + "loss": 15.3543, + "step": 943 + }, + { + "epoch": 0.054123785224894645, + "grad_norm": 0.0, + "learning_rate": 7.39747939615519e-06, + "loss": 15.5398, + "step": 944 + }, + { + "epoch": 0.05418111974314136, + "grad_norm": 0.0, + "learning_rate": 7.392046342950604e-06, + "loss": 15.2644, + "step": 945 + }, + { + "epoch": 0.05423845426138807, + "grad_norm": 0.0, + "learning_rate": 7.386609624644201e-06, + "loss": 15.641, + "step": 946 + }, + { + "epoch": 0.05429578877963478, + "grad_norm": 0.0, + "learning_rate": 7.38116924956614e-06, + "loss": 15.3402, + "step": 947 + }, + { + "epoch": 0.05435312329788149, + "grad_norm": 0.0, + "learning_rate": 7.375725226052186e-06, + "loss": 15.4802, + "step": 948 + }, + { + "epoch": 0.0544104578161282, + "grad_norm": 0.0, + "learning_rate": 7.370277562443689e-06, + "loss": 15.6399, + "step": 949 + }, + { + "epoch": 0.05446779233437491, + "grad_norm": 0.0, + "learning_rate": 7.364826267087577e-06, + "loss": 15.3748, + "step": 950 + }, + { + "epoch": 0.05452512685262162, + "grad_norm": 0.0, + "learning_rate": 7.359371348336346e-06, + "loss": 15.559, + "step": 951 + }, + { + "epoch": 0.05458246137086833, + "grad_norm": 0.0, + "learning_rate": 7.353912814548042e-06, + "loss": 15.786, + "step": 952 + }, + { + "epoch": 0.05463979588911504, + "grad_norm": 0.0, + "learning_rate": 7.348450674086247e-06, + "loss": 15.3269, + "step": 953 + }, + { + "epoch": 0.05469713040736175, + "grad_norm": 0.0, + "learning_rate": 7.342984935320074e-06, + "loss": 15.3853, + "step": 954 + }, + { + "epoch": 0.05475446492560846, + "grad_norm": 0.0, + "learning_rate": 7.337515606624148e-06, + "loss": 15.5078, + "step": 955 + }, + { + "epoch": 0.054811799443855176, + "grad_norm": 0.0, + "learning_rate": 7.332042696378591e-06, + "loss": 15.6212, + "step": 956 + }, + { + "epoch": 0.054869133962101885, + "grad_norm": 0.0, + "learning_rate": 7.326566212969016e-06, + "loss": 15.6121, + "step": 957 + }, + { + "epoch": 0.05492646848034859, + "grad_norm": 0.0, + "learning_rate": 7.321086164786513e-06, + "loss": 15.406, + "step": 958 + }, + { + "epoch": 0.0549838029985953, + "grad_norm": 0.0, + "learning_rate": 7.315602560227627e-06, + "loss": 15.0786, + "step": 959 + }, + { + "epoch": 0.05504113751684202, + "grad_norm": 0.0, + "learning_rate": 7.310115407694358e-06, + "loss": 15.2173, + "step": 960 + }, + { + "epoch": 0.055098472035088726, + "grad_norm": 0.0, + "learning_rate": 7.30462471559414e-06, + "loss": 15.5237, + "step": 961 + }, + { + "epoch": 0.055155806553335435, + "grad_norm": 0.0, + "learning_rate": 7.299130492339833e-06, + "loss": 15.7292, + "step": 962 + }, + { + "epoch": 0.05521314107158214, + "grad_norm": 0.0, + "learning_rate": 7.293632746349702e-06, + "loss": 15.5719, + "step": 963 + }, + { + "epoch": 0.05527047558982886, + "grad_norm": 0.0, + "learning_rate": 7.288131486047414e-06, + "loss": 15.5459, + "step": 964 + }, + { + "epoch": 0.05532781010807557, + "grad_norm": 0.0, + "learning_rate": 7.282626719862021e-06, + "loss": 15.7095, + "step": 965 + }, + { + "epoch": 0.055385144626322276, + "grad_norm": 0.0, + "learning_rate": 7.277118456227941e-06, + "loss": 15.6278, + "step": 966 + }, + { + "epoch": 0.055442479144568985, + "grad_norm": 0.0, + "learning_rate": 7.2716067035849595e-06, + "loss": 15.4754, + "step": 967 + }, + { + "epoch": 0.0554998136628157, + "grad_norm": 0.0, + "learning_rate": 7.266091470378199e-06, + "loss": 15.3922, + "step": 968 + }, + { + "epoch": 0.05555714818106241, + "grad_norm": 0.0, + "learning_rate": 7.260572765058124e-06, + "loss": 15.4944, + "step": 969 + }, + { + "epoch": 0.05561448269930912, + "grad_norm": 0.0, + "learning_rate": 7.25505059608051e-06, + "loss": 15.5733, + "step": 970 + }, + { + "epoch": 0.05567181721755583, + "grad_norm": 0.0, + "learning_rate": 7.249524971906445e-06, + "loss": 15.4449, + "step": 971 + }, + { + "epoch": 0.05572915173580254, + "grad_norm": 0.0, + "learning_rate": 7.243995901002312e-06, + "loss": 15.4991, + "step": 972 + }, + { + "epoch": 0.05578648625404925, + "grad_norm": 0.0, + "learning_rate": 7.23846339183977e-06, + "loss": 15.4896, + "step": 973 + }, + { + "epoch": 0.05584382077229596, + "grad_norm": 0.0, + "learning_rate": 7.232927452895749e-06, + "loss": 15.4687, + "step": 974 + }, + { + "epoch": 0.055901155290542674, + "grad_norm": 0.0, + "learning_rate": 7.227388092652436e-06, + "loss": 15.5742, + "step": 975 + }, + { + "epoch": 0.05595848980878938, + "grad_norm": 0.0, + "learning_rate": 7.221845319597258e-06, + "loss": 15.5478, + "step": 976 + }, + { + "epoch": 0.05601582432703609, + "grad_norm": 0.0, + "learning_rate": 7.216299142222869e-06, + "loss": 15.545, + "step": 977 + }, + { + "epoch": 0.0560731588452828, + "grad_norm": 0.0, + "learning_rate": 7.210749569027145e-06, + "loss": 15.5581, + "step": 978 + }, + { + "epoch": 0.056130493363529516, + "grad_norm": 0.0, + "learning_rate": 7.2051966085131584e-06, + "loss": 15.317, + "step": 979 + }, + { + "epoch": 0.056187827881776224, + "grad_norm": 0.0, + "learning_rate": 7.199640269189176e-06, + "loss": 15.5235, + "step": 980 + }, + { + "epoch": 0.05624516240002293, + "grad_norm": 0.0, + "learning_rate": 7.194080559568642e-06, + "loss": 15.6216, + "step": 981 + }, + { + "epoch": 0.05630249691826964, + "grad_norm": 0.0, + "learning_rate": 7.18851748817016e-06, + "loss": 15.7069, + "step": 982 + }, + { + "epoch": 0.05635983143651636, + "grad_norm": 0.0, + "learning_rate": 7.18295106351749e-06, + "loss": 15.3323, + "step": 983 + }, + { + "epoch": 0.056417165954763066, + "grad_norm": 0.0, + "learning_rate": 7.177381294139527e-06, + "loss": 15.568, + "step": 984 + }, + { + "epoch": 0.056474500473009774, + "grad_norm": 0.0, + "learning_rate": 7.1718081885702905e-06, + "loss": 15.2707, + "step": 985 + }, + { + "epoch": 0.05653183499125648, + "grad_norm": 0.0, + "learning_rate": 7.1662317553489126e-06, + "loss": 15.5602, + "step": 986 + }, + { + "epoch": 0.0565891695095032, + "grad_norm": 0.0, + "learning_rate": 7.160652003019624e-06, + "loss": 15.4934, + "step": 987 + }, + { + "epoch": 0.05664650402774991, + "grad_norm": 0.0, + "learning_rate": 7.155068940131741e-06, + "loss": 15.3652, + "step": 988 + }, + { + "epoch": 0.056703838545996615, + "grad_norm": 0.0, + "learning_rate": 7.149482575239653e-06, + "loss": 15.1606, + "step": 989 + }, + { + "epoch": 0.05676117306424333, + "grad_norm": 0.0, + "learning_rate": 7.143892916902805e-06, + "loss": 15.6928, + "step": 990 + }, + { + "epoch": 0.05681850758249004, + "grad_norm": 0.0, + "learning_rate": 7.138299973685694e-06, + "loss": 15.3702, + "step": 991 + }, + { + "epoch": 0.05687584210073675, + "grad_norm": 0.0, + "learning_rate": 7.132703754157846e-06, + "loss": 15.5082, + "step": 992 + }, + { + "epoch": 0.05693317661898346, + "grad_norm": 0.0, + "learning_rate": 7.1271042668938094e-06, + "loss": 15.4877, + "step": 993 + }, + { + "epoch": 0.05699051113723017, + "grad_norm": 0.0, + "learning_rate": 7.121501520473137e-06, + "loss": 15.6682, + "step": 994 + }, + { + "epoch": 0.05704784565547688, + "grad_norm": 0.0, + "learning_rate": 7.115895523480376e-06, + "loss": 15.3158, + "step": 995 + }, + { + "epoch": 0.05710518017372359, + "grad_norm": 0.0, + "learning_rate": 7.110286284505058e-06, + "loss": 15.2529, + "step": 996 + }, + { + "epoch": 0.0571625146919703, + "grad_norm": 0.0, + "learning_rate": 7.104673812141676e-06, + "loss": 15.392, + "step": 997 + }, + { + "epoch": 0.057219849210217014, + "grad_norm": 0.0, + "learning_rate": 7.099058114989679e-06, + "loss": 15.4152, + "step": 998 + }, + { + "epoch": 0.05727718372846372, + "grad_norm": 0.0, + "learning_rate": 7.09343920165346e-06, + "loss": 15.5532, + "step": 999 + }, + { + "epoch": 0.05733451824671043, + "grad_norm": 0.0, + "learning_rate": 7.087817080742337e-06, + "loss": 15.4105, + "step": 1000 + }, + { + "epoch": 0.05739185276495714, + "grad_norm": 0.0, + "learning_rate": 7.082191760870543e-06, + "loss": 15.8258, + "step": 1001 + }, + { + "epoch": 0.057449187283203855, + "grad_norm": 0.0, + "learning_rate": 7.076563250657213e-06, + "loss": 15.3265, + "step": 1002 + }, + { + "epoch": 0.057506521801450564, + "grad_norm": 0.0, + "learning_rate": 7.070931558726373e-06, + "loss": 15.5073, + "step": 1003 + }, + { + "epoch": 0.05756385631969727, + "grad_norm": 0.0, + "learning_rate": 7.065296693706916e-06, + "loss": 15.2938, + "step": 1004 + }, + { + "epoch": 0.05762119083794398, + "grad_norm": 0.0, + "learning_rate": 7.059658664232605e-06, + "loss": 15.242, + "step": 1005 + }, + { + "epoch": 0.057678525356190696, + "grad_norm": 0.0, + "learning_rate": 7.054017478942048e-06, + "loss": 15.3581, + "step": 1006 + }, + { + "epoch": 0.057735859874437405, + "grad_norm": 0.0, + "learning_rate": 7.048373146478691e-06, + "loss": 15.524, + "step": 1007 + }, + { + "epoch": 0.057793194392684114, + "grad_norm": 0.0, + "learning_rate": 7.042725675490797e-06, + "loss": 15.3818, + "step": 1008 + }, + { + "epoch": 0.05785052891093083, + "grad_norm": 0.0, + "learning_rate": 7.037075074631441e-06, + "loss": 15.0783, + "step": 1009 + }, + { + "epoch": 0.05790786342917754, + "grad_norm": 0.0, + "learning_rate": 7.031421352558495e-06, + "loss": 15.3572, + "step": 1010 + }, + { + "epoch": 0.057965197947424246, + "grad_norm": 0.0, + "learning_rate": 7.025764517934612e-06, + "loss": 15.0614, + "step": 1011 + }, + { + "epoch": 0.058022532465670955, + "grad_norm": 0.0, + "learning_rate": 7.0201045794272135e-06, + "loss": 15.2281, + "step": 1012 + }, + { + "epoch": 0.05807986698391767, + "grad_norm": 0.0, + "learning_rate": 7.0144415457084765e-06, + "loss": 15.6632, + "step": 1013 + }, + { + "epoch": 0.05813720150216438, + "grad_norm": 0.0, + "learning_rate": 7.008775425455323e-06, + "loss": 15.3456, + "step": 1014 + }, + { + "epoch": 0.05819453602041109, + "grad_norm": 0.0, + "learning_rate": 7.003106227349399e-06, + "loss": 15.2589, + "step": 1015 + }, + { + "epoch": 0.058251870538657796, + "grad_norm": 0.0, + "learning_rate": 6.997433960077072e-06, + "loss": 15.1371, + "step": 1016 + }, + { + "epoch": 0.05830920505690451, + "grad_norm": 0.0, + "learning_rate": 6.991758632329411e-06, + "loss": 15.6141, + "step": 1017 + }, + { + "epoch": 0.05836653957515122, + "grad_norm": 0.0, + "learning_rate": 6.9860802528021705e-06, + "loss": 15.4954, + "step": 1018 + }, + { + "epoch": 0.05842387409339793, + "grad_norm": 0.0, + "learning_rate": 6.980398830195785e-06, + "loss": 15.5896, + "step": 1019 + }, + { + "epoch": 0.05848120861164464, + "grad_norm": 0.0, + "learning_rate": 6.97471437321535e-06, + "loss": 15.4669, + "step": 1020 + }, + { + "epoch": 0.05853854312989135, + "grad_norm": 0.0, + "learning_rate": 6.969026890570612e-06, + "loss": 15.2941, + "step": 1021 + }, + { + "epoch": 0.05859587764813806, + "grad_norm": 0.0, + "learning_rate": 6.963336390975949e-06, + "loss": 15.3614, + "step": 1022 + }, + { + "epoch": 0.05865321216638477, + "grad_norm": 0.0, + "learning_rate": 6.957642883150365e-06, + "loss": 15.4045, + "step": 1023 + }, + { + "epoch": 0.058710546684631486, + "grad_norm": 0.0, + "learning_rate": 6.9519463758174745e-06, + "loss": 15.6422, + "step": 1024 + }, + { + "epoch": 0.058767881202878194, + "grad_norm": 0.0, + "learning_rate": 6.9462468777054855e-06, + "loss": 15.4819, + "step": 1025 + }, + { + "epoch": 0.0588252157211249, + "grad_norm": 0.0, + "learning_rate": 6.940544397547189e-06, + "loss": 15.569, + "step": 1026 + }, + { + "epoch": 0.05888255023937161, + "grad_norm": 0.0, + "learning_rate": 6.934838944079944e-06, + "loss": 15.6353, + "step": 1027 + }, + { + "epoch": 0.05893988475761833, + "grad_norm": 0.0, + "learning_rate": 6.929130526045667e-06, + "loss": 15.1708, + "step": 1028 + }, + { + "epoch": 0.058997219275865036, + "grad_norm": 0.0, + "learning_rate": 6.9234191521908176e-06, + "loss": 15.071, + "step": 1029 + }, + { + "epoch": 0.059054553794111744, + "grad_norm": 0.0, + "learning_rate": 6.917704831266381e-06, + "loss": 15.3808, + "step": 1030 + }, + { + "epoch": 0.05911188831235845, + "grad_norm": 0.0, + "learning_rate": 6.911987572027861e-06, + "loss": 15.6273, + "step": 1031 + }, + { + "epoch": 0.05916922283060517, + "grad_norm": 0.0, + "learning_rate": 6.906267383235261e-06, + "loss": 15.4842, + "step": 1032 + }, + { + "epoch": 0.05922655734885188, + "grad_norm": 0.0, + "learning_rate": 6.9005442736530745e-06, + "loss": 15.3772, + "step": 1033 + }, + { + "epoch": 0.059283891867098586, + "grad_norm": 0.0, + "learning_rate": 6.894818252050272e-06, + "loss": 15.1444, + "step": 1034 + }, + { + "epoch": 0.059341226385345294, + "grad_norm": 0.0, + "learning_rate": 6.889089327200282e-06, + "loss": 15.694, + "step": 1035 + }, + { + "epoch": 0.05939856090359201, + "grad_norm": 0.0, + "learning_rate": 6.883357507880985e-06, + "loss": 15.2363, + "step": 1036 + }, + { + "epoch": 0.05945589542183872, + "grad_norm": 0.0, + "learning_rate": 6.877622802874693e-06, + "loss": 15.2058, + "step": 1037 + }, + { + "epoch": 0.05951322994008543, + "grad_norm": 0.0, + "learning_rate": 6.871885220968142e-06, + "loss": 15.431, + "step": 1038 + }, + { + "epoch": 0.059570564458332136, + "grad_norm": 0.0, + "learning_rate": 6.866144770952474e-06, + "loss": 15.2124, + "step": 1039 + }, + { + "epoch": 0.05962789897657885, + "grad_norm": 0.0, + "learning_rate": 6.86040146162323e-06, + "loss": 15.531, + "step": 1040 + }, + { + "epoch": 0.05968523349482556, + "grad_norm": 0.0, + "learning_rate": 6.854655301780324e-06, + "loss": 15.1198, + "step": 1041 + }, + { + "epoch": 0.05974256801307227, + "grad_norm": 0.0, + "learning_rate": 6.848906300228047e-06, + "loss": 15.3151, + "step": 1042 + }, + { + "epoch": 0.059799902531318984, + "grad_norm": 0.0, + "learning_rate": 6.843154465775036e-06, + "loss": 15.3529, + "step": 1043 + }, + { + "epoch": 0.05985723704956569, + "grad_norm": 0.0, + "learning_rate": 6.837399807234273e-06, + "loss": 15.306, + "step": 1044 + }, + { + "epoch": 0.0599145715678124, + "grad_norm": 0.0, + "learning_rate": 6.831642333423068e-06, + "loss": 15.5283, + "step": 1045 + }, + { + "epoch": 0.05997190608605911, + "grad_norm": 0.0, + "learning_rate": 6.825882053163039e-06, + "loss": 15.5263, + "step": 1046 + }, + { + "epoch": 0.060029240604305825, + "grad_norm": 0.0, + "learning_rate": 6.820118975280109e-06, + "loss": 15.101, + "step": 1047 + }, + { + "epoch": 0.060086575122552534, + "grad_norm": 0.0, + "learning_rate": 6.814353108604488e-06, + "loss": 15.4583, + "step": 1048 + }, + { + "epoch": 0.06014390964079924, + "grad_norm": 0.0, + "learning_rate": 6.8085844619706555e-06, + "loss": 15.0133, + "step": 1049 + }, + { + "epoch": 0.06020124415904595, + "grad_norm": 0.0, + "learning_rate": 6.802813044217353e-06, + "loss": 15.3445, + "step": 1050 + }, + { + "epoch": 0.06025857867729267, + "grad_norm": 0.0, + "learning_rate": 6.797038864187564e-06, + "loss": 15.4779, + "step": 1051 + }, + { + "epoch": 0.060315913195539375, + "grad_norm": 0.0, + "learning_rate": 6.791261930728513e-06, + "loss": 15.5129, + "step": 1052 + }, + { + "epoch": 0.060373247713786084, + "grad_norm": 0.0, + "learning_rate": 6.785482252691634e-06, + "loss": 15.3129, + "step": 1053 + }, + { + "epoch": 0.06043058223203279, + "grad_norm": 0.0, + "learning_rate": 6.77969983893257e-06, + "loss": 15.2355, + "step": 1054 + }, + { + "epoch": 0.06048791675027951, + "grad_norm": 0.0, + "learning_rate": 6.773914698311157e-06, + "loss": 15.3794, + "step": 1055 + }, + { + "epoch": 0.060545251268526216, + "grad_norm": 0.0, + "learning_rate": 6.768126839691408e-06, + "loss": 15.4321, + "step": 1056 + }, + { + "epoch": 0.060602585786772925, + "grad_norm": 0.0, + "learning_rate": 6.762336271941499e-06, + "loss": 15.3637, + "step": 1057 + }, + { + "epoch": 0.060659920305019634, + "grad_norm": 0.0, + "learning_rate": 6.756543003933758e-06, + "loss": 15.3693, + "step": 1058 + }, + { + "epoch": 0.06071725482326635, + "grad_norm": 0.0, + "learning_rate": 6.750747044544654e-06, + "loss": 15.0256, + "step": 1059 + }, + { + "epoch": 0.06077458934151306, + "grad_norm": 0.0, + "learning_rate": 6.7449484026547705e-06, + "loss": 15.4155, + "step": 1060 + }, + { + "epoch": 0.060831923859759766, + "grad_norm": 0.0, + "learning_rate": 6.739147087148812e-06, + "loss": 15.5012, + "step": 1061 + }, + { + "epoch": 0.06088925837800648, + "grad_norm": 0.0, + "learning_rate": 6.733343106915573e-06, + "loss": 15.3416, + "step": 1062 + }, + { + "epoch": 0.06094659289625319, + "grad_norm": 0.0, + "learning_rate": 6.7275364708479316e-06, + "loss": 15.4652, + "step": 1063 + }, + { + "epoch": 0.0610039274144999, + "grad_norm": 0.0, + "learning_rate": 6.721727187842837e-06, + "loss": 15.2292, + "step": 1064 + }, + { + "epoch": 0.06106126193274661, + "grad_norm": 0.0, + "learning_rate": 6.715915266801292e-06, + "loss": 15.1645, + "step": 1065 + }, + { + "epoch": 0.06111859645099332, + "grad_norm": 0.0, + "learning_rate": 6.710100716628345e-06, + "loss": 15.1633, + "step": 1066 + }, + { + "epoch": 0.06117593096924003, + "grad_norm": 0.0, + "learning_rate": 6.704283546233066e-06, + "loss": 15.2905, + "step": 1067 + }, + { + "epoch": 0.06123326548748674, + "grad_norm": 0.0, + "learning_rate": 6.6984637645285475e-06, + "loss": 14.9021, + "step": 1068 + }, + { + "epoch": 0.06129060000573345, + "grad_norm": 0.0, + "learning_rate": 6.692641380431879e-06, + "loss": 15.3016, + "step": 1069 + }, + { + "epoch": 0.061347934523980165, + "grad_norm": 0.0, + "learning_rate": 6.6868164028641355e-06, + "loss": 15.2637, + "step": 1070 + }, + { + "epoch": 0.06140526904222687, + "grad_norm": 0.0, + "learning_rate": 6.68098884075037e-06, + "loss": 15.2153, + "step": 1071 + }, + { + "epoch": 0.06146260356047358, + "grad_norm": 0.0, + "learning_rate": 6.675158703019594e-06, + "loss": 15.416, + "step": 1072 + }, + { + "epoch": 0.06151993807872029, + "grad_norm": 0.0, + "learning_rate": 6.669325998604766e-06, + "loss": 15.0624, + "step": 1073 + }, + { + "epoch": 0.061577272596967006, + "grad_norm": 0.0, + "learning_rate": 6.663490736442771e-06, + "loss": 14.9949, + "step": 1074 + }, + { + "epoch": 0.061634607115213715, + "grad_norm": 0.0, + "learning_rate": 6.657652925474424e-06, + "loss": 15.2689, + "step": 1075 + }, + { + "epoch": 0.06169194163346042, + "grad_norm": 0.0, + "learning_rate": 6.6518125746444376e-06, + "loss": 14.9976, + "step": 1076 + }, + { + "epoch": 0.06174927615170714, + "grad_norm": 0.0, + "learning_rate": 6.645969692901416e-06, + "loss": 15.2178, + "step": 1077 + }, + { + "epoch": 0.06180661066995385, + "grad_norm": 0.0, + "learning_rate": 6.640124289197845e-06, + "loss": 15.2663, + "step": 1078 + }, + { + "epoch": 0.061863945188200556, + "grad_norm": 0.0, + "learning_rate": 6.634276372490074e-06, + "loss": 15.3322, + "step": 1079 + }, + { + "epoch": 0.061921279706447264, + "grad_norm": 0.0, + "learning_rate": 6.6284259517383e-06, + "loss": 15.8089, + "step": 1080 + }, + { + "epoch": 0.06197861422469398, + "grad_norm": 0.0, + "learning_rate": 6.622573035906557e-06, + "loss": 15.5136, + "step": 1081 + }, + { + "epoch": 0.06203594874294069, + "grad_norm": 0.0, + "learning_rate": 6.616717633962703e-06, + "loss": 15.4216, + "step": 1082 + }, + { + "epoch": 0.0620932832611874, + "grad_norm": 0.0, + "learning_rate": 6.6108597548784104e-06, + "loss": 15.3619, + "step": 1083 + }, + { + "epoch": 0.062150617779434106, + "grad_norm": 0.0, + "learning_rate": 6.604999407629137e-06, + "loss": 15.339, + "step": 1084 + }, + { + "epoch": 0.06220795229768082, + "grad_norm": 0.0, + "learning_rate": 6.599136601194128e-06, + "loss": 14.9992, + "step": 1085 + }, + { + "epoch": 0.06226528681592753, + "grad_norm": 0.0, + "learning_rate": 6.593271344556399e-06, + "loss": 15.297, + "step": 1086 + }, + { + "epoch": 0.06232262133417424, + "grad_norm": 0.0, + "learning_rate": 6.5874036467027135e-06, + "loss": 15.0719, + "step": 1087 + }, + { + "epoch": 0.06237995585242095, + "grad_norm": 0.0, + "learning_rate": 6.58153351662358e-06, + "loss": 15.1815, + "step": 1088 + }, + { + "epoch": 0.06243729037066766, + "grad_norm": 0.0, + "learning_rate": 6.575660963313233e-06, + "loss": 15.363, + "step": 1089 + }, + { + "epoch": 0.06249462488891437, + "grad_norm": 0.0, + "learning_rate": 6.5697859957696195e-06, + "loss": 15.193, + "step": 1090 + }, + { + "epoch": 0.06255195940716109, + "grad_norm": 0.0, + "learning_rate": 6.563908622994385e-06, + "loss": 15.1983, + "step": 1091 + }, + { + "epoch": 0.06260929392540779, + "grad_norm": 0.0, + "learning_rate": 6.558028853992859e-06, + "loss": 15.357, + "step": 1092 + }, + { + "epoch": 0.0626666284436545, + "grad_norm": 0.0, + "learning_rate": 6.552146697774049e-06, + "loss": 15.4091, + "step": 1093 + }, + { + "epoch": 0.06272396296190122, + "grad_norm": 0.0, + "learning_rate": 6.546262163350609e-06, + "loss": 15.4367, + "step": 1094 + }, + { + "epoch": 0.06278129748014792, + "grad_norm": 0.0, + "learning_rate": 6.540375259738849e-06, + "loss": 15.3776, + "step": 1095 + }, + { + "epoch": 0.06283863199839464, + "grad_norm": 0.0, + "learning_rate": 6.534485995958699e-06, + "loss": 15.3741, + "step": 1096 + }, + { + "epoch": 0.06289596651664134, + "grad_norm": 0.0, + "learning_rate": 6.528594381033714e-06, + "loss": 15.4107, + "step": 1097 + }, + { + "epoch": 0.06295330103488805, + "grad_norm": 0.0, + "learning_rate": 6.522700423991043e-06, + "loss": 15.1333, + "step": 1098 + }, + { + "epoch": 0.06301063555313477, + "grad_norm": 0.0, + "learning_rate": 6.51680413386143e-06, + "loss": 15.1594, + "step": 1099 + }, + { + "epoch": 0.06306797007138147, + "grad_norm": 0.0, + "learning_rate": 6.510905519679192e-06, + "loss": 15.0248, + "step": 1100 + }, + { + "epoch": 0.06312530458962819, + "grad_norm": 0.0, + "learning_rate": 6.5050045904822035e-06, + "loss": 15.3396, + "step": 1101 + }, + { + "epoch": 0.0631826391078749, + "grad_norm": 0.0, + "learning_rate": 6.499101355311891e-06, + "loss": 15.5805, + "step": 1102 + }, + { + "epoch": 0.0632399736261216, + "grad_norm": 0.0, + "learning_rate": 6.493195823213212e-06, + "loss": 15.3565, + "step": 1103 + }, + { + "epoch": 0.06329730814436832, + "grad_norm": 0.0, + "learning_rate": 6.487288003234646e-06, + "loss": 15.2634, + "step": 1104 + }, + { + "epoch": 0.06335464266261502, + "grad_norm": 0.0, + "learning_rate": 6.481377904428171e-06, + "loss": 15.388, + "step": 1105 + }, + { + "epoch": 0.06341197718086174, + "grad_norm": 0.0, + "learning_rate": 6.475465535849263e-06, + "loss": 15.4613, + "step": 1106 + }, + { + "epoch": 0.06346931169910845, + "grad_norm": 0.0, + "learning_rate": 6.469550906556874e-06, + "loss": 15.601, + "step": 1107 + }, + { + "epoch": 0.06352664621735515, + "grad_norm": 0.0, + "learning_rate": 6.4636340256134224e-06, + "loss": 15.2509, + "step": 1108 + }, + { + "epoch": 0.06358398073560187, + "grad_norm": 0.0, + "learning_rate": 6.457714902084769e-06, + "loss": 15.6251, + "step": 1109 + }, + { + "epoch": 0.06364131525384858, + "grad_norm": 0.0, + "learning_rate": 6.451793545040218e-06, + "loss": 15.2786, + "step": 1110 + }, + { + "epoch": 0.06369864977209529, + "grad_norm": 0.0, + "learning_rate": 6.445869963552496e-06, + "loss": 15.3809, + "step": 1111 + }, + { + "epoch": 0.063755984290342, + "grad_norm": 0.0, + "learning_rate": 6.439944166697731e-06, + "loss": 15.7765, + "step": 1112 + }, + { + "epoch": 0.06381331880858872, + "grad_norm": 0.0, + "learning_rate": 6.434016163555452e-06, + "loss": 15.0431, + "step": 1113 + }, + { + "epoch": 0.06387065332683542, + "grad_norm": 0.0, + "learning_rate": 6.428085963208567e-06, + "loss": 15.2539, + "step": 1114 + }, + { + "epoch": 0.06392798784508213, + "grad_norm": 0.0, + "learning_rate": 6.422153574743348e-06, + "loss": 15.2095, + "step": 1115 + }, + { + "epoch": 0.06398532236332884, + "grad_norm": 0.0, + "learning_rate": 6.416219007249424e-06, + "loss": 15.5957, + "step": 1116 + }, + { + "epoch": 0.06404265688157555, + "grad_norm": 0.0, + "learning_rate": 6.410282269819756e-06, + "loss": 15.3808, + "step": 1117 + }, + { + "epoch": 0.06409999139982227, + "grad_norm": 0.0, + "learning_rate": 6.404343371550639e-06, + "loss": 15.4368, + "step": 1118 + }, + { + "epoch": 0.06415732591806897, + "grad_norm": 0.0, + "learning_rate": 6.39840232154167e-06, + "loss": 15.2563, + "step": 1119 + }, + { + "epoch": 0.06421466043631568, + "grad_norm": 0.0, + "learning_rate": 6.392459128895747e-06, + "loss": 14.9078, + "step": 1120 + }, + { + "epoch": 0.0642719949545624, + "grad_norm": 0.0, + "learning_rate": 6.3865138027190535e-06, + "loss": 15.11, + "step": 1121 + }, + { + "epoch": 0.0643293294728091, + "grad_norm": 0.0, + "learning_rate": 6.380566352121037e-06, + "loss": 15.2601, + "step": 1122 + }, + { + "epoch": 0.06438666399105582, + "grad_norm": 0.0, + "learning_rate": 6.374616786214402e-06, + "loss": 15.2519, + "step": 1123 + }, + { + "epoch": 0.06444399850930252, + "grad_norm": 0.0, + "learning_rate": 6.368665114115096e-06, + "loss": 15.1558, + "step": 1124 + }, + { + "epoch": 0.06450133302754923, + "grad_norm": 0.0, + "learning_rate": 6.362711344942289e-06, + "loss": 15.2966, + "step": 1125 + }, + { + "epoch": 0.06455866754579595, + "grad_norm": 0.0, + "learning_rate": 6.356755487818371e-06, + "loss": 15.4227, + "step": 1126 + }, + { + "epoch": 0.06461600206404265, + "grad_norm": 0.0, + "learning_rate": 6.350797551868923e-06, + "loss": 15.6837, + "step": 1127 + }, + { + "epoch": 0.06467333658228937, + "grad_norm": 0.0, + "learning_rate": 6.344837546222718e-06, + "loss": 15.2018, + "step": 1128 + }, + { + "epoch": 0.06473067110053608, + "grad_norm": 0.0, + "learning_rate": 6.338875480011698e-06, + "loss": 15.0102, + "step": 1129 + }, + { + "epoch": 0.06478800561878278, + "grad_norm": 0.0, + "learning_rate": 6.33291136237096e-06, + "loss": 15.2271, + "step": 1130 + }, + { + "epoch": 0.0648453401370295, + "grad_norm": 0.0, + "learning_rate": 6.326945202438748e-06, + "loss": 15.2503, + "step": 1131 + }, + { + "epoch": 0.06490267465527622, + "grad_norm": 0.0, + "learning_rate": 6.3209770093564315e-06, + "loss": 15.0981, + "step": 1132 + }, + { + "epoch": 0.06496000917352292, + "grad_norm": 0.0, + "learning_rate": 6.3150067922684965e-06, + "loss": 15.335, + "step": 1133 + }, + { + "epoch": 0.06501734369176963, + "grad_norm": 0.0, + "learning_rate": 6.3090345603225324e-06, + "loss": 15.3527, + "step": 1134 + }, + { + "epoch": 0.06507467821001633, + "grad_norm": 0.0, + "learning_rate": 6.303060322669214e-06, + "loss": 15.294, + "step": 1135 + }, + { + "epoch": 0.06513201272826305, + "grad_norm": 0.0, + "learning_rate": 6.297084088462288e-06, + "loss": 14.9693, + "step": 1136 + }, + { + "epoch": 0.06518934724650977, + "grad_norm": 0.0, + "learning_rate": 6.291105866858562e-06, + "loss": 15.3394, + "step": 1137 + }, + { + "epoch": 0.06524668176475647, + "grad_norm": 0.0, + "learning_rate": 6.285125667017886e-06, + "loss": 15.804, + "step": 1138 + }, + { + "epoch": 0.06530401628300318, + "grad_norm": 0.0, + "learning_rate": 6.279143498103149e-06, + "loss": 15.4625, + "step": 1139 + }, + { + "epoch": 0.0653613508012499, + "grad_norm": 0.0, + "learning_rate": 6.273159369280244e-06, + "loss": 15.1583, + "step": 1140 + }, + { + "epoch": 0.0654186853194966, + "grad_norm": 0.0, + "learning_rate": 6.267173289718079e-06, + "loss": 15.213, + "step": 1141 + }, + { + "epoch": 0.06547601983774332, + "grad_norm": 0.0, + "learning_rate": 6.261185268588546e-06, + "loss": 15.3858, + "step": 1142 + }, + { + "epoch": 0.06553335435599002, + "grad_norm": 0.0, + "learning_rate": 6.25519531506651e-06, + "loss": 15.3585, + "step": 1143 + }, + { + "epoch": 0.06559068887423673, + "grad_norm": 0.0, + "learning_rate": 6.249203438329799e-06, + "loss": 15.5088, + "step": 1144 + }, + { + "epoch": 0.06564802339248345, + "grad_norm": 0.0, + "learning_rate": 6.24320964755919e-06, + "loss": 15.382, + "step": 1145 + }, + { + "epoch": 0.06570535791073015, + "grad_norm": 0.0, + "learning_rate": 6.237213951938389e-06, + "loss": 15.5293, + "step": 1146 + }, + { + "epoch": 0.06576269242897687, + "grad_norm": 0.0, + "learning_rate": 6.23121636065402e-06, + "loss": 15.1687, + "step": 1147 + }, + { + "epoch": 0.06582002694722358, + "grad_norm": 0.0, + "learning_rate": 6.225216882895615e-06, + "loss": 15.5723, + "step": 1148 + }, + { + "epoch": 0.06587736146547028, + "grad_norm": 0.0, + "learning_rate": 6.219215527855596e-06, + "loss": 15.4502, + "step": 1149 + }, + { + "epoch": 0.065934695983717, + "grad_norm": 0.0, + "learning_rate": 6.213212304729259e-06, + "loss": 15.6545, + "step": 1150 + }, + { + "epoch": 0.06599203050196371, + "grad_norm": 0.0, + "learning_rate": 6.207207222714763e-06, + "loss": 15.2629, + "step": 1151 + }, + { + "epoch": 0.06604936502021042, + "grad_norm": 0.0, + "learning_rate": 6.201200291013117e-06, + "loss": 15.1331, + "step": 1152 + }, + { + "epoch": 0.06610669953845713, + "grad_norm": 0.0, + "learning_rate": 6.195191518828163e-06, + "loss": 15.142, + "step": 1153 + }, + { + "epoch": 0.06616403405670383, + "grad_norm": 0.0, + "learning_rate": 6.1891809153665614e-06, + "loss": 15.5586, + "step": 1154 + }, + { + "epoch": 0.06622136857495055, + "grad_norm": 0.0, + "learning_rate": 6.183168489837781e-06, + "loss": 15.1567, + "step": 1155 + }, + { + "epoch": 0.06627870309319726, + "grad_norm": 0.0, + "learning_rate": 6.177154251454082e-06, + "loss": 15.3777, + "step": 1156 + }, + { + "epoch": 0.06633603761144397, + "grad_norm": 0.0, + "learning_rate": 6.1711382094305e-06, + "loss": 15.5164, + "step": 1157 + }, + { + "epoch": 0.06639337212969068, + "grad_norm": 0.0, + "learning_rate": 6.165120372984836e-06, + "loss": 14.9784, + "step": 1158 + }, + { + "epoch": 0.0664507066479374, + "grad_norm": 0.0, + "learning_rate": 6.1591007513376425e-06, + "loss": 15.284, + "step": 1159 + }, + { + "epoch": 0.0665080411661841, + "grad_norm": 0.0, + "learning_rate": 6.153079353712201e-06, + "loss": 15.2941, + "step": 1160 + }, + { + "epoch": 0.06656537568443081, + "grad_norm": 0.0, + "learning_rate": 6.1470561893345215e-06, + "loss": 15.3123, + "step": 1161 + }, + { + "epoch": 0.06662271020267752, + "grad_norm": 0.0, + "learning_rate": 6.141031267433316e-06, + "loss": 15.1475, + "step": 1162 + }, + { + "epoch": 0.06668004472092423, + "grad_norm": 0.0, + "learning_rate": 6.1350045972399926e-06, + "loss": 15.3684, + "step": 1163 + }, + { + "epoch": 0.06673737923917095, + "grad_norm": 0.0, + "learning_rate": 6.128976187988633e-06, + "loss": 15.2453, + "step": 1164 + }, + { + "epoch": 0.06679471375741765, + "grad_norm": 0.0, + "learning_rate": 6.122946048915991e-06, + "loss": 15.0222, + "step": 1165 + }, + { + "epoch": 0.06685204827566436, + "grad_norm": 0.0, + "learning_rate": 6.116914189261466e-06, + "loss": 15.4126, + "step": 1166 + }, + { + "epoch": 0.06690938279391108, + "grad_norm": 0.0, + "learning_rate": 6.110880618267092e-06, + "loss": 15.4454, + "step": 1167 + }, + { + "epoch": 0.06696671731215778, + "grad_norm": 0.0, + "learning_rate": 6.1048453451775305e-06, + "loss": 15.4416, + "step": 1168 + }, + { + "epoch": 0.0670240518304045, + "grad_norm": 0.0, + "learning_rate": 6.0988083792400466e-06, + "loss": 15.4173, + "step": 1169 + }, + { + "epoch": 0.06708138634865121, + "grad_norm": 0.0, + "learning_rate": 6.092769729704502e-06, + "loss": 15.521, + "step": 1170 + }, + { + "epoch": 0.06713872086689791, + "grad_norm": 0.0, + "learning_rate": 6.086729405823335e-06, + "loss": 14.8905, + "step": 1171 + }, + { + "epoch": 0.06719605538514463, + "grad_norm": 0.0, + "learning_rate": 6.080687416851553e-06, + "loss": 15.0262, + "step": 1172 + }, + { + "epoch": 0.06725338990339133, + "grad_norm": 0.0, + "learning_rate": 6.074643772046712e-06, + "loss": 15.3374, + "step": 1173 + }, + { + "epoch": 0.06731072442163805, + "grad_norm": 0.0, + "learning_rate": 6.0685984806689055e-06, + "loss": 15.2824, + "step": 1174 + }, + { + "epoch": 0.06736805893988476, + "grad_norm": 0.0, + "learning_rate": 6.06255155198075e-06, + "loss": 15.2405, + "step": 1175 + }, + { + "epoch": 0.06742539345813146, + "grad_norm": 0.0, + "learning_rate": 6.056502995247371e-06, + "loss": 15.4861, + "step": 1176 + }, + { + "epoch": 0.06748272797637818, + "grad_norm": 0.0, + "learning_rate": 6.05045281973639e-06, + "loss": 15.3327, + "step": 1177 + }, + { + "epoch": 0.0675400624946249, + "grad_norm": 0.0, + "learning_rate": 6.044401034717905e-06, + "loss": 15.0999, + "step": 1178 + }, + { + "epoch": 0.0675973970128716, + "grad_norm": 0.0, + "learning_rate": 6.038347649464483e-06, + "loss": 15.274, + "step": 1179 + }, + { + "epoch": 0.06765473153111831, + "grad_norm": 0.0, + "learning_rate": 6.032292673251143e-06, + "loss": 15.4088, + "step": 1180 + }, + { + "epoch": 0.06771206604936501, + "grad_norm": 0.0, + "learning_rate": 6.0262361153553395e-06, + "loss": 15.5959, + "step": 1181 + }, + { + "epoch": 0.06776940056761173, + "grad_norm": 0.0, + "learning_rate": 6.020177985056953e-06, + "loss": 15.287, + "step": 1182 + }, + { + "epoch": 0.06782673508585844, + "grad_norm": 0.0, + "learning_rate": 6.014118291638272e-06, + "loss": 15.0794, + "step": 1183 + }, + { + "epoch": 0.06788406960410515, + "grad_norm": 0.0, + "learning_rate": 6.008057044383978e-06, + "loss": 15.3053, + "step": 1184 + }, + { + "epoch": 0.06794140412235186, + "grad_norm": 0.0, + "learning_rate": 6.0019942525811385e-06, + "loss": 15.4402, + "step": 1185 + }, + { + "epoch": 0.06799873864059858, + "grad_norm": 0.0, + "learning_rate": 5.995929925519181e-06, + "loss": 15.2618, + "step": 1186 + }, + { + "epoch": 0.06805607315884528, + "grad_norm": 0.0, + "learning_rate": 5.989864072489892e-06, + "loss": 15.4546, + "step": 1187 + }, + { + "epoch": 0.068113407677092, + "grad_norm": 0.0, + "learning_rate": 5.98379670278739e-06, + "loss": 15.4383, + "step": 1188 + }, + { + "epoch": 0.06817074219533871, + "grad_norm": 0.0, + "learning_rate": 5.977727825708123e-06, + "loss": 15.2373, + "step": 1189 + }, + { + "epoch": 0.06822807671358541, + "grad_norm": 0.0, + "learning_rate": 5.971657450550844e-06, + "loss": 15.3016, + "step": 1190 + }, + { + "epoch": 0.06828541123183213, + "grad_norm": 0.0, + "learning_rate": 5.965585586616602e-06, + "loss": 15.619, + "step": 1191 + }, + { + "epoch": 0.06834274575007883, + "grad_norm": 0.0, + "learning_rate": 5.959512243208732e-06, + "loss": 15.3967, + "step": 1192 + }, + { + "epoch": 0.06840008026832554, + "grad_norm": 0.0, + "learning_rate": 5.953437429632829e-06, + "loss": 15.1485, + "step": 1193 + }, + { + "epoch": 0.06845741478657226, + "grad_norm": 0.0, + "learning_rate": 5.947361155196744e-06, + "loss": 15.3058, + "step": 1194 + }, + { + "epoch": 0.06851474930481896, + "grad_norm": 0.0, + "learning_rate": 5.941283429210568e-06, + "loss": 15.2705, + "step": 1195 + }, + { + "epoch": 0.06857208382306568, + "grad_norm": 0.0, + "learning_rate": 5.935204260986611e-06, + "loss": 14.8969, + "step": 1196 + }, + { + "epoch": 0.06862941834131239, + "grad_norm": 0.0, + "learning_rate": 5.9291236598393996e-06, + "loss": 15.0611, + "step": 1197 + }, + { + "epoch": 0.0686867528595591, + "grad_norm": 0.0, + "learning_rate": 5.9230416350856505e-06, + "loss": 15.297, + "step": 1198 + }, + { + "epoch": 0.06874408737780581, + "grad_norm": 0.0, + "learning_rate": 5.9169581960442615e-06, + "loss": 15.4516, + "step": 1199 + }, + { + "epoch": 0.06880142189605253, + "grad_norm": 0.0, + "learning_rate": 5.910873352036302e-06, + "loss": 15.607, + "step": 1200 + }, + { + "epoch": 0.06885875641429923, + "grad_norm": 0.0, + "learning_rate": 5.904787112384991e-06, + "loss": 15.0717, + "step": 1201 + }, + { + "epoch": 0.06891609093254594, + "grad_norm": 0.0, + "learning_rate": 5.898699486415686e-06, + "loss": 15.1691, + "step": 1202 + }, + { + "epoch": 0.06897342545079264, + "grad_norm": 0.0, + "learning_rate": 5.892610483455867e-06, + "loss": 15.5078, + "step": 1203 + }, + { + "epoch": 0.06903075996903936, + "grad_norm": 0.0, + "learning_rate": 5.886520112835128e-06, + "loss": 15.2725, + "step": 1204 + }, + { + "epoch": 0.06908809448728608, + "grad_norm": 0.0, + "learning_rate": 5.880428383885157e-06, + "loss": 15.6883, + "step": 1205 + }, + { + "epoch": 0.06914542900553278, + "grad_norm": 0.0, + "learning_rate": 5.874335305939719e-06, + "loss": 15.4606, + "step": 1206 + }, + { + "epoch": 0.06920276352377949, + "grad_norm": 0.0, + "learning_rate": 5.8682408883346535e-06, + "loss": 15.3959, + "step": 1207 + }, + { + "epoch": 0.06926009804202621, + "grad_norm": 0.0, + "learning_rate": 5.8621451404078455e-06, + "loss": 15.3062, + "step": 1208 + }, + { + "epoch": 0.06931743256027291, + "grad_norm": 0.0, + "learning_rate": 5.856048071499223e-06, + "loss": 15.5863, + "step": 1209 + }, + { + "epoch": 0.06937476707851963, + "grad_norm": 0.0, + "learning_rate": 5.849949690950736e-06, + "loss": 15.1853, + "step": 1210 + }, + { + "epoch": 0.06943210159676633, + "grad_norm": 0.0, + "learning_rate": 5.843850008106344e-06, + "loss": 14.9957, + "step": 1211 + }, + { + "epoch": 0.06948943611501304, + "grad_norm": 0.0, + "learning_rate": 5.837749032312005e-06, + "loss": 15.2185, + "step": 1212 + }, + { + "epoch": 0.06954677063325976, + "grad_norm": 0.0, + "learning_rate": 5.831646772915651e-06, + "loss": 15.132, + "step": 1213 + }, + { + "epoch": 0.06960410515150646, + "grad_norm": 0.0, + "learning_rate": 5.82554323926719e-06, + "loss": 15.1604, + "step": 1214 + }, + { + "epoch": 0.06966143966975318, + "grad_norm": 0.0, + "learning_rate": 5.819438440718476e-06, + "loss": 14.8557, + "step": 1215 + }, + { + "epoch": 0.06971877418799989, + "grad_norm": 0.0, + "learning_rate": 5.8133323866233005e-06, + "loss": 15.6393, + "step": 1216 + }, + { + "epoch": 0.06977610870624659, + "grad_norm": 0.0, + "learning_rate": 5.807225086337383e-06, + "loss": 15.1016, + "step": 1217 + }, + { + "epoch": 0.06983344322449331, + "grad_norm": 0.0, + "learning_rate": 5.8011165492183516e-06, + "loss": 15.2926, + "step": 1218 + }, + { + "epoch": 0.06989077774274002, + "grad_norm": 0.0, + "learning_rate": 5.795006784625728e-06, + "loss": 15.0293, + "step": 1219 + }, + { + "epoch": 0.06994811226098673, + "grad_norm": 0.0, + "learning_rate": 5.788895801920914e-06, + "loss": 15.1197, + "step": 1220 + }, + { + "epoch": 0.07000544677923344, + "grad_norm": 0.0, + "learning_rate": 5.782783610467177e-06, + "loss": 15.5134, + "step": 1221 + }, + { + "epoch": 0.07006278129748014, + "grad_norm": 0.0, + "learning_rate": 5.776670219629643e-06, + "loss": 15.3721, + "step": 1222 + }, + { + "epoch": 0.07012011581572686, + "grad_norm": 0.0, + "learning_rate": 5.770555638775267e-06, + "loss": 15.3128, + "step": 1223 + }, + { + "epoch": 0.07017745033397357, + "grad_norm": 0.0, + "learning_rate": 5.764439877272833e-06, + "loss": 15.3769, + "step": 1224 + }, + { + "epoch": 0.07023478485222028, + "grad_norm": 0.0, + "learning_rate": 5.75832294449293e-06, + "loss": 15.113, + "step": 1225 + }, + { + "epoch": 0.07029211937046699, + "grad_norm": 0.0, + "learning_rate": 5.752204849807948e-06, + "loss": 15.3764, + "step": 1226 + }, + { + "epoch": 0.0703494538887137, + "grad_norm": 0.0, + "learning_rate": 5.74608560259205e-06, + "loss": 15.2141, + "step": 1227 + }, + { + "epoch": 0.07040678840696041, + "grad_norm": 0.0, + "learning_rate": 5.739965212221168e-06, + "loss": 15.0714, + "step": 1228 + }, + { + "epoch": 0.07046412292520712, + "grad_norm": 0.0, + "learning_rate": 5.733843688072987e-06, + "loss": 15.5016, + "step": 1229 + }, + { + "epoch": 0.07052145744345382, + "grad_norm": 0.0, + "learning_rate": 5.727721039526928e-06, + "loss": 15.3094, + "step": 1230 + }, + { + "epoch": 0.07057879196170054, + "grad_norm": 0.0, + "learning_rate": 5.7215972759641335e-06, + "loss": 15.3874, + "step": 1231 + }, + { + "epoch": 0.07063612647994726, + "grad_norm": 0.0, + "learning_rate": 5.715472406767457e-06, + "loss": 15.219, + "step": 1232 + }, + { + "epoch": 0.07069346099819396, + "grad_norm": 0.0, + "learning_rate": 5.709346441321443e-06, + "loss": 15.4031, + "step": 1233 + }, + { + "epoch": 0.07075079551644067, + "grad_norm": 0.0, + "learning_rate": 5.703219389012317e-06, + "loss": 15.1785, + "step": 1234 + }, + { + "epoch": 0.07080813003468739, + "grad_norm": 0.0, + "learning_rate": 5.697091259227973e-06, + "loss": 15.2524, + "step": 1235 + }, + { + "epoch": 0.07086546455293409, + "grad_norm": 0.0, + "learning_rate": 5.69096206135795e-06, + "loss": 15.2352, + "step": 1236 + }, + { + "epoch": 0.0709227990711808, + "grad_norm": 0.0, + "learning_rate": 5.684831804793427e-06, + "loss": 15.1509, + "step": 1237 + }, + { + "epoch": 0.07098013358942752, + "grad_norm": 0.0, + "learning_rate": 5.6787004989272066e-06, + "loss": 15.2295, + "step": 1238 + }, + { + "epoch": 0.07103746810767422, + "grad_norm": 0.0, + "learning_rate": 5.6725681531536955e-06, + "loss": 15.2069, + "step": 1239 + }, + { + "epoch": 0.07109480262592094, + "grad_norm": 0.0, + "learning_rate": 5.666434776868895e-06, + "loss": 15.4838, + "step": 1240 + }, + { + "epoch": 0.07115213714416764, + "grad_norm": 0.0, + "learning_rate": 5.660300379470387e-06, + "loss": 15.1852, + "step": 1241 + }, + { + "epoch": 0.07120947166241436, + "grad_norm": 0.0, + "learning_rate": 5.654164970357316e-06, + "loss": 15.2174, + "step": 1242 + }, + { + "epoch": 0.07126680618066107, + "grad_norm": 0.0, + "learning_rate": 5.64802855893038e-06, + "loss": 15.5695, + "step": 1243 + }, + { + "epoch": 0.07132414069890777, + "grad_norm": 0.0, + "learning_rate": 5.641891154591805e-06, + "loss": 15.093, + "step": 1244 + }, + { + "epoch": 0.07138147521715449, + "grad_norm": 0.0, + "learning_rate": 5.635752766745347e-06, + "loss": 15.5662, + "step": 1245 + }, + { + "epoch": 0.0714388097354012, + "grad_norm": 0.0, + "learning_rate": 5.629613404796267e-06, + "loss": 15.529, + "step": 1246 + }, + { + "epoch": 0.0714961442536479, + "grad_norm": 0.0, + "learning_rate": 5.623473078151313e-06, + "loss": 15.3623, + "step": 1247 + }, + { + "epoch": 0.07155347877189462, + "grad_norm": 0.0, + "learning_rate": 5.617331796218717e-06, + "loss": 15.7112, + "step": 1248 + }, + { + "epoch": 0.07161081329014132, + "grad_norm": 0.0, + "learning_rate": 5.611189568408173e-06, + "loss": 15.6012, + "step": 1249 + }, + { + "epoch": 0.07166814780838804, + "grad_norm": 0.0, + "learning_rate": 5.605046404130824e-06, + "loss": 15.1824, + "step": 1250 + }, + { + "epoch": 0.07172548232663475, + "grad_norm": 0.0, + "learning_rate": 5.598902312799247e-06, + "loss": 15.3857, + "step": 1251 + }, + { + "epoch": 0.07178281684488146, + "grad_norm": 0.0, + "learning_rate": 5.592757303827441e-06, + "loss": 15.3178, + "step": 1252 + }, + { + "epoch": 0.07184015136312817, + "grad_norm": 0.0, + "learning_rate": 5.586611386630811e-06, + "loss": 14.9769, + "step": 1253 + }, + { + "epoch": 0.07189748588137489, + "grad_norm": 0.0, + "learning_rate": 5.5804645706261515e-06, + "loss": 15.2135, + "step": 1254 + }, + { + "epoch": 0.07195482039962159, + "grad_norm": 0.0, + "learning_rate": 5.574316865231637e-06, + "loss": 15.5411, + "step": 1255 + }, + { + "epoch": 0.0720121549178683, + "grad_norm": 0.0, + "learning_rate": 5.568168279866801e-06, + "loss": 15.4522, + "step": 1256 + }, + { + "epoch": 0.07206948943611502, + "grad_norm": 0.0, + "learning_rate": 5.562018823952532e-06, + "loss": 15.5142, + "step": 1257 + }, + { + "epoch": 0.07212682395436172, + "grad_norm": 0.0, + "learning_rate": 5.5558685069110444e-06, + "loss": 15.1283, + "step": 1258 + }, + { + "epoch": 0.07218415847260844, + "grad_norm": 0.0, + "learning_rate": 5.549717338165876e-06, + "loss": 15.3748, + "step": 1259 + }, + { + "epoch": 0.07224149299085514, + "grad_norm": 0.0, + "learning_rate": 5.5435653271418686e-06, + "loss": 15.4054, + "step": 1260 + }, + { + "epoch": 0.07229882750910185, + "grad_norm": 0.0, + "learning_rate": 5.537412483265156e-06, + "loss": 15.1071, + "step": 1261 + }, + { + "epoch": 0.07235616202734857, + "grad_norm": 0.0, + "learning_rate": 5.5312588159631485e-06, + "loss": 15.3223, + "step": 1262 + }, + { + "epoch": 0.07241349654559527, + "grad_norm": 0.0, + "learning_rate": 5.525104334664517e-06, + "loss": 15.4098, + "step": 1263 + }, + { + "epoch": 0.07247083106384199, + "grad_norm": 0.0, + "learning_rate": 5.518949048799176e-06, + "loss": 15.1196, + "step": 1264 + }, + { + "epoch": 0.0725281655820887, + "grad_norm": 0.0, + "learning_rate": 5.512792967798278e-06, + "loss": 15.1726, + "step": 1265 + }, + { + "epoch": 0.0725855001003354, + "grad_norm": 0.0, + "learning_rate": 5.506636101094193e-06, + "loss": 15.07, + "step": 1266 + }, + { + "epoch": 0.07264283461858212, + "grad_norm": 0.0, + "learning_rate": 5.500478458120493e-06, + "loss": 15.5946, + "step": 1267 + }, + { + "epoch": 0.07270016913682882, + "grad_norm": 0.0, + "learning_rate": 5.4943200483119385e-06, + "loss": 15.0114, + "step": 1268 + }, + { + "epoch": 0.07275750365507554, + "grad_norm": 0.0, + "learning_rate": 5.48816088110447e-06, + "loss": 15.1489, + "step": 1269 + }, + { + "epoch": 0.07281483817332225, + "grad_norm": 0.0, + "learning_rate": 5.482000965935182e-06, + "loss": 15.2691, + "step": 1270 + }, + { + "epoch": 0.07287217269156895, + "grad_norm": 0.0, + "learning_rate": 5.475840312242321e-06, + "loss": 15.2723, + "step": 1271 + }, + { + "epoch": 0.07292950720981567, + "grad_norm": 0.0, + "learning_rate": 5.4696789294652596e-06, + "loss": 15.2848, + "step": 1272 + }, + { + "epoch": 0.07298684172806238, + "grad_norm": 0.0, + "learning_rate": 5.463516827044492e-06, + "loss": 15.2138, + "step": 1273 + }, + { + "epoch": 0.07304417624630909, + "grad_norm": 0.0, + "learning_rate": 5.457354014421613e-06, + "loss": 15.3442, + "step": 1274 + }, + { + "epoch": 0.0731015107645558, + "grad_norm": 0.0, + "learning_rate": 5.4511905010393055e-06, + "loss": 15.102, + "step": 1275 + }, + { + "epoch": 0.07315884528280252, + "grad_norm": 0.0, + "learning_rate": 5.445026296341325e-06, + "loss": 15.5483, + "step": 1276 + }, + { + "epoch": 0.07321617980104922, + "grad_norm": 0.0, + "learning_rate": 5.438861409772489e-06, + "loss": 15.2401, + "step": 1277 + }, + { + "epoch": 0.07327351431929593, + "grad_norm": 0.0, + "learning_rate": 5.432695850778658e-06, + "loss": 15.4661, + "step": 1278 + }, + { + "epoch": 0.07333084883754264, + "grad_norm": 0.0, + "learning_rate": 5.4265296288067235e-06, + "loss": 15.4194, + "step": 1279 + }, + { + "epoch": 0.07338818335578935, + "grad_norm": 0.0, + "learning_rate": 5.420362753304594e-06, + "loss": 15.1753, + "step": 1280 + }, + { + "epoch": 0.07344551787403607, + "grad_norm": 0.0, + "learning_rate": 5.414195233721175e-06, + "loss": 15.5165, + "step": 1281 + }, + { + "epoch": 0.07350285239228277, + "grad_norm": 0.0, + "learning_rate": 5.408027079506362e-06, + "loss": 15.08, + "step": 1282 + }, + { + "epoch": 0.07356018691052948, + "grad_norm": 0.0, + "learning_rate": 5.401858300111024e-06, + "loss": 15.2593, + "step": 1283 + }, + { + "epoch": 0.0736175214287762, + "grad_norm": 0.0, + "learning_rate": 5.395688904986987e-06, + "loss": 15.019, + "step": 1284 + }, + { + "epoch": 0.0736748559470229, + "grad_norm": 0.0, + "learning_rate": 5.389518903587016e-06, + "loss": 15.4, + "step": 1285 + }, + { + "epoch": 0.07373219046526962, + "grad_norm": 0.0, + "learning_rate": 5.383348305364814e-06, + "loss": 15.1214, + "step": 1286 + }, + { + "epoch": 0.07378952498351632, + "grad_norm": 0.0, + "learning_rate": 5.37717711977499e-06, + "loss": 15.6768, + "step": 1287 + }, + { + "epoch": 0.07384685950176303, + "grad_norm": 0.0, + "learning_rate": 5.371005356273058e-06, + "loss": 15.1213, + "step": 1288 + }, + { + "epoch": 0.07390419402000975, + "grad_norm": 0.0, + "learning_rate": 5.364833024315414e-06, + "loss": 15.2674, + "step": 1289 + }, + { + "epoch": 0.07396152853825645, + "grad_norm": 0.0, + "learning_rate": 5.358660133359328e-06, + "loss": 15.4868, + "step": 1290 + }, + { + "epoch": 0.07401886305650317, + "grad_norm": 0.0, + "learning_rate": 5.352486692862926e-06, + "loss": 15.296, + "step": 1291 + }, + { + "epoch": 0.07407619757474988, + "grad_norm": 0.0, + "learning_rate": 5.346312712285172e-06, + "loss": 15.4523, + "step": 1292 + }, + { + "epoch": 0.07413353209299658, + "grad_norm": 0.0, + "learning_rate": 5.340138201085864e-06, + "loss": 15.1769, + "step": 1293 + }, + { + "epoch": 0.0741908666112433, + "grad_norm": 0.0, + "learning_rate": 5.3339631687256085e-06, + "loss": 15.244, + "step": 1294 + }, + { + "epoch": 0.07424820112949002, + "grad_norm": 0.0, + "learning_rate": 5.327787624665811e-06, + "loss": 15.4797, + "step": 1295 + }, + { + "epoch": 0.07430553564773672, + "grad_norm": 0.0, + "learning_rate": 5.321611578368664e-06, + "loss": 15.2982, + "step": 1296 + }, + { + "epoch": 0.07436287016598343, + "grad_norm": 0.0, + "learning_rate": 5.3154350392971245e-06, + "loss": 15.1468, + "step": 1297 + }, + { + "epoch": 0.07442020468423013, + "grad_norm": 0.0, + "learning_rate": 5.309258016914911e-06, + "loss": 15.3865, + "step": 1298 + }, + { + "epoch": 0.07447753920247685, + "grad_norm": 0.0, + "learning_rate": 5.303080520686474e-06, + "loss": 15.3151, + "step": 1299 + }, + { + "epoch": 0.07453487372072357, + "grad_norm": 0.0, + "learning_rate": 5.296902560077e-06, + "loss": 15.5556, + "step": 1300 + }, + { + "epoch": 0.07459220823897027, + "grad_norm": 0.0, + "learning_rate": 5.290724144552379e-06, + "loss": 15.4445, + "step": 1301 + }, + { + "epoch": 0.07464954275721698, + "grad_norm": 0.0, + "learning_rate": 5.284545283579204e-06, + "loss": 15.0258, + "step": 1302 + }, + { + "epoch": 0.0747068772754637, + "grad_norm": 0.0, + "learning_rate": 5.278365986624743e-06, + "loss": 15.1966, + "step": 1303 + }, + { + "epoch": 0.0747642117937104, + "grad_norm": 0.0, + "learning_rate": 5.272186263156937e-06, + "loss": 15.1388, + "step": 1304 + }, + { + "epoch": 0.07482154631195712, + "grad_norm": 0.0, + "learning_rate": 5.266006122644385e-06, + "loss": 15.0732, + "step": 1305 + }, + { + "epoch": 0.07487888083020383, + "grad_norm": 0.0, + "learning_rate": 5.259825574556315e-06, + "loss": 15.3282, + "step": 1306 + }, + { + "epoch": 0.07493621534845053, + "grad_norm": 0.0, + "learning_rate": 5.2536446283625865e-06, + "loss": 15.4205, + "step": 1307 + }, + { + "epoch": 0.07499354986669725, + "grad_norm": 0.0, + "learning_rate": 5.247463293533667e-06, + "loss": 15.5121, + "step": 1308 + }, + { + "epoch": 0.07505088438494395, + "grad_norm": 0.0, + "learning_rate": 5.241281579540619e-06, + "loss": 15.1642, + "step": 1309 + }, + { + "epoch": 0.07510821890319067, + "grad_norm": 0.0, + "learning_rate": 5.235099495855086e-06, + "loss": 15.0035, + "step": 1310 + }, + { + "epoch": 0.07516555342143738, + "grad_norm": 0.0, + "learning_rate": 5.228917051949279e-06, + "loss": 15.2187, + "step": 1311 + }, + { + "epoch": 0.07522288793968408, + "grad_norm": 0.0, + "learning_rate": 5.222734257295963e-06, + "loss": 15.0756, + "step": 1312 + }, + { + "epoch": 0.0752802224579308, + "grad_norm": 0.0, + "learning_rate": 5.216551121368432e-06, + "loss": 15.1728, + "step": 1313 + }, + { + "epoch": 0.07533755697617751, + "grad_norm": 0.0, + "learning_rate": 5.210367653640512e-06, + "loss": 15.5529, + "step": 1314 + }, + { + "epoch": 0.07539489149442422, + "grad_norm": 0.0, + "learning_rate": 5.2041838635865336e-06, + "loss": 14.919, + "step": 1315 + }, + { + "epoch": 0.07545222601267093, + "grad_norm": 0.0, + "learning_rate": 5.197999760681324e-06, + "loss": 15.5087, + "step": 1316 + }, + { + "epoch": 0.07550956053091763, + "grad_norm": 0.0, + "learning_rate": 5.191815354400183e-06, + "loss": 15.603, + "step": 1317 + }, + { + "epoch": 0.07556689504916435, + "grad_norm": 0.0, + "learning_rate": 5.1856306542188805e-06, + "loss": 15.3248, + "step": 1318 + }, + { + "epoch": 0.07562422956741106, + "grad_norm": 0.0, + "learning_rate": 5.17944566961364e-06, + "loss": 15.214, + "step": 1319 + }, + { + "epoch": 0.07568156408565777, + "grad_norm": 0.0, + "learning_rate": 5.173260410061112e-06, + "loss": 15.3195, + "step": 1320 + }, + { + "epoch": 0.07573889860390448, + "grad_norm": 0.0, + "learning_rate": 5.1670748850383734e-06, + "loss": 15.1759, + "step": 1321 + }, + { + "epoch": 0.0757962331221512, + "grad_norm": 0.0, + "learning_rate": 5.16088910402291e-06, + "loss": 15.2272, + "step": 1322 + }, + { + "epoch": 0.0758535676403979, + "grad_norm": 0.0, + "learning_rate": 5.154703076492597e-06, + "loss": 15.3173, + "step": 1323 + }, + { + "epoch": 0.07591090215864461, + "grad_norm": 0.0, + "learning_rate": 5.148516811925684e-06, + "loss": 14.9612, + "step": 1324 + }, + { + "epoch": 0.07596823667689133, + "grad_norm": 0.0, + "learning_rate": 5.14233031980079e-06, + "loss": 15.0465, + "step": 1325 + }, + { + "epoch": 0.07602557119513803, + "grad_norm": 0.0, + "learning_rate": 5.136143609596882e-06, + "loss": 15.1786, + "step": 1326 + }, + { + "epoch": 0.07608290571338475, + "grad_norm": 0.0, + "learning_rate": 5.129956690793255e-06, + "loss": 15.1265, + "step": 1327 + }, + { + "epoch": 0.07614024023163145, + "grad_norm": 0.0, + "learning_rate": 5.1237695728695294e-06, + "loss": 15.5801, + "step": 1328 + }, + { + "epoch": 0.07619757474987816, + "grad_norm": 0.0, + "learning_rate": 5.117582265305629e-06, + "loss": 15.5675, + "step": 1329 + }, + { + "epoch": 0.07625490926812488, + "grad_norm": 0.0, + "learning_rate": 5.111394777581769e-06, + "loss": 15.4257, + "step": 1330 + }, + { + "epoch": 0.07631224378637158, + "grad_norm": 0.0, + "learning_rate": 5.105207119178439e-06, + "loss": 15.2455, + "step": 1331 + }, + { + "epoch": 0.0763695783046183, + "grad_norm": 0.0, + "learning_rate": 5.099019299576391e-06, + "loss": 15.3326, + "step": 1332 + }, + { + "epoch": 0.07642691282286501, + "grad_norm": 0.0, + "learning_rate": 5.0928313282566255e-06, + "loss": 15.2436, + "step": 1333 + }, + { + "epoch": 0.07648424734111171, + "grad_norm": 0.0, + "learning_rate": 5.086643214700371e-06, + "loss": 15.4032, + "step": 1334 + }, + { + "epoch": 0.07654158185935843, + "grad_norm": 0.0, + "learning_rate": 5.080454968389078e-06, + "loss": 15.2234, + "step": 1335 + }, + { + "epoch": 0.07659891637760513, + "grad_norm": 0.0, + "learning_rate": 5.074266598804402e-06, + "loss": 15.2235, + "step": 1336 + }, + { + "epoch": 0.07665625089585185, + "grad_norm": 0.0, + "learning_rate": 5.068078115428179e-06, + "loss": 15.2521, + "step": 1337 + }, + { + "epoch": 0.07671358541409856, + "grad_norm": 0.0, + "learning_rate": 5.06188952774243e-06, + "loss": 15.389, + "step": 1338 + }, + { + "epoch": 0.07677091993234526, + "grad_norm": 0.0, + "learning_rate": 5.0557008452293275e-06, + "loss": 15.1074, + "step": 1339 + }, + { + "epoch": 0.07682825445059198, + "grad_norm": 0.0, + "learning_rate": 5.049512077371197e-06, + "loss": 15.0319, + "step": 1340 + }, + { + "epoch": 0.0768855889688387, + "grad_norm": 0.0, + "learning_rate": 5.043323233650485e-06, + "loss": 15.2615, + "step": 1341 + }, + { + "epoch": 0.0769429234870854, + "grad_norm": 0.0, + "learning_rate": 5.037134323549763e-06, + "loss": 15.044, + "step": 1342 + }, + { + "epoch": 0.07700025800533211, + "grad_norm": 0.0, + "learning_rate": 5.030945356551701e-06, + "loss": 15.0863, + "step": 1343 + }, + { + "epoch": 0.07705759252357883, + "grad_norm": 0.0, + "learning_rate": 5.024756342139053e-06, + "loss": 15.4426, + "step": 1344 + }, + { + "epoch": 0.07711492704182553, + "grad_norm": 0.0, + "learning_rate": 5.0185672897946515e-06, + "loss": 15.4382, + "step": 1345 + }, + { + "epoch": 0.07717226156007224, + "grad_norm": 0.0, + "learning_rate": 5.012378209001383e-06, + "loss": 15.4366, + "step": 1346 + }, + { + "epoch": 0.07722959607831895, + "grad_norm": 0.0, + "learning_rate": 5.00618910924218e-06, + "loss": 15.0979, + "step": 1347 + }, + { + "epoch": 0.07728693059656566, + "grad_norm": 0.0, + "learning_rate": 5e-06, + "loss": 15.0105, + "step": 1348 + }, + { + "epoch": 0.07734426511481238, + "grad_norm": 0.0, + "learning_rate": 4.993810890757823e-06, + "loss": 15.3259, + "step": 1349 + }, + { + "epoch": 0.07740159963305908, + "grad_norm": 0.0, + "learning_rate": 4.987621790998619e-06, + "loss": 14.9897, + "step": 1350 + }, + { + "epoch": 0.0774589341513058, + "grad_norm": 0.0, + "learning_rate": 4.981432710205351e-06, + "loss": 15.3484, + "step": 1351 + }, + { + "epoch": 0.07751626866955251, + "grad_norm": 0.0, + "learning_rate": 4.975243657860948e-06, + "loss": 15.2349, + "step": 1352 + }, + { + "epoch": 0.07757360318779921, + "grad_norm": 0.0, + "learning_rate": 4.969054643448302e-06, + "loss": 15.3359, + "step": 1353 + }, + { + "epoch": 0.07763093770604593, + "grad_norm": 0.0, + "learning_rate": 4.962865676450239e-06, + "loss": 15.5123, + "step": 1354 + }, + { + "epoch": 0.07768827222429263, + "grad_norm": 0.0, + "learning_rate": 4.956676766349517e-06, + "loss": 15.1643, + "step": 1355 + }, + { + "epoch": 0.07774560674253934, + "grad_norm": 0.0, + "learning_rate": 4.9504879226288045e-06, + "loss": 15.3605, + "step": 1356 + }, + { + "epoch": 0.07780294126078606, + "grad_norm": 0.0, + "learning_rate": 4.944299154770673e-06, + "loss": 14.9246, + "step": 1357 + }, + { + "epoch": 0.07786027577903276, + "grad_norm": 0.0, + "learning_rate": 4.938110472257572e-06, + "loss": 15.3627, + "step": 1358 + }, + { + "epoch": 0.07791761029727948, + "grad_norm": 0.0, + "learning_rate": 4.931921884571823e-06, + "loss": 15.2038, + "step": 1359 + }, + { + "epoch": 0.07797494481552619, + "grad_norm": 0.0, + "learning_rate": 4.925733401195601e-06, + "loss": 15.4349, + "step": 1360 + }, + { + "epoch": 0.0780322793337729, + "grad_norm": 0.0, + "learning_rate": 4.919545031610925e-06, + "loss": 15.1013, + "step": 1361 + }, + { + "epoch": 0.07808961385201961, + "grad_norm": 0.0, + "learning_rate": 4.913356785299631e-06, + "loss": 15.0782, + "step": 1362 + }, + { + "epoch": 0.07814694837026633, + "grad_norm": 0.0, + "learning_rate": 4.907168671743377e-06, + "loss": 15.248, + "step": 1363 + }, + { + "epoch": 0.07820428288851303, + "grad_norm": 0.0, + "learning_rate": 4.9009807004236105e-06, + "loss": 15.2272, + "step": 1364 + }, + { + "epoch": 0.07826161740675974, + "grad_norm": 0.0, + "learning_rate": 4.894792880821563e-06, + "loss": 15.2808, + "step": 1365 + }, + { + "epoch": 0.07831895192500644, + "grad_norm": 0.0, + "learning_rate": 4.888605222418232e-06, + "loss": 15.2294, + "step": 1366 + }, + { + "epoch": 0.07837628644325316, + "grad_norm": 0.0, + "learning_rate": 4.882417734694372e-06, + "loss": 15.302, + "step": 1367 + }, + { + "epoch": 0.07843362096149988, + "grad_norm": 0.0, + "learning_rate": 4.876230427130472e-06, + "loss": 15.2778, + "step": 1368 + }, + { + "epoch": 0.07849095547974658, + "grad_norm": 0.0, + "learning_rate": 4.8700433092067474e-06, + "loss": 15.2622, + "step": 1369 + }, + { + "epoch": 0.07854828999799329, + "grad_norm": 0.0, + "learning_rate": 4.86385639040312e-06, + "loss": 15.3537, + "step": 1370 + }, + { + "epoch": 0.07860562451624001, + "grad_norm": 0.0, + "learning_rate": 4.8576696801992105e-06, + "loss": 15.1753, + "step": 1371 + }, + { + "epoch": 0.07866295903448671, + "grad_norm": 0.0, + "learning_rate": 4.8514831880743175e-06, + "loss": 15.3063, + "step": 1372 + }, + { + "epoch": 0.07872029355273343, + "grad_norm": 0.0, + "learning_rate": 4.845296923507406e-06, + "loss": 15.5033, + "step": 1373 + }, + { + "epoch": 0.07877762807098013, + "grad_norm": 0.0, + "learning_rate": 4.839110895977092e-06, + "loss": 15.3774, + "step": 1374 + }, + { + "epoch": 0.07883496258922684, + "grad_norm": 0.0, + "learning_rate": 4.832925114961629e-06, + "loss": 14.7429, + "step": 1375 + }, + { + "epoch": 0.07889229710747356, + "grad_norm": 0.0, + "learning_rate": 4.8267395899388905e-06, + "loss": 15.3443, + "step": 1376 + }, + { + "epoch": 0.07894963162572026, + "grad_norm": 0.0, + "learning_rate": 4.820554330386363e-06, + "loss": 15.1586, + "step": 1377 + }, + { + "epoch": 0.07900696614396698, + "grad_norm": 0.0, + "learning_rate": 4.814369345781121e-06, + "loss": 15.1614, + "step": 1378 + }, + { + "epoch": 0.07906430066221369, + "grad_norm": 0.0, + "learning_rate": 4.8081846455998205e-06, + "loss": 15.2101, + "step": 1379 + }, + { + "epoch": 0.07912163518046039, + "grad_norm": 0.0, + "learning_rate": 4.802000239318678e-06, + "loss": 15.6754, + "step": 1380 + }, + { + "epoch": 0.07917896969870711, + "grad_norm": 0.0, + "learning_rate": 4.795816136413467e-06, + "loss": 15.2554, + "step": 1381 + }, + { + "epoch": 0.07923630421695382, + "grad_norm": 0.0, + "learning_rate": 4.789632346359489e-06, + "loss": 15.013, + "step": 1382 + }, + { + "epoch": 0.07929363873520053, + "grad_norm": 0.0, + "learning_rate": 4.78344887863157e-06, + "loss": 15.389, + "step": 1383 + }, + { + "epoch": 0.07935097325344724, + "grad_norm": 0.0, + "learning_rate": 4.777265742704039e-06, + "loss": 15.3423, + "step": 1384 + }, + { + "epoch": 0.07940830777169394, + "grad_norm": 0.0, + "learning_rate": 4.771082948050722e-06, + "loss": 15.0646, + "step": 1385 + }, + { + "epoch": 0.07946564228994066, + "grad_norm": 0.0, + "learning_rate": 4.764900504144915e-06, + "loss": 15.0467, + "step": 1386 + }, + { + "epoch": 0.07952297680818737, + "grad_norm": 0.0, + "learning_rate": 4.758718420459383e-06, + "loss": 15.2004, + "step": 1387 + }, + { + "epoch": 0.07958031132643408, + "grad_norm": 0.0, + "learning_rate": 4.7525367064663355e-06, + "loss": 15.5153, + "step": 1388 + }, + { + "epoch": 0.07963764584468079, + "grad_norm": 0.0, + "learning_rate": 4.746355371637416e-06, + "loss": 15.1112, + "step": 1389 + }, + { + "epoch": 0.0796949803629275, + "grad_norm": 0.0, + "learning_rate": 4.740174425443687e-06, + "loss": 15.2085, + "step": 1390 + }, + { + "epoch": 0.07975231488117421, + "grad_norm": 0.0, + "learning_rate": 4.733993877355618e-06, + "loss": 15.4787, + "step": 1391 + }, + { + "epoch": 0.07980964939942092, + "grad_norm": 0.0, + "learning_rate": 4.7278137368430635e-06, + "loss": 15.3414, + "step": 1392 + }, + { + "epoch": 0.07986698391766762, + "grad_norm": 0.0, + "learning_rate": 4.7216340133752604e-06, + "loss": 15.0015, + "step": 1393 + }, + { + "epoch": 0.07992431843591434, + "grad_norm": 0.0, + "learning_rate": 4.715454716420798e-06, + "loss": 15.4287, + "step": 1394 + }, + { + "epoch": 0.07998165295416106, + "grad_norm": 0.0, + "learning_rate": 4.7092758554476215e-06, + "loss": 15.2161, + "step": 1395 + }, + { + "epoch": 0.08003898747240776, + "grad_norm": 0.0, + "learning_rate": 4.703097439923e-06, + "loss": 14.9452, + "step": 1396 + }, + { + "epoch": 0.08009632199065447, + "grad_norm": 0.0, + "learning_rate": 4.696919479313527e-06, + "loss": 15.1395, + "step": 1397 + }, + { + "epoch": 0.08015365650890119, + "grad_norm": 0.0, + "learning_rate": 4.6907419830850906e-06, + "loss": 15.1513, + "step": 1398 + }, + { + "epoch": 0.08021099102714789, + "grad_norm": 0.0, + "learning_rate": 4.684564960702877e-06, + "loss": 15.5572, + "step": 1399 + }, + { + "epoch": 0.0802683255453946, + "grad_norm": 0.0, + "learning_rate": 4.678388421631337e-06, + "loss": 15.3592, + "step": 1400 + }, + { + "epoch": 0.08032566006364132, + "grad_norm": 0.0, + "learning_rate": 4.67221237533419e-06, + "loss": 15.0252, + "step": 1401 + }, + { + "epoch": 0.08038299458188802, + "grad_norm": 0.0, + "learning_rate": 4.666036831274392e-06, + "loss": 15.476, + "step": 1402 + }, + { + "epoch": 0.08044032910013474, + "grad_norm": 0.0, + "learning_rate": 4.659861798914138e-06, + "loss": 15.1597, + "step": 1403 + }, + { + "epoch": 0.08049766361838144, + "grad_norm": 0.0, + "learning_rate": 4.653687287714828e-06, + "loss": 15.3732, + "step": 1404 + }, + { + "epoch": 0.08055499813662816, + "grad_norm": 0.0, + "learning_rate": 4.647513307137076e-06, + "loss": 15.5174, + "step": 1405 + }, + { + "epoch": 0.08061233265487487, + "grad_norm": 0.0, + "learning_rate": 4.641339866640672e-06, + "loss": 15.0124, + "step": 1406 + }, + { + "epoch": 0.08066966717312157, + "grad_norm": 0.0, + "learning_rate": 4.635166975684587e-06, + "loss": 15.1956, + "step": 1407 + }, + { + "epoch": 0.08072700169136829, + "grad_norm": 0.0, + "learning_rate": 4.628994643726942e-06, + "loss": 15.4477, + "step": 1408 + }, + { + "epoch": 0.080784336209615, + "grad_norm": 0.0, + "learning_rate": 4.622822880225012e-06, + "loss": 15.0185, + "step": 1409 + }, + { + "epoch": 0.0808416707278617, + "grad_norm": 0.0, + "learning_rate": 4.616651694635186e-06, + "loss": 14.8942, + "step": 1410 + }, + { + "epoch": 0.08089900524610842, + "grad_norm": 0.0, + "learning_rate": 4.610481096412985e-06, + "loss": 15.2555, + "step": 1411 + }, + { + "epoch": 0.08095633976435514, + "grad_norm": 0.0, + "learning_rate": 4.604311095013015e-06, + "loss": 15.2582, + "step": 1412 + }, + { + "epoch": 0.08101367428260184, + "grad_norm": 0.0, + "learning_rate": 4.598141699888978e-06, + "loss": 15.1778, + "step": 1413 + }, + { + "epoch": 0.08107100880084855, + "grad_norm": 0.0, + "learning_rate": 4.591972920493638e-06, + "loss": 15.8129, + "step": 1414 + }, + { + "epoch": 0.08112834331909526, + "grad_norm": 0.0, + "learning_rate": 4.585804766278827e-06, + "loss": 15.2853, + "step": 1415 + }, + { + "epoch": 0.08118567783734197, + "grad_norm": 0.0, + "learning_rate": 4.579637246695407e-06, + "loss": 15.2172, + "step": 1416 + }, + { + "epoch": 0.08124301235558869, + "grad_norm": 0.0, + "learning_rate": 4.573470371193277e-06, + "loss": 15.5005, + "step": 1417 + }, + { + "epoch": 0.08130034687383539, + "grad_norm": 0.0, + "learning_rate": 4.5673041492213416e-06, + "loss": 15.1312, + "step": 1418 + }, + { + "epoch": 0.0813576813920821, + "grad_norm": 0.0, + "learning_rate": 4.561138590227512e-06, + "loss": 15.2109, + "step": 1419 + }, + { + "epoch": 0.08141501591032882, + "grad_norm": 0.0, + "learning_rate": 4.554973703658676e-06, + "loss": 15.4162, + "step": 1420 + }, + { + "epoch": 0.08147235042857552, + "grad_norm": 0.0, + "learning_rate": 4.548809498960697e-06, + "loss": 14.7716, + "step": 1421 + }, + { + "epoch": 0.08152968494682224, + "grad_norm": 0.0, + "learning_rate": 4.542645985578389e-06, + "loss": 14.9493, + "step": 1422 + }, + { + "epoch": 0.08158701946506894, + "grad_norm": 0.0, + "learning_rate": 4.53648317295551e-06, + "loss": 15.0915, + "step": 1423 + }, + { + "epoch": 0.08164435398331565, + "grad_norm": 0.0, + "learning_rate": 4.53032107053474e-06, + "loss": 15.0074, + "step": 1424 + }, + { + "epoch": 0.08170168850156237, + "grad_norm": 0.0, + "learning_rate": 4.52415968775768e-06, + "loss": 15.1501, + "step": 1425 + }, + { + "epoch": 0.08175902301980907, + "grad_norm": 0.0, + "learning_rate": 4.517999034064819e-06, + "loss": 15.3762, + "step": 1426 + }, + { + "epoch": 0.08181635753805579, + "grad_norm": 0.0, + "learning_rate": 4.511839118895532e-06, + "loss": 14.9713, + "step": 1427 + }, + { + "epoch": 0.0818736920563025, + "grad_norm": 0.0, + "learning_rate": 4.5056799516880615e-06, + "loss": 15.0249, + "step": 1428 + }, + { + "epoch": 0.0819310265745492, + "grad_norm": 0.0, + "learning_rate": 4.499521541879508e-06, + "loss": 14.9934, + "step": 1429 + }, + { + "epoch": 0.08198836109279592, + "grad_norm": 0.0, + "learning_rate": 4.493363898905808e-06, + "loss": 15.6353, + "step": 1430 + }, + { + "epoch": 0.08204569561104263, + "grad_norm": 0.0, + "learning_rate": 4.4872070322017235e-06, + "loss": 15.3187, + "step": 1431 + }, + { + "epoch": 0.08210303012928934, + "grad_norm": 0.0, + "learning_rate": 4.4810509512008245e-06, + "loss": 15.0563, + "step": 1432 + }, + { + "epoch": 0.08216036464753605, + "grad_norm": 0.0, + "learning_rate": 4.474895665335484e-06, + "loss": 15.2045, + "step": 1433 + }, + { + "epoch": 0.08221769916578275, + "grad_norm": 0.0, + "learning_rate": 4.4687411840368514e-06, + "loss": 15.3326, + "step": 1434 + }, + { + "epoch": 0.08227503368402947, + "grad_norm": 0.0, + "learning_rate": 4.462587516734845e-06, + "loss": 15.1827, + "step": 1435 + }, + { + "epoch": 0.08233236820227618, + "grad_norm": 0.0, + "learning_rate": 4.456434672858132e-06, + "loss": 15.3744, + "step": 1436 + }, + { + "epoch": 0.08238970272052289, + "grad_norm": 0.0, + "learning_rate": 4.450282661834127e-06, + "loss": 15.1566, + "step": 1437 + }, + { + "epoch": 0.0824470372387696, + "grad_norm": 0.0, + "learning_rate": 4.444131493088956e-06, + "loss": 15.2045, + "step": 1438 + }, + { + "epoch": 0.08250437175701632, + "grad_norm": 0.0, + "learning_rate": 4.437981176047469e-06, + "loss": 15.348, + "step": 1439 + }, + { + "epoch": 0.08256170627526302, + "grad_norm": 0.0, + "learning_rate": 4.431831720133198e-06, + "loss": 15.0272, + "step": 1440 + }, + { + "epoch": 0.08261904079350973, + "grad_norm": 0.0, + "learning_rate": 4.425683134768365e-06, + "loss": 15.2256, + "step": 1441 + }, + { + "epoch": 0.08267637531175644, + "grad_norm": 0.0, + "learning_rate": 4.4195354293738484e-06, + "loss": 15.4462, + "step": 1442 + }, + { + "epoch": 0.08273370983000315, + "grad_norm": 0.0, + "learning_rate": 4.41338861336919e-06, + "loss": 15.2976, + "step": 1443 + }, + { + "epoch": 0.08279104434824987, + "grad_norm": 0.0, + "learning_rate": 4.40724269617256e-06, + "loss": 15.2583, + "step": 1444 + }, + { + "epoch": 0.08284837886649657, + "grad_norm": 0.0, + "learning_rate": 4.401097687200754e-06, + "loss": 14.889, + "step": 1445 + }, + { + "epoch": 0.08290571338474328, + "grad_norm": 0.0, + "learning_rate": 4.394953595869178e-06, + "loss": 15.0715, + "step": 1446 + }, + { + "epoch": 0.08296304790299, + "grad_norm": 0.0, + "learning_rate": 4.388810431591829e-06, + "loss": 15.3028, + "step": 1447 + }, + { + "epoch": 0.0830203824212367, + "grad_norm": 0.0, + "learning_rate": 4.382668203781286e-06, + "loss": 15.3078, + "step": 1448 + }, + { + "epoch": 0.08307771693948342, + "grad_norm": 0.0, + "learning_rate": 4.376526921848688e-06, + "loss": 14.9887, + "step": 1449 + }, + { + "epoch": 0.08313505145773013, + "grad_norm": 0.0, + "learning_rate": 4.3703865952037354e-06, + "loss": 15.148, + "step": 1450 + }, + { + "epoch": 0.08319238597597683, + "grad_norm": 0.0, + "learning_rate": 4.364247233254654e-06, + "loss": 15.2904, + "step": 1451 + }, + { + "epoch": 0.08324972049422355, + "grad_norm": 0.0, + "learning_rate": 4.3581088454081975e-06, + "loss": 14.9469, + "step": 1452 + }, + { + "epoch": 0.08330705501247025, + "grad_norm": 0.0, + "learning_rate": 4.351971441069622e-06, + "loss": 15.0734, + "step": 1453 + }, + { + "epoch": 0.08336438953071697, + "grad_norm": 0.0, + "learning_rate": 4.345835029642685e-06, + "loss": 15.4407, + "step": 1454 + }, + { + "epoch": 0.08342172404896368, + "grad_norm": 0.0, + "learning_rate": 4.339699620529614e-06, + "loss": 15.0554, + "step": 1455 + }, + { + "epoch": 0.08347905856721038, + "grad_norm": 0.0, + "learning_rate": 4.333565223131107e-06, + "loss": 15.0257, + "step": 1456 + }, + { + "epoch": 0.0835363930854571, + "grad_norm": 0.0, + "learning_rate": 4.327431846846306e-06, + "loss": 15.2727, + "step": 1457 + }, + { + "epoch": 0.08359372760370382, + "grad_norm": 0.0, + "learning_rate": 4.321299501072797e-06, + "loss": 15.2608, + "step": 1458 + }, + { + "epoch": 0.08365106212195052, + "grad_norm": 0.0, + "learning_rate": 4.315168195206574e-06, + "loss": 15.1937, + "step": 1459 + }, + { + "epoch": 0.08370839664019723, + "grad_norm": 0.0, + "learning_rate": 4.309037938642053e-06, + "loss": 15.3834, + "step": 1460 + }, + { + "epoch": 0.08376573115844393, + "grad_norm": 0.0, + "learning_rate": 4.30290874077203e-06, + "loss": 15.4123, + "step": 1461 + }, + { + "epoch": 0.08382306567669065, + "grad_norm": 0.0, + "learning_rate": 4.296780610987685e-06, + "loss": 14.9967, + "step": 1462 + }, + { + "epoch": 0.08388040019493737, + "grad_norm": 0.0, + "learning_rate": 4.290653558678559e-06, + "loss": 15.1021, + "step": 1463 + }, + { + "epoch": 0.08393773471318407, + "grad_norm": 0.0, + "learning_rate": 4.284527593232545e-06, + "loss": 15.4646, + "step": 1464 + }, + { + "epoch": 0.08399506923143078, + "grad_norm": 0.0, + "learning_rate": 4.278402724035868e-06, + "loss": 15.189, + "step": 1465 + }, + { + "epoch": 0.0840524037496775, + "grad_norm": 0.0, + "learning_rate": 4.272278960473074e-06, + "loss": 15.1943, + "step": 1466 + }, + { + "epoch": 0.0841097382679242, + "grad_norm": 0.0, + "learning_rate": 4.2661563119270135e-06, + "loss": 15.3133, + "step": 1467 + }, + { + "epoch": 0.08416707278617092, + "grad_norm": 0.0, + "learning_rate": 4.260034787778833e-06, + "loss": 14.9534, + "step": 1468 + }, + { + "epoch": 0.08422440730441763, + "grad_norm": 0.0, + "learning_rate": 4.253914397407952e-06, + "loss": 15.3099, + "step": 1469 + }, + { + "epoch": 0.08428174182266433, + "grad_norm": 0.0, + "learning_rate": 4.247795150192054e-06, + "loss": 14.879, + "step": 1470 + }, + { + "epoch": 0.08433907634091105, + "grad_norm": 0.0, + "learning_rate": 4.241677055507071e-06, + "loss": 14.9269, + "step": 1471 + }, + { + "epoch": 0.08439641085915775, + "grad_norm": 0.0, + "learning_rate": 4.235560122727171e-06, + "loss": 15.1852, + "step": 1472 + }, + { + "epoch": 0.08445374537740447, + "grad_norm": 0.0, + "learning_rate": 4.229444361224734e-06, + "loss": 15.3213, + "step": 1473 + }, + { + "epoch": 0.08451107989565118, + "grad_norm": 0.0, + "learning_rate": 4.223329780370359e-06, + "loss": 15.2615, + "step": 1474 + }, + { + "epoch": 0.08456841441389788, + "grad_norm": 0.0, + "learning_rate": 4.217216389532824e-06, + "loss": 15.2346, + "step": 1475 + }, + { + "epoch": 0.0846257489321446, + "grad_norm": 0.0, + "learning_rate": 4.211104198079089e-06, + "loss": 15.0628, + "step": 1476 + }, + { + "epoch": 0.08468308345039131, + "grad_norm": 0.0, + "learning_rate": 4.2049932153742726e-06, + "loss": 15.0532, + "step": 1477 + }, + { + "epoch": 0.08474041796863802, + "grad_norm": 0.0, + "learning_rate": 4.198883450781649e-06, + "loss": 15.0043, + "step": 1478 + }, + { + "epoch": 0.08479775248688473, + "grad_norm": 0.0, + "learning_rate": 4.192774913662618e-06, + "loss": 15.0565, + "step": 1479 + }, + { + "epoch": 0.08485508700513143, + "grad_norm": 0.0, + "learning_rate": 4.186667613376702e-06, + "loss": 15.3671, + "step": 1480 + }, + { + "epoch": 0.08491242152337815, + "grad_norm": 0.0, + "learning_rate": 4.180561559281526e-06, + "loss": 15.0956, + "step": 1481 + }, + { + "epoch": 0.08496975604162486, + "grad_norm": 0.0, + "learning_rate": 4.174456760732813e-06, + "loss": 15.2333, + "step": 1482 + }, + { + "epoch": 0.08502709055987157, + "grad_norm": 0.0, + "learning_rate": 4.1683532270843505e-06, + "loss": 14.9798, + "step": 1483 + }, + { + "epoch": 0.08508442507811828, + "grad_norm": 0.0, + "learning_rate": 4.162250967687999e-06, + "loss": 15.3353, + "step": 1484 + }, + { + "epoch": 0.085141759596365, + "grad_norm": 0.0, + "learning_rate": 4.1561499918936575e-06, + "loss": 15.0471, + "step": 1485 + }, + { + "epoch": 0.0851990941146117, + "grad_norm": 0.0, + "learning_rate": 4.150050309049267e-06, + "loss": 15.4525, + "step": 1486 + }, + { + "epoch": 0.08525642863285841, + "grad_norm": 0.0, + "learning_rate": 4.143951928500778e-06, + "loss": 15.3145, + "step": 1487 + }, + { + "epoch": 0.08531376315110513, + "grad_norm": 0.0, + "learning_rate": 4.137854859592157e-06, + "loss": 15.0502, + "step": 1488 + }, + { + "epoch": 0.08537109766935183, + "grad_norm": 0.0, + "learning_rate": 4.131759111665349e-06, + "loss": 15.1505, + "step": 1489 + }, + { + "epoch": 0.08542843218759855, + "grad_norm": 0.0, + "learning_rate": 4.125664694060283e-06, + "loss": 15.4517, + "step": 1490 + }, + { + "epoch": 0.08548576670584525, + "grad_norm": 0.0, + "learning_rate": 4.119571616114845e-06, + "loss": 15.42, + "step": 1491 + }, + { + "epoch": 0.08554310122409196, + "grad_norm": 0.0, + "learning_rate": 4.113479887164873e-06, + "loss": 15.2936, + "step": 1492 + }, + { + "epoch": 0.08560043574233868, + "grad_norm": 0.0, + "learning_rate": 4.107389516544134e-06, + "loss": 15.0335, + "step": 1493 + }, + { + "epoch": 0.08565777026058538, + "grad_norm": 0.0, + "learning_rate": 4.101300513584317e-06, + "loss": 14.8875, + "step": 1494 + }, + { + "epoch": 0.0857151047788321, + "grad_norm": 0.0, + "learning_rate": 4.09521288761501e-06, + "loss": 15.1966, + "step": 1495 + }, + { + "epoch": 0.08577243929707881, + "grad_norm": 0.0, + "learning_rate": 4.0891266479637e-06, + "loss": 15.1653, + "step": 1496 + }, + { + "epoch": 0.08582977381532551, + "grad_norm": 0.0, + "learning_rate": 4.083041803955739e-06, + "loss": 15.4002, + "step": 1497 + }, + { + "epoch": 0.08588710833357223, + "grad_norm": 0.0, + "learning_rate": 4.076958364914352e-06, + "loss": 15.1514, + "step": 1498 + }, + { + "epoch": 0.08594444285181893, + "grad_norm": 0.0, + "learning_rate": 4.070876340160601e-06, + "loss": 14.9568, + "step": 1499 + }, + { + "epoch": 0.08600177737006565, + "grad_norm": 0.0, + "learning_rate": 4.064795739013389e-06, + "loss": 15.1858, + "step": 1500 + }, + { + "epoch": 0.08605911188831236, + "grad_norm": 0.0, + "learning_rate": 4.0587165707894326e-06, + "loss": 15.3825, + "step": 1501 + }, + { + "epoch": 0.08611644640655906, + "grad_norm": 0.0, + "learning_rate": 4.0526388448032564e-06, + "loss": 15.4134, + "step": 1502 + }, + { + "epoch": 0.08617378092480578, + "grad_norm": 0.0, + "learning_rate": 4.046562570367172e-06, + "loss": 15.1151, + "step": 1503 + }, + { + "epoch": 0.0862311154430525, + "grad_norm": 0.0, + "learning_rate": 4.04048775679127e-06, + "loss": 15.2341, + "step": 1504 + }, + { + "epoch": 0.0862884499612992, + "grad_norm": 0.0, + "learning_rate": 4.034414413383397e-06, + "loss": 15.0329, + "step": 1505 + }, + { + "epoch": 0.08634578447954591, + "grad_norm": 0.0, + "learning_rate": 4.028342549449157e-06, + "loss": 15.1716, + "step": 1506 + }, + { + "epoch": 0.08640311899779263, + "grad_norm": 0.0, + "learning_rate": 4.022272174291878e-06, + "loss": 15.3237, + "step": 1507 + }, + { + "epoch": 0.08646045351603933, + "grad_norm": 0.0, + "learning_rate": 4.0162032972126105e-06, + "loss": 15.2783, + "step": 1508 + }, + { + "epoch": 0.08651778803428604, + "grad_norm": 0.0, + "learning_rate": 4.010135927510109e-06, + "loss": 15.4047, + "step": 1509 + }, + { + "epoch": 0.08657512255253275, + "grad_norm": 0.0, + "learning_rate": 4.004070074480821e-06, + "loss": 15.3116, + "step": 1510 + }, + { + "epoch": 0.08663245707077946, + "grad_norm": 0.0, + "learning_rate": 3.998005747418862e-06, + "loss": 14.971, + "step": 1511 + }, + { + "epoch": 0.08668979158902618, + "grad_norm": 0.0, + "learning_rate": 3.991942955616023e-06, + "loss": 15.1829, + "step": 1512 + }, + { + "epoch": 0.08674712610727288, + "grad_norm": 0.0, + "learning_rate": 3.985881708361729e-06, + "loss": 15.1694, + "step": 1513 + }, + { + "epoch": 0.0868044606255196, + "grad_norm": 0.0, + "learning_rate": 3.979822014943049e-06, + "loss": 15.276, + "step": 1514 + }, + { + "epoch": 0.08686179514376631, + "grad_norm": 0.0, + "learning_rate": 3.9737638846446605e-06, + "loss": 15.0269, + "step": 1515 + }, + { + "epoch": 0.08691912966201301, + "grad_norm": 0.0, + "learning_rate": 3.967707326748857e-06, + "loss": 15.2615, + "step": 1516 + }, + { + "epoch": 0.08697646418025973, + "grad_norm": 0.0, + "learning_rate": 3.961652350535517e-06, + "loss": 15.2742, + "step": 1517 + }, + { + "epoch": 0.08703379869850644, + "grad_norm": 0.0, + "learning_rate": 3.955598965282096e-06, + "loss": 15.3354, + "step": 1518 + }, + { + "epoch": 0.08709113321675314, + "grad_norm": 0.0, + "learning_rate": 3.94954718026361e-06, + "loss": 15.3394, + "step": 1519 + }, + { + "epoch": 0.08714846773499986, + "grad_norm": 0.0, + "learning_rate": 3.94349700475263e-06, + "loss": 15.1984, + "step": 1520 + }, + { + "epoch": 0.08720580225324656, + "grad_norm": 0.0, + "learning_rate": 3.93744844801925e-06, + "loss": 14.8813, + "step": 1521 + }, + { + "epoch": 0.08726313677149328, + "grad_norm": 0.0, + "learning_rate": 3.931401519331095e-06, + "loss": 15.358, + "step": 1522 + }, + { + "epoch": 0.08732047128973999, + "grad_norm": 0.0, + "learning_rate": 3.9253562279532884e-06, + "loss": 15.1213, + "step": 1523 + }, + { + "epoch": 0.0873778058079867, + "grad_norm": 0.0, + "learning_rate": 3.919312583148449e-06, + "loss": 15.029, + "step": 1524 + }, + { + "epoch": 0.08743514032623341, + "grad_norm": 0.0, + "learning_rate": 3.913270594176665e-06, + "loss": 14.8806, + "step": 1525 + }, + { + "epoch": 0.08749247484448013, + "grad_norm": 0.0, + "learning_rate": 3.907230270295499e-06, + "loss": 15.3686, + "step": 1526 + }, + { + "epoch": 0.08754980936272683, + "grad_norm": 0.0, + "learning_rate": 3.901191620759954e-06, + "loss": 15.0427, + "step": 1527 + }, + { + "epoch": 0.08760714388097354, + "grad_norm": 0.0, + "learning_rate": 3.895154654822471e-06, + "loss": 15.1566, + "step": 1528 + }, + { + "epoch": 0.08766447839922024, + "grad_norm": 0.0, + "learning_rate": 3.8891193817329085e-06, + "loss": 15.1766, + "step": 1529 + }, + { + "epoch": 0.08772181291746696, + "grad_norm": 0.0, + "learning_rate": 3.883085810738536e-06, + "loss": 15.1378, + "step": 1530 + }, + { + "epoch": 0.08777914743571368, + "grad_norm": 0.0, + "learning_rate": 3.87705395108401e-06, + "loss": 15.2077, + "step": 1531 + }, + { + "epoch": 0.08783648195396038, + "grad_norm": 0.0, + "learning_rate": 3.8710238120113675e-06, + "loss": 15.1772, + "step": 1532 + }, + { + "epoch": 0.08789381647220709, + "grad_norm": 0.0, + "learning_rate": 3.864995402760009e-06, + "loss": 15.4355, + "step": 1533 + }, + { + "epoch": 0.08795115099045381, + "grad_norm": 0.0, + "learning_rate": 3.858968732566685e-06, + "loss": 15.0356, + "step": 1534 + }, + { + "epoch": 0.08800848550870051, + "grad_norm": 0.0, + "learning_rate": 3.8529438106654785e-06, + "loss": 15.1937, + "step": 1535 + }, + { + "epoch": 0.08806582002694723, + "grad_norm": 0.0, + "learning_rate": 3.8469206462878e-06, + "loss": 15.0776, + "step": 1536 + }, + { + "epoch": 0.08812315454519394, + "grad_norm": 0.0, + "learning_rate": 3.840899248662358e-06, + "loss": 15.2202, + "step": 1537 + }, + { + "epoch": 0.08818048906344064, + "grad_norm": 0.0, + "learning_rate": 3.834879627015165e-06, + "loss": 15.2026, + "step": 1538 + }, + { + "epoch": 0.08823782358168736, + "grad_norm": 0.0, + "learning_rate": 3.8288617905695005e-06, + "loss": 15.211, + "step": 1539 + }, + { + "epoch": 0.08829515809993406, + "grad_norm": 0.0, + "learning_rate": 3.822845748545919e-06, + "loss": 15.3265, + "step": 1540 + }, + { + "epoch": 0.08835249261818078, + "grad_norm": 0.0, + "learning_rate": 3.816831510162219e-06, + "loss": 15.3084, + "step": 1541 + }, + { + "epoch": 0.08840982713642749, + "grad_norm": 0.0, + "learning_rate": 3.8108190846334402e-06, + "loss": 15.1557, + "step": 1542 + }, + { + "epoch": 0.08846716165467419, + "grad_norm": 0.0, + "learning_rate": 3.8048084811718377e-06, + "loss": 15.0298, + "step": 1543 + }, + { + "epoch": 0.08852449617292091, + "grad_norm": 0.0, + "learning_rate": 3.798799708986885e-06, + "loss": 15.454, + "step": 1544 + }, + { + "epoch": 0.08858183069116762, + "grad_norm": 0.0, + "learning_rate": 3.7927927772852395e-06, + "loss": 15.0659, + "step": 1545 + }, + { + "epoch": 0.08863916520941433, + "grad_norm": 0.0, + "learning_rate": 3.786787695270743e-06, + "loss": 15.1532, + "step": 1546 + }, + { + "epoch": 0.08869649972766104, + "grad_norm": 0.0, + "learning_rate": 3.7807844721444063e-06, + "loss": 15.2533, + "step": 1547 + }, + { + "epoch": 0.08875383424590774, + "grad_norm": 0.0, + "learning_rate": 3.7747831171043865e-06, + "loss": 14.971, + "step": 1548 + }, + { + "epoch": 0.08881116876415446, + "grad_norm": 0.0, + "learning_rate": 3.7687836393459828e-06, + "loss": 15.3314, + "step": 1549 + }, + { + "epoch": 0.08886850328240117, + "grad_norm": 0.0, + "learning_rate": 3.7627860480616128e-06, + "loss": 15.2095, + "step": 1550 + }, + { + "epoch": 0.08892583780064788, + "grad_norm": 0.0, + "learning_rate": 3.756790352440811e-06, + "loss": 14.8912, + "step": 1551 + }, + { + "epoch": 0.08898317231889459, + "grad_norm": 0.0, + "learning_rate": 3.7507965616702015e-06, + "loss": 15.3292, + "step": 1552 + }, + { + "epoch": 0.0890405068371413, + "grad_norm": 0.0, + "learning_rate": 3.744804684933492e-06, + "loss": 15.1989, + "step": 1553 + }, + { + "epoch": 0.08909784135538801, + "grad_norm": 0.0, + "learning_rate": 3.7388147314114554e-06, + "loss": 15.6162, + "step": 1554 + }, + { + "epoch": 0.08915517587363472, + "grad_norm": 0.0, + "learning_rate": 3.732826710281923e-06, + "loss": 14.9143, + "step": 1555 + }, + { + "epoch": 0.08921251039188144, + "grad_norm": 0.0, + "learning_rate": 3.7268406307197568e-06, + "loss": 14.7441, + "step": 1556 + }, + { + "epoch": 0.08926984491012814, + "grad_norm": 0.0, + "learning_rate": 3.7208565018968545e-06, + "loss": 14.9323, + "step": 1557 + }, + { + "epoch": 0.08932717942837486, + "grad_norm": 0.0, + "learning_rate": 3.7148743329821146e-06, + "loss": 15.1762, + "step": 1558 + }, + { + "epoch": 0.08938451394662156, + "grad_norm": 0.0, + "learning_rate": 3.7088941331414418e-06, + "loss": 15.7279, + "step": 1559 + }, + { + "epoch": 0.08944184846486827, + "grad_norm": 0.0, + "learning_rate": 3.702915911537714e-06, + "loss": 15.1863, + "step": 1560 + }, + { + "epoch": 0.08949918298311499, + "grad_norm": 0.0, + "learning_rate": 3.6969396773307888e-06, + "loss": 15.29, + "step": 1561 + }, + { + "epoch": 0.08955651750136169, + "grad_norm": 0.0, + "learning_rate": 3.6909654396774684e-06, + "loss": 14.9259, + "step": 1562 + }, + { + "epoch": 0.0896138520196084, + "grad_norm": 0.0, + "learning_rate": 3.684993207731505e-06, + "loss": 14.9451, + "step": 1563 + }, + { + "epoch": 0.08967118653785512, + "grad_norm": 0.0, + "learning_rate": 3.6790229906435706e-06, + "loss": 15.2632, + "step": 1564 + }, + { + "epoch": 0.08972852105610182, + "grad_norm": 0.0, + "learning_rate": 3.673054797561254e-06, + "loss": 15.0482, + "step": 1565 + }, + { + "epoch": 0.08978585557434854, + "grad_norm": 0.0, + "learning_rate": 3.667088637629041e-06, + "loss": 14.9746, + "step": 1566 + }, + { + "epoch": 0.08984319009259524, + "grad_norm": 0.0, + "learning_rate": 3.6611245199883037e-06, + "loss": 15.5239, + "step": 1567 + }, + { + "epoch": 0.08990052461084196, + "grad_norm": 0.0, + "learning_rate": 3.6551624537772834e-06, + "loss": 15.5796, + "step": 1568 + }, + { + "epoch": 0.08995785912908867, + "grad_norm": 0.0, + "learning_rate": 3.6492024481310793e-06, + "loss": 15.051, + "step": 1569 + }, + { + "epoch": 0.09001519364733537, + "grad_norm": 0.0, + "learning_rate": 3.6432445121816308e-06, + "loss": 15.3199, + "step": 1570 + }, + { + "epoch": 0.09007252816558209, + "grad_norm": 0.0, + "learning_rate": 3.6372886550577125e-06, + "loss": 15.3081, + "step": 1571 + }, + { + "epoch": 0.0901298626838288, + "grad_norm": 0.0, + "learning_rate": 3.6313348858849064e-06, + "loss": 15.1435, + "step": 1572 + }, + { + "epoch": 0.0901871972020755, + "grad_norm": 0.0, + "learning_rate": 3.6253832137856e-06, + "loss": 14.6087, + "step": 1573 + }, + { + "epoch": 0.09024453172032222, + "grad_norm": 0.0, + "learning_rate": 3.6194336478789638e-06, + "loss": 15.2436, + "step": 1574 + }, + { + "epoch": 0.09030186623856894, + "grad_norm": 0.0, + "learning_rate": 3.6134861972809477e-06, + "loss": 15.3815, + "step": 1575 + }, + { + "epoch": 0.09035920075681564, + "grad_norm": 0.0, + "learning_rate": 3.6075408711042536e-06, + "loss": 15.3305, + "step": 1576 + }, + { + "epoch": 0.09041653527506235, + "grad_norm": 0.0, + "learning_rate": 3.6015976784583327e-06, + "loss": 14.9759, + "step": 1577 + }, + { + "epoch": 0.09047386979330906, + "grad_norm": 0.0, + "learning_rate": 3.595656628449362e-06, + "loss": 15.1466, + "step": 1578 + }, + { + "epoch": 0.09053120431155577, + "grad_norm": 0.0, + "learning_rate": 3.5897177301802455e-06, + "loss": 15.0655, + "step": 1579 + }, + { + "epoch": 0.09058853882980249, + "grad_norm": 0.0, + "learning_rate": 3.5837809927505783e-06, + "loss": 15.1449, + "step": 1580 + }, + { + "epoch": 0.09064587334804919, + "grad_norm": 0.0, + "learning_rate": 3.5778464252566536e-06, + "loss": 15.1359, + "step": 1581 + }, + { + "epoch": 0.0907032078662959, + "grad_norm": 0.0, + "learning_rate": 3.571914036791435e-06, + "loss": 15.5221, + "step": 1582 + }, + { + "epoch": 0.09076054238454262, + "grad_norm": 0.0, + "learning_rate": 3.5659838364445505e-06, + "loss": 14.8403, + "step": 1583 + }, + { + "epoch": 0.09081787690278932, + "grad_norm": 0.0, + "learning_rate": 3.5600558333022707e-06, + "loss": 15.2719, + "step": 1584 + }, + { + "epoch": 0.09087521142103604, + "grad_norm": 0.0, + "learning_rate": 3.5541300364475067e-06, + "loss": 14.9916, + "step": 1585 + }, + { + "epoch": 0.09093254593928274, + "grad_norm": 0.0, + "learning_rate": 3.548206454959783e-06, + "loss": 14.9182, + "step": 1586 + }, + { + "epoch": 0.09098988045752945, + "grad_norm": 0.0, + "learning_rate": 3.5422850979152335e-06, + "loss": 15.0797, + "step": 1587 + }, + { + "epoch": 0.09104721497577617, + "grad_norm": 0.0, + "learning_rate": 3.5363659743865797e-06, + "loss": 15.3647, + "step": 1588 + }, + { + "epoch": 0.09110454949402287, + "grad_norm": 0.0, + "learning_rate": 3.5304490934431268e-06, + "loss": 15.2592, + "step": 1589 + }, + { + "epoch": 0.09116188401226959, + "grad_norm": 0.0, + "learning_rate": 3.5245344641507384e-06, + "loss": 15.2267, + "step": 1590 + }, + { + "epoch": 0.0912192185305163, + "grad_norm": 0.0, + "learning_rate": 3.518622095571831e-06, + "loss": 15.1416, + "step": 1591 + }, + { + "epoch": 0.091276553048763, + "grad_norm": 0.0, + "learning_rate": 3.512711996765355e-06, + "loss": 15.4173, + "step": 1592 + }, + { + "epoch": 0.09133388756700972, + "grad_norm": 0.0, + "learning_rate": 3.506804176786789e-06, + "loss": 15.1714, + "step": 1593 + }, + { + "epoch": 0.09139122208525643, + "grad_norm": 0.0, + "learning_rate": 3.5008986446881088e-06, + "loss": 15.3202, + "step": 1594 + }, + { + "epoch": 0.09144855660350314, + "grad_norm": 0.0, + "learning_rate": 3.4949954095177986e-06, + "loss": 15.217, + "step": 1595 + }, + { + "epoch": 0.09150589112174985, + "grad_norm": 0.0, + "learning_rate": 3.4890944803208104e-06, + "loss": 14.9886, + "step": 1596 + }, + { + "epoch": 0.09156322563999655, + "grad_norm": 0.0, + "learning_rate": 3.4831958661385716e-06, + "loss": 14.7912, + "step": 1597 + }, + { + "epoch": 0.09162056015824327, + "grad_norm": 0.0, + "learning_rate": 3.4772995760089573e-06, + "loss": 14.9861, + "step": 1598 + }, + { + "epoch": 0.09167789467648998, + "grad_norm": 0.0, + "learning_rate": 3.4714056189662877e-06, + "loss": 15.3865, + "step": 1599 + }, + { + "epoch": 0.09173522919473669, + "grad_norm": 0.0, + "learning_rate": 3.465514004041301e-06, + "loss": 14.9974, + "step": 1600 + }, + { + "epoch": 0.0917925637129834, + "grad_norm": 0.0, + "learning_rate": 3.459624740261153e-06, + "loss": 14.9746, + "step": 1601 + }, + { + "epoch": 0.09184989823123012, + "grad_norm": 0.0, + "learning_rate": 3.45373783664939e-06, + "loss": 14.9292, + "step": 1602 + }, + { + "epoch": 0.09190723274947682, + "grad_norm": 0.0, + "learning_rate": 3.4478533022259527e-06, + "loss": 15.2711, + "step": 1603 + }, + { + "epoch": 0.09196456726772353, + "grad_norm": 0.0, + "learning_rate": 3.4419711460071405e-06, + "loss": 15.109, + "step": 1604 + }, + { + "epoch": 0.09202190178597024, + "grad_norm": 0.0, + "learning_rate": 3.4360913770056166e-06, + "loss": 15.533, + "step": 1605 + }, + { + "epoch": 0.09207923630421695, + "grad_norm": 0.0, + "learning_rate": 3.4302140042303813e-06, + "loss": 15.1691, + "step": 1606 + }, + { + "epoch": 0.09213657082246367, + "grad_norm": 0.0, + "learning_rate": 3.424339036686768e-06, + "loss": 14.9606, + "step": 1607 + }, + { + "epoch": 0.09219390534071037, + "grad_norm": 0.0, + "learning_rate": 3.41846648337642e-06, + "loss": 15.1092, + "step": 1608 + }, + { + "epoch": 0.09225123985895708, + "grad_norm": 0.0, + "learning_rate": 3.4125963532972878e-06, + "loss": 14.8195, + "step": 1609 + }, + { + "epoch": 0.0923085743772038, + "grad_norm": 0.0, + "learning_rate": 3.4067286554436024e-06, + "loss": 14.756, + "step": 1610 + }, + { + "epoch": 0.0923659088954505, + "grad_norm": 0.0, + "learning_rate": 3.400863398805873e-06, + "loss": 15.2289, + "step": 1611 + }, + { + "epoch": 0.09242324341369722, + "grad_norm": 0.0, + "learning_rate": 3.395000592370864e-06, + "loss": 15.1732, + "step": 1612 + }, + { + "epoch": 0.09248057793194393, + "grad_norm": 0.0, + "learning_rate": 3.389140245121591e-06, + "loss": 15.2813, + "step": 1613 + }, + { + "epoch": 0.09253791245019063, + "grad_norm": 0.0, + "learning_rate": 3.383282366037296e-06, + "loss": 15.011, + "step": 1614 + }, + { + "epoch": 0.09259524696843735, + "grad_norm": 0.0, + "learning_rate": 3.3774269640934447e-06, + "loss": 14.8524, + "step": 1615 + }, + { + "epoch": 0.09265258148668405, + "grad_norm": 0.0, + "learning_rate": 3.371574048261701e-06, + "loss": 15.4559, + "step": 1616 + }, + { + "epoch": 0.09270991600493077, + "grad_norm": 0.0, + "learning_rate": 3.3657236275099275e-06, + "loss": 15.3174, + "step": 1617 + }, + { + "epoch": 0.09276725052317748, + "grad_norm": 0.0, + "learning_rate": 3.3598757108021546e-06, + "loss": 15.1581, + "step": 1618 + }, + { + "epoch": 0.09282458504142418, + "grad_norm": 0.0, + "learning_rate": 3.354030307098585e-06, + "loss": 15.4304, + "step": 1619 + }, + { + "epoch": 0.0928819195596709, + "grad_norm": 0.0, + "learning_rate": 3.348187425355564e-06, + "loss": 15.2791, + "step": 1620 + }, + { + "epoch": 0.09293925407791762, + "grad_norm": 0.0, + "learning_rate": 3.342347074525578e-06, + "loss": 15.3398, + "step": 1621 + }, + { + "epoch": 0.09299658859616432, + "grad_norm": 0.0, + "learning_rate": 3.3365092635572295e-06, + "loss": 14.9245, + "step": 1622 + }, + { + "epoch": 0.09305392311441103, + "grad_norm": 0.0, + "learning_rate": 3.3306740013952368e-06, + "loss": 15.1071, + "step": 1623 + }, + { + "epoch": 0.09311125763265775, + "grad_norm": 0.0, + "learning_rate": 3.3248412969804065e-06, + "loss": 15.2702, + "step": 1624 + }, + { + "epoch": 0.09316859215090445, + "grad_norm": 0.0, + "learning_rate": 3.319011159249631e-06, + "loss": 14.9664, + "step": 1625 + }, + { + "epoch": 0.09322592666915117, + "grad_norm": 0.0, + "learning_rate": 3.313183597135865e-06, + "loss": 15.1732, + "step": 1626 + }, + { + "epoch": 0.09328326118739787, + "grad_norm": 0.0, + "learning_rate": 3.307358619568123e-06, + "loss": 15.2397, + "step": 1627 + }, + { + "epoch": 0.09334059570564458, + "grad_norm": 0.0, + "learning_rate": 3.301536235471453e-06, + "loss": 15.1465, + "step": 1628 + }, + { + "epoch": 0.0933979302238913, + "grad_norm": 0.0, + "learning_rate": 3.295716453766935e-06, + "loss": 15.2098, + "step": 1629 + }, + { + "epoch": 0.093455264742138, + "grad_norm": 0.0, + "learning_rate": 3.289899283371657e-06, + "loss": 15.2483, + "step": 1630 + }, + { + "epoch": 0.09351259926038472, + "grad_norm": 0.0, + "learning_rate": 3.2840847331987093e-06, + "loss": 15.0997, + "step": 1631 + }, + { + "epoch": 0.09356993377863143, + "grad_norm": 0.0, + "learning_rate": 3.2782728121571632e-06, + "loss": 15.2503, + "step": 1632 + }, + { + "epoch": 0.09362726829687813, + "grad_norm": 0.0, + "learning_rate": 3.2724635291520697e-06, + "loss": 15.3095, + "step": 1633 + }, + { + "epoch": 0.09368460281512485, + "grad_norm": 0.0, + "learning_rate": 3.266656893084428e-06, + "loss": 15.4218, + "step": 1634 + }, + { + "epoch": 0.09374193733337155, + "grad_norm": 0.0, + "learning_rate": 3.2608529128511896e-06, + "loss": 15.0612, + "step": 1635 + }, + { + "epoch": 0.09379927185161827, + "grad_norm": 0.0, + "learning_rate": 3.2550515973452295e-06, + "loss": 15.3992, + "step": 1636 + }, + { + "epoch": 0.09385660636986498, + "grad_norm": 0.0, + "learning_rate": 3.2492529554553485e-06, + "loss": 15.1745, + "step": 1637 + }, + { + "epoch": 0.09391394088811168, + "grad_norm": 0.0, + "learning_rate": 3.243456996066242e-06, + "loss": 14.9587, + "step": 1638 + }, + { + "epoch": 0.0939712754063584, + "grad_norm": 0.0, + "learning_rate": 3.2376637280585025e-06, + "loss": 15.0485, + "step": 1639 + }, + { + "epoch": 0.09402860992460511, + "grad_norm": 0.0, + "learning_rate": 3.2318731603085923e-06, + "loss": 15.0185, + "step": 1640 + }, + { + "epoch": 0.09408594444285182, + "grad_norm": 0.0, + "learning_rate": 3.2260853016888443e-06, + "loss": 15.2848, + "step": 1641 + }, + { + "epoch": 0.09414327896109853, + "grad_norm": 0.0, + "learning_rate": 3.2203001610674322e-06, + "loss": 15.0875, + "step": 1642 + }, + { + "epoch": 0.09420061347934525, + "grad_norm": 0.0, + "learning_rate": 3.214517747308368e-06, + "loss": 15.0593, + "step": 1643 + }, + { + "epoch": 0.09425794799759195, + "grad_norm": 0.0, + "learning_rate": 3.2087380692714887e-06, + "loss": 15.1293, + "step": 1644 + }, + { + "epoch": 0.09431528251583866, + "grad_norm": 0.0, + "learning_rate": 3.202961135812437e-06, + "loss": 14.9762, + "step": 1645 + }, + { + "epoch": 0.09437261703408537, + "grad_norm": 0.0, + "learning_rate": 3.1971869557826507e-06, + "loss": 15.3738, + "step": 1646 + }, + { + "epoch": 0.09442995155233208, + "grad_norm": 0.0, + "learning_rate": 3.191415538029346e-06, + "loss": 15.1781, + "step": 1647 + }, + { + "epoch": 0.0944872860705788, + "grad_norm": 0.0, + "learning_rate": 3.185646891395514e-06, + "loss": 15.2245, + "step": 1648 + }, + { + "epoch": 0.0945446205888255, + "grad_norm": 0.0, + "learning_rate": 3.1798810247198925e-06, + "loss": 15.0903, + "step": 1649 + }, + { + "epoch": 0.09460195510707221, + "grad_norm": 0.0, + "learning_rate": 3.174117946836964e-06, + "loss": 15.0486, + "step": 1650 + }, + { + "epoch": 0.09465928962531893, + "grad_norm": 0.0, + "learning_rate": 3.1683576665769344e-06, + "loss": 15.0967, + "step": 1651 + }, + { + "epoch": 0.09471662414356563, + "grad_norm": 0.0, + "learning_rate": 3.1626001927657287e-06, + "loss": 15.1772, + "step": 1652 + }, + { + "epoch": 0.09477395866181235, + "grad_norm": 0.0, + "learning_rate": 3.1568455342249654e-06, + "loss": 15.0888, + "step": 1653 + }, + { + "epoch": 0.09483129318005905, + "grad_norm": 0.0, + "learning_rate": 3.1510936997719557e-06, + "loss": 15.2379, + "step": 1654 + }, + { + "epoch": 0.09488862769830576, + "grad_norm": 0.0, + "learning_rate": 3.145344698219677e-06, + "loss": 14.9315, + "step": 1655 + }, + { + "epoch": 0.09494596221655248, + "grad_norm": 0.0, + "learning_rate": 3.1395985383767734e-06, + "loss": 15.681, + "step": 1656 + }, + { + "epoch": 0.09500329673479918, + "grad_norm": 0.0, + "learning_rate": 3.1338552290475265e-06, + "loss": 15.0557, + "step": 1657 + }, + { + "epoch": 0.0950606312530459, + "grad_norm": 0.0, + "learning_rate": 3.12811477903186e-06, + "loss": 15.0777, + "step": 1658 + }, + { + "epoch": 0.09511796577129261, + "grad_norm": 0.0, + "learning_rate": 3.1223771971253093e-06, + "loss": 15.2504, + "step": 1659 + }, + { + "epoch": 0.09517530028953931, + "grad_norm": 0.0, + "learning_rate": 3.1166424921190174e-06, + "loss": 15.0185, + "step": 1660 + }, + { + "epoch": 0.09523263480778603, + "grad_norm": 0.0, + "learning_rate": 3.1109106727997184e-06, + "loss": 15.0898, + "step": 1661 + }, + { + "epoch": 0.09528996932603274, + "grad_norm": 0.0, + "learning_rate": 3.1051817479497297e-06, + "loss": 15.0596, + "step": 1662 + }, + { + "epoch": 0.09534730384427945, + "grad_norm": 0.0, + "learning_rate": 3.0994557263469267e-06, + "loss": 15.1607, + "step": 1663 + }, + { + "epoch": 0.09540463836252616, + "grad_norm": 0.0, + "learning_rate": 3.093732616764742e-06, + "loss": 15.1243, + "step": 1664 + }, + { + "epoch": 0.09546197288077286, + "grad_norm": 0.0, + "learning_rate": 3.0880124279721408e-06, + "loss": 15.0445, + "step": 1665 + }, + { + "epoch": 0.09551930739901958, + "grad_norm": 0.0, + "learning_rate": 3.0822951687336215e-06, + "loss": 14.8608, + "step": 1666 + }, + { + "epoch": 0.0955766419172663, + "grad_norm": 0.0, + "learning_rate": 3.076580847809184e-06, + "loss": 15.4273, + "step": 1667 + }, + { + "epoch": 0.095633976435513, + "grad_norm": 0.0, + "learning_rate": 3.0708694739543345e-06, + "loss": 15.1485, + "step": 1668 + }, + { + "epoch": 0.09569131095375971, + "grad_norm": 0.0, + "learning_rate": 3.065161055920057e-06, + "loss": 15.4583, + "step": 1669 + }, + { + "epoch": 0.09574864547200643, + "grad_norm": 0.0, + "learning_rate": 3.0594556024528134e-06, + "loss": 14.7834, + "step": 1670 + }, + { + "epoch": 0.09580597999025313, + "grad_norm": 0.0, + "learning_rate": 3.053753122294515e-06, + "loss": 14.9889, + "step": 1671 + }, + { + "epoch": 0.09586331450849984, + "grad_norm": 0.0, + "learning_rate": 3.0480536241825263e-06, + "loss": 14.956, + "step": 1672 + }, + { + "epoch": 0.09592064902674655, + "grad_norm": 0.0, + "learning_rate": 3.0423571168496356e-06, + "loss": 15.1446, + "step": 1673 + }, + { + "epoch": 0.09597798354499326, + "grad_norm": 0.0, + "learning_rate": 3.036663609024054e-06, + "loss": 15.4386, + "step": 1674 + }, + { + "epoch": 0.09603531806323998, + "grad_norm": 0.0, + "learning_rate": 3.03097310942939e-06, + "loss": 14.9984, + "step": 1675 + }, + { + "epoch": 0.09609265258148668, + "grad_norm": 0.0, + "learning_rate": 3.025285626784651e-06, + "loss": 15.175, + "step": 1676 + }, + { + "epoch": 0.0961499870997334, + "grad_norm": 0.0, + "learning_rate": 3.019601169804216e-06, + "loss": 15.1857, + "step": 1677 + }, + { + "epoch": 0.09620732161798011, + "grad_norm": 0.0, + "learning_rate": 3.013919747197832e-06, + "loss": 14.9624, + "step": 1678 + }, + { + "epoch": 0.09626465613622681, + "grad_norm": 0.0, + "learning_rate": 3.0082413676705914e-06, + "loss": 15.1623, + "step": 1679 + }, + { + "epoch": 0.09632199065447353, + "grad_norm": 0.0, + "learning_rate": 3.00256603992293e-06, + "loss": 15.2423, + "step": 1680 + }, + { + "epoch": 0.09637932517272024, + "grad_norm": 0.0, + "learning_rate": 2.996893772650602e-06, + "loss": 15.1929, + "step": 1681 + }, + { + "epoch": 0.09643665969096694, + "grad_norm": 0.0, + "learning_rate": 2.99122457454468e-06, + "loss": 15.4669, + "step": 1682 + }, + { + "epoch": 0.09649399420921366, + "grad_norm": 0.0, + "learning_rate": 2.985558454291525e-06, + "loss": 15.1124, + "step": 1683 + }, + { + "epoch": 0.09655132872746036, + "grad_norm": 0.0, + "learning_rate": 2.9798954205727886e-06, + "loss": 15.2577, + "step": 1684 + }, + { + "epoch": 0.09660866324570708, + "grad_norm": 0.0, + "learning_rate": 2.9742354820653884e-06, + "loss": 15.0487, + "step": 1685 + }, + { + "epoch": 0.09666599776395379, + "grad_norm": 0.0, + "learning_rate": 2.9685786474415057e-06, + "loss": 14.9933, + "step": 1686 + }, + { + "epoch": 0.0967233322822005, + "grad_norm": 0.0, + "learning_rate": 2.96292492536856e-06, + "loss": 15.0056, + "step": 1687 + }, + { + "epoch": 0.09678066680044721, + "grad_norm": 0.0, + "learning_rate": 2.957274324509206e-06, + "loss": 15.1144, + "step": 1688 + }, + { + "epoch": 0.09683800131869393, + "grad_norm": 0.0, + "learning_rate": 2.95162685352131e-06, + "loss": 15.3127, + "step": 1689 + }, + { + "epoch": 0.09689533583694063, + "grad_norm": 0.0, + "learning_rate": 2.9459825210579534e-06, + "loss": 15.1207, + "step": 1690 + }, + { + "epoch": 0.09695267035518734, + "grad_norm": 0.0, + "learning_rate": 2.9403413357673955e-06, + "loss": 14.9744, + "step": 1691 + }, + { + "epoch": 0.09701000487343404, + "grad_norm": 0.0, + "learning_rate": 2.9347033062930856e-06, + "loss": 15.2412, + "step": 1692 + }, + { + "epoch": 0.09706733939168076, + "grad_norm": 0.0, + "learning_rate": 2.929068441273629e-06, + "loss": 14.9284, + "step": 1693 + }, + { + "epoch": 0.09712467390992748, + "grad_norm": 0.0, + "learning_rate": 2.923436749342788e-06, + "loss": 15.1222, + "step": 1694 + }, + { + "epoch": 0.09718200842817418, + "grad_norm": 0.0, + "learning_rate": 2.9178082391294573e-06, + "loss": 15.1443, + "step": 1695 + }, + { + "epoch": 0.09723934294642089, + "grad_norm": 0.0, + "learning_rate": 2.9121829192576647e-06, + "loss": 15.1553, + "step": 1696 + }, + { + "epoch": 0.09729667746466761, + "grad_norm": 0.0, + "learning_rate": 2.90656079834654e-06, + "loss": 15.2868, + "step": 1697 + }, + { + "epoch": 0.09735401198291431, + "grad_norm": 0.0, + "learning_rate": 2.9009418850103218e-06, + "loss": 15.0563, + "step": 1698 + }, + { + "epoch": 0.09741134650116103, + "grad_norm": 0.0, + "learning_rate": 2.8953261878583263e-06, + "loss": 15.0829, + "step": 1699 + }, + { + "epoch": 0.09746868101940774, + "grad_norm": 0.0, + "learning_rate": 2.889713715494944e-06, + "loss": 15.2201, + "step": 1700 + }, + { + "epoch": 0.09752601553765444, + "grad_norm": 0.0, + "learning_rate": 2.8841044765196236e-06, + "loss": 15.1362, + "step": 1701 + }, + { + "epoch": 0.09758335005590116, + "grad_norm": 0.0, + "learning_rate": 2.8784984795268644e-06, + "loss": 15.1359, + "step": 1702 + }, + { + "epoch": 0.09764068457414786, + "grad_norm": 0.0, + "learning_rate": 2.8728957331061914e-06, + "loss": 15.3242, + "step": 1703 + }, + { + "epoch": 0.09769801909239458, + "grad_norm": 0.0, + "learning_rate": 2.8672962458421548e-06, + "loss": 15.243, + "step": 1704 + }, + { + "epoch": 0.09775535361064129, + "grad_norm": 0.0, + "learning_rate": 2.861700026314308e-06, + "loss": 15.295, + "step": 1705 + }, + { + "epoch": 0.09781268812888799, + "grad_norm": 0.0, + "learning_rate": 2.8561070830971975e-06, + "loss": 14.8623, + "step": 1706 + }, + { + "epoch": 0.09787002264713471, + "grad_norm": 0.0, + "learning_rate": 2.8505174247603495e-06, + "loss": 15.1138, + "step": 1707 + }, + { + "epoch": 0.09792735716538142, + "grad_norm": 0.0, + "learning_rate": 2.844931059868261e-06, + "loss": 15.2223, + "step": 1708 + }, + { + "epoch": 0.09798469168362813, + "grad_norm": 0.0, + "learning_rate": 2.839347996980376e-06, + "loss": 15.2052, + "step": 1709 + }, + { + "epoch": 0.09804202620187484, + "grad_norm": 0.0, + "learning_rate": 2.8337682446510883e-06, + "loss": 15.1068, + "step": 1710 + }, + { + "epoch": 0.09809936072012154, + "grad_norm": 0.0, + "learning_rate": 2.828191811429709e-06, + "loss": 15.4515, + "step": 1711 + }, + { + "epoch": 0.09815669523836826, + "grad_norm": 0.0, + "learning_rate": 2.8226187058604735e-06, + "loss": 15.5278, + "step": 1712 + }, + { + "epoch": 0.09821402975661497, + "grad_norm": 0.0, + "learning_rate": 2.8170489364825106e-06, + "loss": 14.9237, + "step": 1713 + }, + { + "epoch": 0.09827136427486168, + "grad_norm": 0.0, + "learning_rate": 2.811482511829842e-06, + "loss": 15.1981, + "step": 1714 + }, + { + "epoch": 0.09832869879310839, + "grad_norm": 0.0, + "learning_rate": 2.805919440431359e-06, + "loss": 15.1981, + "step": 1715 + }, + { + "epoch": 0.0983860333113551, + "grad_norm": 0.0, + "learning_rate": 2.8003597308108246e-06, + "loss": 14.7001, + "step": 1716 + }, + { + "epoch": 0.09844336782960181, + "grad_norm": 0.0, + "learning_rate": 2.7948033914868415e-06, + "loss": 15.3086, + "step": 1717 + }, + { + "epoch": 0.09850070234784852, + "grad_norm": 0.0, + "learning_rate": 2.7892504309728564e-06, + "loss": 14.7995, + "step": 1718 + }, + { + "epoch": 0.09855803686609524, + "grad_norm": 0.0, + "learning_rate": 2.7837008577771317e-06, + "loss": 15.2355, + "step": 1719 + }, + { + "epoch": 0.09861537138434194, + "grad_norm": 0.0, + "learning_rate": 2.778154680402745e-06, + "loss": 14.8578, + "step": 1720 + }, + { + "epoch": 0.09867270590258866, + "grad_norm": 0.0, + "learning_rate": 2.7726119073475643e-06, + "loss": 15.1245, + "step": 1721 + }, + { + "epoch": 0.09873004042083536, + "grad_norm": 0.0, + "learning_rate": 2.7670725471042526e-06, + "loss": 14.927, + "step": 1722 + }, + { + "epoch": 0.09878737493908207, + "grad_norm": 0.0, + "learning_rate": 2.7615366081602306e-06, + "loss": 15.2329, + "step": 1723 + }, + { + "epoch": 0.09884470945732879, + "grad_norm": 0.0, + "learning_rate": 2.7560040989976894e-06, + "loss": 15.0808, + "step": 1724 + }, + { + "epoch": 0.09890204397557549, + "grad_norm": 0.0, + "learning_rate": 2.750475028093554e-06, + "loss": 15.1372, + "step": 1725 + }, + { + "epoch": 0.0989593784938222, + "grad_norm": 0.0, + "learning_rate": 2.74494940391949e-06, + "loss": 15.2879, + "step": 1726 + }, + { + "epoch": 0.09901671301206892, + "grad_norm": 0.0, + "learning_rate": 2.7394272349418776e-06, + "loss": 15.1674, + "step": 1727 + }, + { + "epoch": 0.09907404753031562, + "grad_norm": 0.0, + "learning_rate": 2.733908529621802e-06, + "loss": 15.0526, + "step": 1728 + }, + { + "epoch": 0.09913138204856234, + "grad_norm": 0.0, + "learning_rate": 2.7283932964150417e-06, + "loss": 15.5379, + "step": 1729 + }, + { + "epoch": 0.09918871656680905, + "grad_norm": 0.0, + "learning_rate": 2.7228815437720602e-06, + "loss": 15.2825, + "step": 1730 + }, + { + "epoch": 0.09924605108505576, + "grad_norm": 0.0, + "learning_rate": 2.7173732801379805e-06, + "loss": 15.0891, + "step": 1731 + }, + { + "epoch": 0.09930338560330247, + "grad_norm": 0.0, + "learning_rate": 2.711868513952587e-06, + "loss": 15.1538, + "step": 1732 + }, + { + "epoch": 0.09936072012154917, + "grad_norm": 0.0, + "learning_rate": 2.7063672536502995e-06, + "loss": 15.2978, + "step": 1733 + }, + { + "epoch": 0.09941805463979589, + "grad_norm": 0.0, + "learning_rate": 2.7008695076601693e-06, + "loss": 14.8973, + "step": 1734 + }, + { + "epoch": 0.0994753891580426, + "grad_norm": 0.0, + "learning_rate": 2.69537528440586e-06, + "loss": 15.4109, + "step": 1735 + }, + { + "epoch": 0.0995327236762893, + "grad_norm": 0.0, + "learning_rate": 2.6898845923056437e-06, + "loss": 15.1761, + "step": 1736 + }, + { + "epoch": 0.09959005819453602, + "grad_norm": 0.0, + "learning_rate": 2.6843974397723736e-06, + "loss": 14.8358, + "step": 1737 + }, + { + "epoch": 0.09964739271278274, + "grad_norm": 0.0, + "learning_rate": 2.6789138352134885e-06, + "loss": 14.9992, + "step": 1738 + }, + { + "epoch": 0.09970472723102944, + "grad_norm": 0.0, + "learning_rate": 2.6734337870309844e-06, + "loss": 15.0057, + "step": 1739 + }, + { + "epoch": 0.09976206174927615, + "grad_norm": 0.0, + "learning_rate": 2.6679573036214112e-06, + "loss": 14.9869, + "step": 1740 + }, + { + "epoch": 0.09981939626752286, + "grad_norm": 0.0, + "learning_rate": 2.6624843933758547e-06, + "loss": 15.4995, + "step": 1741 + }, + { + "epoch": 0.09987673078576957, + "grad_norm": 0.0, + "learning_rate": 2.6570150646799266e-06, + "loss": 15.1863, + "step": 1742 + }, + { + "epoch": 0.09993406530401629, + "grad_norm": 0.0, + "learning_rate": 2.6515493259137546e-06, + "loss": 15.227, + "step": 1743 + }, + { + "epoch": 0.09999139982226299, + "grad_norm": 0.0, + "learning_rate": 2.6460871854519594e-06, + "loss": 14.8933, + "step": 1744 + }, + { + "epoch": 0.1000487343405097, + "grad_norm": 0.0, + "learning_rate": 2.6406286516636546e-06, + "loss": 14.9753, + "step": 1745 + }, + { + "epoch": 0.10010606885875642, + "grad_norm": 0.0, + "learning_rate": 2.635173732912423e-06, + "loss": 15.2712, + "step": 1746 + }, + { + "epoch": 0.10016340337700312, + "grad_norm": 0.0, + "learning_rate": 2.6297224375563126e-06, + "loss": 15.0092, + "step": 1747 + }, + { + "epoch": 0.10022073789524984, + "grad_norm": 0.0, + "learning_rate": 2.6242747739478158e-06, + "loss": 15.1965, + "step": 1748 + }, + { + "epoch": 0.10027807241349655, + "grad_norm": 0.0, + "learning_rate": 2.618830750433862e-06, + "loss": 15.0236, + "step": 1749 + }, + { + "epoch": 0.10033540693174325, + "grad_norm": 0.0, + "learning_rate": 2.613390375355801e-06, + "loss": 14.9518, + "step": 1750 + }, + { + "epoch": 0.10039274144998997, + "grad_norm": 0.0, + "learning_rate": 2.607953657049398e-06, + "loss": 14.8813, + "step": 1751 + }, + { + "epoch": 0.10045007596823667, + "grad_norm": 0.0, + "learning_rate": 2.60252060384481e-06, + "loss": 15.0054, + "step": 1752 + }, + { + "epoch": 0.10050741048648339, + "grad_norm": 0.0, + "learning_rate": 2.5970912240665815e-06, + "loss": 14.9681, + "step": 1753 + }, + { + "epoch": 0.1005647450047301, + "grad_norm": 0.0, + "learning_rate": 2.591665526033628e-06, + "loss": 14.9709, + "step": 1754 + }, + { + "epoch": 0.1006220795229768, + "grad_norm": 0.0, + "learning_rate": 2.5862435180592203e-06, + "loss": 15.0781, + "step": 1755 + }, + { + "epoch": 0.10067941404122352, + "grad_norm": 0.0, + "learning_rate": 2.5808252084509784e-06, + "loss": 14.9999, + "step": 1756 + }, + { + "epoch": 0.10073674855947023, + "grad_norm": 0.0, + "learning_rate": 2.575410605510858e-06, + "loss": 15.0287, + "step": 1757 + }, + { + "epoch": 0.10079408307771694, + "grad_norm": 0.0, + "learning_rate": 2.5699997175351293e-06, + "loss": 15.1299, + "step": 1758 + }, + { + "epoch": 0.10085141759596365, + "grad_norm": 0.0, + "learning_rate": 2.5645925528143778e-06, + "loss": 14.9807, + "step": 1759 + }, + { + "epoch": 0.10090875211421035, + "grad_norm": 0.0, + "learning_rate": 2.559189119633476e-06, + "loss": 15.2697, + "step": 1760 + }, + { + "epoch": 0.10096608663245707, + "grad_norm": 0.0, + "learning_rate": 2.553789426271588e-06, + "loss": 15.1754, + "step": 1761 + }, + { + "epoch": 0.10102342115070378, + "grad_norm": 0.0, + "learning_rate": 2.54839348100214e-06, + "loss": 15.2112, + "step": 1762 + }, + { + "epoch": 0.10108075566895049, + "grad_norm": 0.0, + "learning_rate": 2.543001292092819e-06, + "loss": 15.0921, + "step": 1763 + }, + { + "epoch": 0.1011380901871972, + "grad_norm": 0.0, + "learning_rate": 2.5376128678055536e-06, + "loss": 14.9949, + "step": 1764 + }, + { + "epoch": 0.10119542470544392, + "grad_norm": 0.0, + "learning_rate": 2.5322282163965096e-06, + "loss": 15.0155, + "step": 1765 + }, + { + "epoch": 0.10125275922369062, + "grad_norm": 0.0, + "learning_rate": 2.5268473461160665e-06, + "loss": 15.1644, + "step": 1766 + }, + { + "epoch": 0.10131009374193733, + "grad_norm": 0.0, + "learning_rate": 2.521470265208815e-06, + "loss": 15.0194, + "step": 1767 + }, + { + "epoch": 0.10136742826018405, + "grad_norm": 0.0, + "learning_rate": 2.5160969819135368e-06, + "loss": 14.571, + "step": 1768 + }, + { + "epoch": 0.10142476277843075, + "grad_norm": 0.0, + "learning_rate": 2.5107275044631942e-06, + "loss": 15.3127, + "step": 1769 + }, + { + "epoch": 0.10148209729667747, + "grad_norm": 0.0, + "learning_rate": 2.5053618410849186e-06, + "loss": 15.0523, + "step": 1770 + }, + { + "epoch": 0.10153943181492417, + "grad_norm": 0.0, + "learning_rate": 2.5000000000000015e-06, + "loss": 15.1352, + "step": 1771 + }, + { + "epoch": 0.10159676633317088, + "grad_norm": 0.0, + "learning_rate": 2.4946419894238705e-06, + "loss": 15.0326, + "step": 1772 + }, + { + "epoch": 0.1016541008514176, + "grad_norm": 0.0, + "learning_rate": 2.4892878175660927e-06, + "loss": 15.1512, + "step": 1773 + }, + { + "epoch": 0.1017114353696643, + "grad_norm": 0.0, + "learning_rate": 2.483937492630345e-06, + "loss": 15.2138, + "step": 1774 + }, + { + "epoch": 0.10176876988791102, + "grad_norm": 0.0, + "learning_rate": 2.47859102281442e-06, + "loss": 15.042, + "step": 1775 + }, + { + "epoch": 0.10182610440615773, + "grad_norm": 0.0, + "learning_rate": 2.4732484163101896e-06, + "loss": 15.1799, + "step": 1776 + }, + { + "epoch": 0.10188343892440443, + "grad_norm": 0.0, + "learning_rate": 2.4679096813036202e-06, + "loss": 15.3713, + "step": 1777 + }, + { + "epoch": 0.10194077344265115, + "grad_norm": 0.0, + "learning_rate": 2.4625748259747363e-06, + "loss": 14.9062, + "step": 1778 + }, + { + "epoch": 0.10199810796089785, + "grad_norm": 0.0, + "learning_rate": 2.457243858497626e-06, + "loss": 15.33, + "step": 1779 + }, + { + "epoch": 0.10205544247914457, + "grad_norm": 0.0, + "learning_rate": 2.4519167870404126e-06, + "loss": 15.1443, + "step": 1780 + }, + { + "epoch": 0.10211277699739128, + "grad_norm": 0.0, + "learning_rate": 2.4465936197652573e-06, + "loss": 15.3425, + "step": 1781 + }, + { + "epoch": 0.10217011151563798, + "grad_norm": 0.0, + "learning_rate": 2.4412743648283343e-06, + "loss": 14.8019, + "step": 1782 + }, + { + "epoch": 0.1022274460338847, + "grad_norm": 0.0, + "learning_rate": 2.4359590303798243e-06, + "loss": 14.9075, + "step": 1783 + }, + { + "epoch": 0.10228478055213142, + "grad_norm": 0.0, + "learning_rate": 2.4306476245638995e-06, + "loss": 15.0322, + "step": 1784 + }, + { + "epoch": 0.10234211507037812, + "grad_norm": 0.0, + "learning_rate": 2.4253401555187183e-06, + "loss": 14.9531, + "step": 1785 + }, + { + "epoch": 0.10239944958862483, + "grad_norm": 0.0, + "learning_rate": 2.4200366313764e-06, + "loss": 14.9875, + "step": 1786 + }, + { + "epoch": 0.10245678410687155, + "grad_norm": 0.0, + "learning_rate": 2.4147370602630267e-06, + "loss": 14.8213, + "step": 1787 + }, + { + "epoch": 0.10251411862511825, + "grad_norm": 0.0, + "learning_rate": 2.4094414502986176e-06, + "loss": 15.1506, + "step": 1788 + }, + { + "epoch": 0.10257145314336497, + "grad_norm": 0.0, + "learning_rate": 2.4041498095971253e-06, + "loss": 14.9495, + "step": 1789 + }, + { + "epoch": 0.10262878766161167, + "grad_norm": 0.0, + "learning_rate": 2.398862146266418e-06, + "loss": 15.2569, + "step": 1790 + }, + { + "epoch": 0.10268612217985838, + "grad_norm": 0.0, + "learning_rate": 2.3935784684082763e-06, + "loss": 15.5546, + "step": 1791 + }, + { + "epoch": 0.1027434566981051, + "grad_norm": 0.0, + "learning_rate": 2.388298784118366e-06, + "loss": 14.7149, + "step": 1792 + }, + { + "epoch": 0.1028007912163518, + "grad_norm": 0.0, + "learning_rate": 2.3830231014862415e-06, + "loss": 15.0869, + "step": 1793 + }, + { + "epoch": 0.10285812573459852, + "grad_norm": 0.0, + "learning_rate": 2.3777514285953192e-06, + "loss": 15.0755, + "step": 1794 + }, + { + "epoch": 0.10291546025284523, + "grad_norm": 0.0, + "learning_rate": 2.3724837735228773e-06, + "loss": 14.853, + "step": 1795 + }, + { + "epoch": 0.10297279477109193, + "grad_norm": 0.0, + "learning_rate": 2.367220144340035e-06, + "loss": 15.2218, + "step": 1796 + }, + { + "epoch": 0.10303012928933865, + "grad_norm": 0.0, + "learning_rate": 2.361960549111742e-06, + "loss": 15.0583, + "step": 1797 + }, + { + "epoch": 0.10308746380758535, + "grad_norm": 0.0, + "learning_rate": 2.356704995896768e-06, + "loss": 15.1339, + "step": 1798 + }, + { + "epoch": 0.10314479832583207, + "grad_norm": 0.0, + "learning_rate": 2.3514534927476935e-06, + "loss": 15.0067, + "step": 1799 + }, + { + "epoch": 0.10320213284407878, + "grad_norm": 0.0, + "learning_rate": 2.3462060477108856e-06, + "loss": 15.0885, + "step": 1800 + }, + { + "epoch": 0.10325946736232548, + "grad_norm": 0.0, + "learning_rate": 2.340962668826503e-06, + "loss": 15.144, + "step": 1801 + }, + { + "epoch": 0.1033168018805722, + "grad_norm": 0.0, + "learning_rate": 2.3357233641284665e-06, + "loss": 15.266, + "step": 1802 + }, + { + "epoch": 0.10337413639881891, + "grad_norm": 0.0, + "learning_rate": 2.330488141644457e-06, + "loss": 15.1685, + "step": 1803 + }, + { + "epoch": 0.10343147091706562, + "grad_norm": 0.0, + "learning_rate": 2.3252570093959e-06, + "loss": 15.1289, + "step": 1804 + }, + { + "epoch": 0.10348880543531233, + "grad_norm": 0.0, + "learning_rate": 2.320029975397957e-06, + "loss": 15.1866, + "step": 1805 + }, + { + "epoch": 0.10354613995355905, + "grad_norm": 0.0, + "learning_rate": 2.314807047659506e-06, + "loss": 15.1786, + "step": 1806 + }, + { + "epoch": 0.10360347447180575, + "grad_norm": 0.0, + "learning_rate": 2.309588234183137e-06, + "loss": 14.7894, + "step": 1807 + }, + { + "epoch": 0.10366080899005246, + "grad_norm": 0.0, + "learning_rate": 2.304373542965132e-06, + "loss": 15.1901, + "step": 1808 + }, + { + "epoch": 0.10371814350829917, + "grad_norm": 0.0, + "learning_rate": 2.2991629819954626e-06, + "loss": 14.9909, + "step": 1809 + }, + { + "epoch": 0.10377547802654588, + "grad_norm": 0.0, + "learning_rate": 2.293956559257766e-06, + "loss": 14.6973, + "step": 1810 + }, + { + "epoch": 0.1038328125447926, + "grad_norm": 0.0, + "learning_rate": 2.2887542827293424e-06, + "loss": 15.1475, + "step": 1811 + }, + { + "epoch": 0.1038901470630393, + "grad_norm": 0.0, + "learning_rate": 2.2835561603811363e-06, + "loss": 14.8105, + "step": 1812 + }, + { + "epoch": 0.10394748158128601, + "grad_norm": 0.0, + "learning_rate": 2.2783622001777322e-06, + "loss": 14.9498, + "step": 1813 + }, + { + "epoch": 0.10400481609953273, + "grad_norm": 0.0, + "learning_rate": 2.2731724100773305e-06, + "loss": 15.0761, + "step": 1814 + }, + { + "epoch": 0.10406215061777943, + "grad_norm": 0.0, + "learning_rate": 2.26798679803175e-06, + "loss": 15.3239, + "step": 1815 + }, + { + "epoch": 0.10411948513602615, + "grad_norm": 0.0, + "learning_rate": 2.262805371986402e-06, + "loss": 15.0212, + "step": 1816 + }, + { + "epoch": 0.10417681965427285, + "grad_norm": 0.0, + "learning_rate": 2.257628139880285e-06, + "loss": 14.8911, + "step": 1817 + }, + { + "epoch": 0.10423415417251956, + "grad_norm": 0.0, + "learning_rate": 2.2524551096459703e-06, + "loss": 15.3894, + "step": 1818 + }, + { + "epoch": 0.10429148869076628, + "grad_norm": 0.0, + "learning_rate": 2.247286289209597e-06, + "loss": 15.4648, + "step": 1819 + }, + { + "epoch": 0.10434882320901298, + "grad_norm": 0.0, + "learning_rate": 2.242121686490847e-06, + "loss": 15.2684, + "step": 1820 + }, + { + "epoch": 0.1044061577272597, + "grad_norm": 0.0, + "learning_rate": 2.236961309402945e-06, + "loss": 15.3737, + "step": 1821 + }, + { + "epoch": 0.10446349224550641, + "grad_norm": 0.0, + "learning_rate": 2.231805165852637e-06, + "loss": 14.9809, + "step": 1822 + }, + { + "epoch": 0.10452082676375311, + "grad_norm": 0.0, + "learning_rate": 2.2266532637401867e-06, + "loss": 14.7812, + "step": 1823 + }, + { + "epoch": 0.10457816128199983, + "grad_norm": 0.0, + "learning_rate": 2.2215056109593547e-06, + "loss": 15.1497, + "step": 1824 + }, + { + "epoch": 0.10463549580024654, + "grad_norm": 0.0, + "learning_rate": 2.216362215397393e-06, + "loss": 14.7571, + "step": 1825 + }, + { + "epoch": 0.10469283031849325, + "grad_norm": 0.0, + "learning_rate": 2.2112230849350286e-06, + "loss": 15.2152, + "step": 1826 + }, + { + "epoch": 0.10475016483673996, + "grad_norm": 0.0, + "learning_rate": 2.206088227446459e-06, + "loss": 14.8404, + "step": 1827 + }, + { + "epoch": 0.10480749935498666, + "grad_norm": 0.0, + "learning_rate": 2.2009576507993273e-06, + "loss": 14.8393, + "step": 1828 + }, + { + "epoch": 0.10486483387323338, + "grad_norm": 0.0, + "learning_rate": 2.1958313628547247e-06, + "loss": 15.1098, + "step": 1829 + }, + { + "epoch": 0.1049221683914801, + "grad_norm": 0.0, + "learning_rate": 2.190709371467165e-06, + "loss": 15.2116, + "step": 1830 + }, + { + "epoch": 0.1049795029097268, + "grad_norm": 0.0, + "learning_rate": 2.1855916844845827e-06, + "loss": 15.1286, + "step": 1831 + }, + { + "epoch": 0.10503683742797351, + "grad_norm": 0.0, + "learning_rate": 2.180478309748313e-06, + "loss": 15.0697, + "step": 1832 + }, + { + "epoch": 0.10509417194622023, + "grad_norm": 0.0, + "learning_rate": 2.175369255093091e-06, + "loss": 14.8526, + "step": 1833 + }, + { + "epoch": 0.10515150646446693, + "grad_norm": 0.0, + "learning_rate": 2.1702645283470238e-06, + "loss": 15.0791, + "step": 1834 + }, + { + "epoch": 0.10520884098271364, + "grad_norm": 0.0, + "learning_rate": 2.165164137331596e-06, + "loss": 15.1782, + "step": 1835 + }, + { + "epoch": 0.10526617550096036, + "grad_norm": 0.0, + "learning_rate": 2.16006808986164e-06, + "loss": 15.0698, + "step": 1836 + }, + { + "epoch": 0.10532351001920706, + "grad_norm": 0.0, + "learning_rate": 2.1549763937453445e-06, + "loss": 15.0492, + "step": 1837 + }, + { + "epoch": 0.10538084453745378, + "grad_norm": 0.0, + "learning_rate": 2.1498890567842175e-06, + "loss": 15.3157, + "step": 1838 + }, + { + "epoch": 0.10543817905570048, + "grad_norm": 0.0, + "learning_rate": 2.144806086773095e-06, + "loss": 14.9877, + "step": 1839 + }, + { + "epoch": 0.1054955135739472, + "grad_norm": 0.0, + "learning_rate": 2.1397274915001254e-06, + "loss": 15.2216, + "step": 1840 + }, + { + "epoch": 0.10555284809219391, + "grad_norm": 0.0, + "learning_rate": 2.1346532787467466e-06, + "loss": 15.1515, + "step": 1841 + }, + { + "epoch": 0.10561018261044061, + "grad_norm": 0.0, + "learning_rate": 2.129583456287689e-06, + "loss": 15.2111, + "step": 1842 + }, + { + "epoch": 0.10566751712868733, + "grad_norm": 0.0, + "learning_rate": 2.1245180318909482e-06, + "loss": 14.9736, + "step": 1843 + }, + { + "epoch": 0.10572485164693404, + "grad_norm": 0.0, + "learning_rate": 2.119457013317789e-06, + "loss": 15.1239, + "step": 1844 + }, + { + "epoch": 0.10578218616518074, + "grad_norm": 0.0, + "learning_rate": 2.11440040832272e-06, + "loss": 15.1009, + "step": 1845 + }, + { + "epoch": 0.10583952068342746, + "grad_norm": 0.0, + "learning_rate": 2.1093482246534896e-06, + "loss": 14.9144, + "step": 1846 + }, + { + "epoch": 0.10589685520167416, + "grad_norm": 0.0, + "learning_rate": 2.1043004700510694e-06, + "loss": 15.2687, + "step": 1847 + }, + { + "epoch": 0.10595418971992088, + "grad_norm": 0.0, + "learning_rate": 2.0992571522496502e-06, + "loss": 15.3205, + "step": 1848 + }, + { + "epoch": 0.10601152423816759, + "grad_norm": 0.0, + "learning_rate": 2.0942182789766174e-06, + "loss": 15.1931, + "step": 1849 + }, + { + "epoch": 0.1060688587564143, + "grad_norm": 0.0, + "learning_rate": 2.0891838579525547e-06, + "loss": 15.2253, + "step": 1850 + }, + { + "epoch": 0.10612619327466101, + "grad_norm": 0.0, + "learning_rate": 2.084153896891217e-06, + "loss": 15.0989, + "step": 1851 + }, + { + "epoch": 0.10618352779290773, + "grad_norm": 0.0, + "learning_rate": 2.0791284034995296e-06, + "loss": 15.1758, + "step": 1852 + }, + { + "epoch": 0.10624086231115443, + "grad_norm": 0.0, + "learning_rate": 2.074107385477568e-06, + "loss": 15.0728, + "step": 1853 + }, + { + "epoch": 0.10629819682940114, + "grad_norm": 0.0, + "learning_rate": 2.0690908505185577e-06, + "loss": 14.9472, + "step": 1854 + }, + { + "epoch": 0.10635553134764786, + "grad_norm": 0.0, + "learning_rate": 2.064078806308848e-06, + "loss": 14.9618, + "step": 1855 + }, + { + "epoch": 0.10641286586589456, + "grad_norm": 0.0, + "learning_rate": 2.0590712605279135e-06, + "loss": 14.966, + "step": 1856 + }, + { + "epoch": 0.10647020038414128, + "grad_norm": 0.0, + "learning_rate": 2.054068220848331e-06, + "loss": 14.9155, + "step": 1857 + }, + { + "epoch": 0.10652753490238798, + "grad_norm": 0.0, + "learning_rate": 2.0490696949357774e-06, + "loss": 14.9877, + "step": 1858 + }, + { + "epoch": 0.10658486942063469, + "grad_norm": 0.0, + "learning_rate": 2.0440756904490115e-06, + "loss": 14.8584, + "step": 1859 + }, + { + "epoch": 0.10664220393888141, + "grad_norm": 0.0, + "learning_rate": 2.0390862150398637e-06, + "loss": 15.2422, + "step": 1860 + }, + { + "epoch": 0.10669953845712811, + "grad_norm": 0.0, + "learning_rate": 2.0341012763532243e-06, + "loss": 14.9152, + "step": 1861 + }, + { + "epoch": 0.10675687297537483, + "grad_norm": 0.0, + "learning_rate": 2.0291208820270368e-06, + "loss": 15.1744, + "step": 1862 + }, + { + "epoch": 0.10681420749362154, + "grad_norm": 0.0, + "learning_rate": 2.024145039692277e-06, + "loss": 14.7057, + "step": 1863 + }, + { + "epoch": 0.10687154201186824, + "grad_norm": 0.0, + "learning_rate": 2.0191737569729492e-06, + "loss": 15.0924, + "step": 1864 + }, + { + "epoch": 0.10692887653011496, + "grad_norm": 0.0, + "learning_rate": 2.0142070414860704e-06, + "loss": 14.8843, + "step": 1865 + }, + { + "epoch": 0.10698621104836166, + "grad_norm": 0.0, + "learning_rate": 2.009244900841658e-06, + "loss": 14.9535, + "step": 1866 + }, + { + "epoch": 0.10704354556660838, + "grad_norm": 0.0, + "learning_rate": 2.004287342642721e-06, + "loss": 15.2298, + "step": 1867 + }, + { + "epoch": 0.10710088008485509, + "grad_norm": 0.0, + "learning_rate": 1.9993343744852504e-06, + "loss": 14.8105, + "step": 1868 + }, + { + "epoch": 0.10715821460310179, + "grad_norm": 0.0, + "learning_rate": 1.994386003958198e-06, + "loss": 15.1573, + "step": 1869 + }, + { + "epoch": 0.10721554912134851, + "grad_norm": 0.0, + "learning_rate": 1.989442238643478e-06, + "loss": 15.06, + "step": 1870 + }, + { + "epoch": 0.10727288363959522, + "grad_norm": 0.0, + "learning_rate": 1.9845030861159416e-06, + "loss": 14.703, + "step": 1871 + }, + { + "epoch": 0.10733021815784193, + "grad_norm": 0.0, + "learning_rate": 1.9795685539433785e-06, + "loss": 15.1016, + "step": 1872 + }, + { + "epoch": 0.10738755267608864, + "grad_norm": 0.0, + "learning_rate": 1.974638649686495e-06, + "loss": 15.0317, + "step": 1873 + }, + { + "epoch": 0.10744488719433536, + "grad_norm": 0.0, + "learning_rate": 1.9697133808989084e-06, + "loss": 14.9478, + "step": 1874 + }, + { + "epoch": 0.10750222171258206, + "grad_norm": 0.0, + "learning_rate": 1.9647927551271302e-06, + "loss": 15.0411, + "step": 1875 + }, + { + "epoch": 0.10755955623082877, + "grad_norm": 0.0, + "learning_rate": 1.959876779910564e-06, + "loss": 15.0192, + "step": 1876 + }, + { + "epoch": 0.10761689074907548, + "grad_norm": 0.0, + "learning_rate": 1.954965462781481e-06, + "loss": 14.9757, + "step": 1877 + }, + { + "epoch": 0.10767422526732219, + "grad_norm": 0.0, + "learning_rate": 1.950058811265022e-06, + "loss": 15.2199, + "step": 1878 + }, + { + "epoch": 0.1077315597855689, + "grad_norm": 0.0, + "learning_rate": 1.945156832879174e-06, + "loss": 15.1254, + "step": 1879 + }, + { + "epoch": 0.10778889430381561, + "grad_norm": 0.0, + "learning_rate": 1.9402595351347656e-06, + "loss": 15.0774, + "step": 1880 + }, + { + "epoch": 0.10784622882206232, + "grad_norm": 0.0, + "learning_rate": 1.935366925535452e-06, + "loss": 14.932, + "step": 1881 + }, + { + "epoch": 0.10790356334030904, + "grad_norm": 0.0, + "learning_rate": 1.930479011577711e-06, + "loss": 15.0582, + "step": 1882 + }, + { + "epoch": 0.10796089785855574, + "grad_norm": 0.0, + "learning_rate": 1.925595800750816e-06, + "loss": 15.268, + "step": 1883 + }, + { + "epoch": 0.10801823237680246, + "grad_norm": 0.0, + "learning_rate": 1.920717300536846e-06, + "loss": 15.4256, + "step": 1884 + }, + { + "epoch": 0.10807556689504916, + "grad_norm": 0.0, + "learning_rate": 1.91584351841065e-06, + "loss": 14.9376, + "step": 1885 + }, + { + "epoch": 0.10813290141329587, + "grad_norm": 0.0, + "learning_rate": 1.9109744618398607e-06, + "loss": 15.2782, + "step": 1886 + }, + { + "epoch": 0.10819023593154259, + "grad_norm": 0.0, + "learning_rate": 1.9061101382848567e-06, + "loss": 14.8349, + "step": 1887 + }, + { + "epoch": 0.10824757044978929, + "grad_norm": 0.0, + "learning_rate": 1.9012505551987764e-06, + "loss": 15.1059, + "step": 1888 + }, + { + "epoch": 0.108304904968036, + "grad_norm": 0.0, + "learning_rate": 1.8963957200274874e-06, + "loss": 14.9271, + "step": 1889 + }, + { + "epoch": 0.10836223948628272, + "grad_norm": 0.0, + "learning_rate": 1.8915456402095883e-06, + "loss": 15.0641, + "step": 1890 + }, + { + "epoch": 0.10841957400452942, + "grad_norm": 0.0, + "learning_rate": 1.8867003231763847e-06, + "loss": 14.84, + "step": 1891 + }, + { + "epoch": 0.10847690852277614, + "grad_norm": 0.0, + "learning_rate": 1.8818597763518926e-06, + "loss": 14.9408, + "step": 1892 + }, + { + "epoch": 0.10853424304102285, + "grad_norm": 0.0, + "learning_rate": 1.8770240071528117e-06, + "loss": 15.1839, + "step": 1893 + }, + { + "epoch": 0.10859157755926956, + "grad_norm": 0.0, + "learning_rate": 1.872193022988526e-06, + "loss": 15.2123, + "step": 1894 + }, + { + "epoch": 0.10864891207751627, + "grad_norm": 0.0, + "learning_rate": 1.8673668312610843e-06, + "loss": 15.3868, + "step": 1895 + }, + { + "epoch": 0.10870624659576297, + "grad_norm": 0.0, + "learning_rate": 1.8625454393651976e-06, + "loss": 14.9713, + "step": 1896 + }, + { + "epoch": 0.10876358111400969, + "grad_norm": 0.0, + "learning_rate": 1.8577288546882167e-06, + "loss": 15.1364, + "step": 1897 + }, + { + "epoch": 0.1088209156322564, + "grad_norm": 0.0, + "learning_rate": 1.8529170846101318e-06, + "loss": 15.0209, + "step": 1898 + }, + { + "epoch": 0.1088782501505031, + "grad_norm": 0.0, + "learning_rate": 1.8481101365035537e-06, + "loss": 15.1795, + "step": 1899 + }, + { + "epoch": 0.10893558466874982, + "grad_norm": 0.0, + "learning_rate": 1.8433080177337043e-06, + "loss": 15.1728, + "step": 1900 + }, + { + "epoch": 0.10899291918699654, + "grad_norm": 0.0, + "learning_rate": 1.8385107356584058e-06, + "loss": 14.9725, + "step": 1901 + }, + { + "epoch": 0.10905025370524324, + "grad_norm": 0.0, + "learning_rate": 1.8337182976280731e-06, + "loss": 15.1804, + "step": 1902 + }, + { + "epoch": 0.10910758822348995, + "grad_norm": 0.0, + "learning_rate": 1.8289307109856941e-06, + "loss": 14.9713, + "step": 1903 + }, + { + "epoch": 0.10916492274173666, + "grad_norm": 0.0, + "learning_rate": 1.8241479830668291e-06, + "loss": 15.0086, + "step": 1904 + }, + { + "epoch": 0.10922225725998337, + "grad_norm": 0.0, + "learning_rate": 1.8193701211995862e-06, + "loss": 15.3655, + "step": 1905 + }, + { + "epoch": 0.10927959177823009, + "grad_norm": 0.0, + "learning_rate": 1.8145971327046274e-06, + "loss": 14.9315, + "step": 1906 + }, + { + "epoch": 0.10933692629647679, + "grad_norm": 0.0, + "learning_rate": 1.8098290248951394e-06, + "loss": 15.0965, + "step": 1907 + }, + { + "epoch": 0.1093942608147235, + "grad_norm": 0.0, + "learning_rate": 1.8050658050768338e-06, + "loss": 15.2293, + "step": 1908 + }, + { + "epoch": 0.10945159533297022, + "grad_norm": 0.0, + "learning_rate": 1.8003074805479314e-06, + "loss": 15.0582, + "step": 1909 + }, + { + "epoch": 0.10950892985121692, + "grad_norm": 0.0, + "learning_rate": 1.7955540585991577e-06, + "loss": 15.4075, + "step": 1910 + }, + { + "epoch": 0.10956626436946364, + "grad_norm": 0.0, + "learning_rate": 1.7908055465137181e-06, + "loss": 15.0865, + "step": 1911 + }, + { + "epoch": 0.10962359888771035, + "grad_norm": 0.0, + "learning_rate": 1.7860619515673034e-06, + "loss": 14.9347, + "step": 1912 + }, + { + "epoch": 0.10968093340595705, + "grad_norm": 0.0, + "learning_rate": 1.781323281028065e-06, + "loss": 15.1036, + "step": 1913 + }, + { + "epoch": 0.10973826792420377, + "grad_norm": 0.0, + "learning_rate": 1.7765895421566099e-06, + "loss": 14.9977, + "step": 1914 + }, + { + "epoch": 0.10979560244245047, + "grad_norm": 0.0, + "learning_rate": 1.771860742205988e-06, + "loss": 15.3883, + "step": 1915 + }, + { + "epoch": 0.10985293696069719, + "grad_norm": 0.0, + "learning_rate": 1.7671368884216873e-06, + "loss": 15.2364, + "step": 1916 + }, + { + "epoch": 0.1099102714789439, + "grad_norm": 0.0, + "learning_rate": 1.7624179880416087e-06, + "loss": 15.0877, + "step": 1917 + }, + { + "epoch": 0.1099676059971906, + "grad_norm": 0.0, + "learning_rate": 1.7577040482960723e-06, + "loss": 14.7803, + "step": 1918 + }, + { + "epoch": 0.11002494051543732, + "grad_norm": 0.0, + "learning_rate": 1.7529950764077885e-06, + "loss": 14.9745, + "step": 1919 + }, + { + "epoch": 0.11008227503368403, + "grad_norm": 0.0, + "learning_rate": 1.7482910795918655e-06, + "loss": 15.092, + "step": 1920 + }, + { + "epoch": 0.11013960955193074, + "grad_norm": 0.0, + "learning_rate": 1.7435920650557808e-06, + "loss": 15.1327, + "step": 1921 + }, + { + "epoch": 0.11019694407017745, + "grad_norm": 0.0, + "learning_rate": 1.7388980399993822e-06, + "loss": 15.2231, + "step": 1922 + }, + { + "epoch": 0.11025427858842415, + "grad_norm": 0.0, + "learning_rate": 1.7342090116148684e-06, + "loss": 15.0259, + "step": 1923 + }, + { + "epoch": 0.11031161310667087, + "grad_norm": 0.0, + "learning_rate": 1.7295249870867898e-06, + "loss": 15.2957, + "step": 1924 + }, + { + "epoch": 0.11036894762491758, + "grad_norm": 0.0, + "learning_rate": 1.7248459735920203e-06, + "loss": 15.0902, + "step": 1925 + }, + { + "epoch": 0.11042628214316429, + "grad_norm": 0.0, + "learning_rate": 1.720171978299766e-06, + "loss": 15.0184, + "step": 1926 + }, + { + "epoch": 0.110483616661411, + "grad_norm": 0.0, + "learning_rate": 1.7155030083715362e-06, + "loss": 14.5436, + "step": 1927 + }, + { + "epoch": 0.11054095117965772, + "grad_norm": 0.0, + "learning_rate": 1.7108390709611427e-06, + "loss": 14.8727, + "step": 1928 + }, + { + "epoch": 0.11059828569790442, + "grad_norm": 0.0, + "learning_rate": 1.7061801732146865e-06, + "loss": 14.7645, + "step": 1929 + }, + { + "epoch": 0.11065562021615113, + "grad_norm": 0.0, + "learning_rate": 1.7015263222705492e-06, + "loss": 15.1675, + "step": 1930 + }, + { + "epoch": 0.11071295473439785, + "grad_norm": 0.0, + "learning_rate": 1.6968775252593745e-06, + "loss": 15.1032, + "step": 1931 + }, + { + "epoch": 0.11077028925264455, + "grad_norm": 0.0, + "learning_rate": 1.692233789304069e-06, + "loss": 15.134, + "step": 1932 + }, + { + "epoch": 0.11082762377089127, + "grad_norm": 0.0, + "learning_rate": 1.6875951215197779e-06, + "loss": 15.1558, + "step": 1933 + }, + { + "epoch": 0.11088495828913797, + "grad_norm": 0.0, + "learning_rate": 1.682961529013889e-06, + "loss": 14.7275, + "step": 1934 + }, + { + "epoch": 0.11094229280738468, + "grad_norm": 0.0, + "learning_rate": 1.6783330188860047e-06, + "loss": 14.8734, + "step": 1935 + }, + { + "epoch": 0.1109996273256314, + "grad_norm": 0.0, + "learning_rate": 1.6737095982279444e-06, + "loss": 15.6697, + "step": 1936 + }, + { + "epoch": 0.1110569618438781, + "grad_norm": 0.0, + "learning_rate": 1.669091274123732e-06, + "loss": 15.2523, + "step": 1937 + }, + { + "epoch": 0.11111429636212482, + "grad_norm": 0.0, + "learning_rate": 1.6644780536495775e-06, + "loss": 15.1574, + "step": 1938 + }, + { + "epoch": 0.11117163088037153, + "grad_norm": 0.0, + "learning_rate": 1.6598699438738764e-06, + "loss": 15.0501, + "step": 1939 + }, + { + "epoch": 0.11122896539861823, + "grad_norm": 0.0, + "learning_rate": 1.6552669518571873e-06, + "loss": 14.9379, + "step": 1940 + }, + { + "epoch": 0.11128629991686495, + "grad_norm": 0.0, + "learning_rate": 1.6506690846522338e-06, + "loss": 14.8896, + "step": 1941 + }, + { + "epoch": 0.11134363443511167, + "grad_norm": 0.0, + "learning_rate": 1.646076349303884e-06, + "loss": 14.9501, + "step": 1942 + }, + { + "epoch": 0.11140096895335837, + "grad_norm": 0.0, + "learning_rate": 1.6414887528491414e-06, + "loss": 15.0372, + "step": 1943 + }, + { + "epoch": 0.11145830347160508, + "grad_norm": 0.0, + "learning_rate": 1.6369063023171367e-06, + "loss": 15.1974, + "step": 1944 + }, + { + "epoch": 0.11151563798985178, + "grad_norm": 0.0, + "learning_rate": 1.6323290047291196e-06, + "loss": 14.8393, + "step": 1945 + }, + { + "epoch": 0.1115729725080985, + "grad_norm": 0.0, + "learning_rate": 1.6277568670984384e-06, + "loss": 15.2872, + "step": 1946 + }, + { + "epoch": 0.11163030702634522, + "grad_norm": 0.0, + "learning_rate": 1.623189896430542e-06, + "loss": 15.1409, + "step": 1947 + }, + { + "epoch": 0.11168764154459192, + "grad_norm": 0.0, + "learning_rate": 1.618628099722957e-06, + "loss": 15.1526, + "step": 1948 + }, + { + "epoch": 0.11174497606283863, + "grad_norm": 0.0, + "learning_rate": 1.6140714839652838e-06, + "loss": 15.0921, + "step": 1949 + }, + { + "epoch": 0.11180231058108535, + "grad_norm": 0.0, + "learning_rate": 1.609520056139185e-06, + "loss": 15.1327, + "step": 1950 + }, + { + "epoch": 0.11185964509933205, + "grad_norm": 0.0, + "learning_rate": 1.604973823218376e-06, + "loss": 14.6891, + "step": 1951 + }, + { + "epoch": 0.11191697961757877, + "grad_norm": 0.0, + "learning_rate": 1.6004327921686086e-06, + "loss": 15.1599, + "step": 1952 + }, + { + "epoch": 0.11197431413582547, + "grad_norm": 0.0, + "learning_rate": 1.5958969699476689e-06, + "loss": 14.7748, + "step": 1953 + }, + { + "epoch": 0.11203164865407218, + "grad_norm": 0.0, + "learning_rate": 1.5913663635053578e-06, + "loss": 14.9859, + "step": 1954 + }, + { + "epoch": 0.1120889831723189, + "grad_norm": 0.0, + "learning_rate": 1.5868409797834882e-06, + "loss": 15.2427, + "step": 1955 + }, + { + "epoch": 0.1121463176905656, + "grad_norm": 0.0, + "learning_rate": 1.582320825715868e-06, + "loss": 14.8814, + "step": 1956 + }, + { + "epoch": 0.11220365220881232, + "grad_norm": 0.0, + "learning_rate": 1.5778059082282932e-06, + "loss": 15.1055, + "step": 1957 + }, + { + "epoch": 0.11226098672705903, + "grad_norm": 0.0, + "learning_rate": 1.573296234238534e-06, + "loss": 15.1705, + "step": 1958 + }, + { + "epoch": 0.11231832124530573, + "grad_norm": 0.0, + "learning_rate": 1.5687918106563326e-06, + "loss": 14.8643, + "step": 1959 + }, + { + "epoch": 0.11237565576355245, + "grad_norm": 0.0, + "learning_rate": 1.56429264438338e-06, + "loss": 14.8809, + "step": 1960 + }, + { + "epoch": 0.11243299028179916, + "grad_norm": 0.0, + "learning_rate": 1.5597987423133166e-06, + "loss": 14.9778, + "step": 1961 + }, + { + "epoch": 0.11249032480004587, + "grad_norm": 0.0, + "learning_rate": 1.5553101113317137e-06, + "loss": 14.9691, + "step": 1962 + }, + { + "epoch": 0.11254765931829258, + "grad_norm": 0.0, + "learning_rate": 1.550826758316068e-06, + "loss": 15.1386, + "step": 1963 + }, + { + "epoch": 0.11260499383653928, + "grad_norm": 0.0, + "learning_rate": 1.546348690135786e-06, + "loss": 15.2445, + "step": 1964 + }, + { + "epoch": 0.112662328354786, + "grad_norm": 0.0, + "learning_rate": 1.5418759136521844e-06, + "loss": 15.0999, + "step": 1965 + }, + { + "epoch": 0.11271966287303271, + "grad_norm": 0.0, + "learning_rate": 1.5374084357184621e-06, + "loss": 15.0017, + "step": 1966 + }, + { + "epoch": 0.11277699739127942, + "grad_norm": 0.0, + "learning_rate": 1.5329462631797092e-06, + "loss": 15.1215, + "step": 1967 + }, + { + "epoch": 0.11283433190952613, + "grad_norm": 0.0, + "learning_rate": 1.528489402872878e-06, + "loss": 15.2549, + "step": 1968 + }, + { + "epoch": 0.11289166642777285, + "grad_norm": 0.0, + "learning_rate": 1.5240378616267887e-06, + "loss": 14.8262, + "step": 1969 + }, + { + "epoch": 0.11294900094601955, + "grad_norm": 0.0, + "learning_rate": 1.5195916462621074e-06, + "loss": 15.219, + "step": 1970 + }, + { + "epoch": 0.11300633546426626, + "grad_norm": 0.0, + "learning_rate": 1.5151507635913403e-06, + "loss": 15.0064, + "step": 1971 + }, + { + "epoch": 0.11306366998251297, + "grad_norm": 0.0, + "learning_rate": 1.510715220418823e-06, + "loss": 15.0459, + "step": 1972 + }, + { + "epoch": 0.11312100450075968, + "grad_norm": 0.0, + "learning_rate": 1.5062850235407118e-06, + "loss": 14.745, + "step": 1973 + }, + { + "epoch": 0.1131783390190064, + "grad_norm": 0.0, + "learning_rate": 1.5018601797449683e-06, + "loss": 15.1417, + "step": 1974 + }, + { + "epoch": 0.1132356735372531, + "grad_norm": 0.0, + "learning_rate": 1.4974406958113557e-06, + "loss": 14.9677, + "step": 1975 + }, + { + "epoch": 0.11329300805549981, + "grad_norm": 0.0, + "learning_rate": 1.4930265785114224e-06, + "loss": 15.0245, + "step": 1976 + }, + { + "epoch": 0.11335034257374653, + "grad_norm": 0.0, + "learning_rate": 1.4886178346084934e-06, + "loss": 15.0466, + "step": 1977 + }, + { + "epoch": 0.11340767709199323, + "grad_norm": 0.0, + "learning_rate": 1.4842144708576606e-06, + "loss": 15.0459, + "step": 1978 + }, + { + "epoch": 0.11346501161023995, + "grad_norm": 0.0, + "learning_rate": 1.4798164940057769e-06, + "loss": 15.342, + "step": 1979 + }, + { + "epoch": 0.11352234612848666, + "grad_norm": 0.0, + "learning_rate": 1.4754239107914337e-06, + "loss": 15.3308, + "step": 1980 + }, + { + "epoch": 0.11357968064673336, + "grad_norm": 0.0, + "learning_rate": 1.4710367279449662e-06, + "loss": 15.0959, + "step": 1981 + }, + { + "epoch": 0.11363701516498008, + "grad_norm": 0.0, + "learning_rate": 1.4666549521884283e-06, + "loss": 15.3118, + "step": 1982 + }, + { + "epoch": 0.11369434968322678, + "grad_norm": 0.0, + "learning_rate": 1.4622785902355967e-06, + "loss": 15.0102, + "step": 1983 + }, + { + "epoch": 0.1137516842014735, + "grad_norm": 0.0, + "learning_rate": 1.457907648791943e-06, + "loss": 15.2011, + "step": 1984 + }, + { + "epoch": 0.11380901871972021, + "grad_norm": 0.0, + "learning_rate": 1.4535421345546424e-06, + "loss": 14.8329, + "step": 1985 + }, + { + "epoch": 0.11386635323796691, + "grad_norm": 0.0, + "learning_rate": 1.4491820542125495e-06, + "loss": 14.8631, + "step": 1986 + }, + { + "epoch": 0.11392368775621363, + "grad_norm": 0.0, + "learning_rate": 1.4448274144461965e-06, + "loss": 15.0568, + "step": 1987 + }, + { + "epoch": 0.11398102227446034, + "grad_norm": 0.0, + "learning_rate": 1.4404782219277758e-06, + "loss": 14.9962, + "step": 1988 + }, + { + "epoch": 0.11403835679270705, + "grad_norm": 0.0, + "learning_rate": 1.4361344833211377e-06, + "loss": 14.908, + "step": 1989 + }, + { + "epoch": 0.11409569131095376, + "grad_norm": 0.0, + "learning_rate": 1.431796205281773e-06, + "loss": 14.9463, + "step": 1990 + }, + { + "epoch": 0.11415302582920046, + "grad_norm": 0.0, + "learning_rate": 1.4274633944568056e-06, + "loss": 14.7642, + "step": 1991 + }, + { + "epoch": 0.11421036034744718, + "grad_norm": 0.0, + "learning_rate": 1.423136057484983e-06, + "loss": 14.9915, + "step": 1992 + }, + { + "epoch": 0.1142676948656939, + "grad_norm": 0.0, + "learning_rate": 1.4188142009966689e-06, + "loss": 15.1879, + "step": 1993 + }, + { + "epoch": 0.1143250293839406, + "grad_norm": 0.0, + "learning_rate": 1.414497831613823e-06, + "loss": 15.253, + "step": 1994 + }, + { + "epoch": 0.11438236390218731, + "grad_norm": 0.0, + "learning_rate": 1.410186955950006e-06, + "loss": 15.1298, + "step": 1995 + }, + { + "epoch": 0.11443969842043403, + "grad_norm": 0.0, + "learning_rate": 1.4058815806103542e-06, + "loss": 15.3996, + "step": 1996 + }, + { + "epoch": 0.11449703293868073, + "grad_norm": 0.0, + "learning_rate": 1.4015817121915792e-06, + "loss": 14.8465, + "step": 1997 + }, + { + "epoch": 0.11455436745692744, + "grad_norm": 0.0, + "learning_rate": 1.3972873572819535e-06, + "loss": 15.2481, + "step": 1998 + }, + { + "epoch": 0.11461170197517416, + "grad_norm": 0.0, + "learning_rate": 1.3929985224613051e-06, + "loss": 15.0481, + "step": 1999 + }, + { + "epoch": 0.11466903649342086, + "grad_norm": 0.0, + "learning_rate": 1.3887152143009992e-06, + "loss": 15.2159, + "step": 2000 + } + ], + "logging_steps": 1.0, + "max_steps": 2617, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.7098723774976492e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}