{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9992952783650458, "eval_steps": 500, "global_step": 709, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.5454545454545455e-06, "loss": 1.019, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.090909090909091e-06, "loss": 1.0036, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.3636363636363637e-05, "loss": 0.9864, "step": 3 }, { "epoch": 0.01, "learning_rate": 1.8181818181818182e-05, "loss": 1.0294, "step": 4 }, { "epoch": 0.01, "learning_rate": 2.272727272727273e-05, "loss": 1.0158, "step": 5 }, { "epoch": 0.01, "learning_rate": 2.7272727272727273e-05, "loss": 1.005, "step": 6 }, { "epoch": 0.01, "learning_rate": 3.181818181818182e-05, "loss": 0.999, "step": 7 }, { "epoch": 0.01, "learning_rate": 3.6363636363636364e-05, "loss": 1.1149, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.0909090909090915e-05, "loss": 1.1047, "step": 9 }, { "epoch": 0.01, "learning_rate": 4.545454545454546e-05, "loss": 1.1206, "step": 10 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 1.1102, "step": 11 }, { "epoch": 0.02, "learning_rate": 5.4545454545454546e-05, "loss": 1.1411, "step": 12 }, { "epoch": 0.02, "learning_rate": 5.90909090909091e-05, "loss": 1.1526, "step": 13 }, { "epoch": 0.02, "learning_rate": 6.363636363636364e-05, "loss": 1.1397, "step": 14 }, { "epoch": 0.02, "learning_rate": 6.818181818181818e-05, "loss": 1.1834, "step": 15 }, { "epoch": 0.02, "learning_rate": 7.272727272727273e-05, "loss": 1.2163, "step": 16 }, { "epoch": 0.02, "learning_rate": 7.727272727272727e-05, "loss": 1.2801, "step": 17 }, { "epoch": 0.03, "learning_rate": 8.181818181818183e-05, "loss": 1.2758, "step": 18 }, { "epoch": 0.03, "learning_rate": 8.636363636363637e-05, "loss": 1.2541, "step": 19 }, { "epoch": 0.03, "learning_rate": 9.090909090909092e-05, "loss": 1.2804, "step": 20 }, { "epoch": 0.03, "learning_rate": 9.545454545454546e-05, "loss": 1.2869, "step": 21 }, { "epoch": 0.03, "learning_rate": 0.0001, "loss": 1.261, "step": 22 }, { "epoch": 0.03, "learning_rate": 9.999947721210493e-05, "loss": 1.2938, "step": 23 }, { "epoch": 0.03, "learning_rate": 9.999790885935198e-05, "loss": 1.3089, "step": 24 }, { "epoch": 0.04, "learning_rate": 9.999529497453782e-05, "loss": 1.3996, "step": 25 }, { "epoch": 0.04, "learning_rate": 9.999163561232272e-05, "loss": 1.4033, "step": 26 }, { "epoch": 0.04, "learning_rate": 9.998693084922947e-05, "loss": 1.4061, "step": 27 }, { "epoch": 0.04, "learning_rate": 9.998118078364184e-05, "loss": 1.3929, "step": 28 }, { "epoch": 0.04, "learning_rate": 9.997438553580241e-05, "loss": 1.4128, "step": 29 }, { "epoch": 0.04, "learning_rate": 9.996654524781009e-05, "loss": 1.4063, "step": 30 }, { "epoch": 0.04, "learning_rate": 9.995766008361719e-05, "loss": 1.4088, "step": 31 }, { "epoch": 0.05, "learning_rate": 9.994773022902597e-05, "loss": 1.3899, "step": 32 }, { "epoch": 0.05, "learning_rate": 9.993675589168473e-05, "loss": 1.3813, "step": 33 }, { "epoch": 0.05, "learning_rate": 9.992473730108354e-05, "loss": 1.3964, "step": 34 }, { "epoch": 0.05, "learning_rate": 9.99116747085493e-05, "loss": 1.3606, "step": 35 }, { "epoch": 0.05, "learning_rate": 9.989756838724064e-05, "loss": 1.4069, "step": 36 }, { "epoch": 0.05, "learning_rate": 9.988241863214211e-05, "loss": 1.4116, "step": 37 }, { "epoch": 0.05, "learning_rate": 9.986622576005806e-05, "loss": 1.3307, "step": 38 }, { "epoch": 0.05, "learning_rate": 9.984899010960601e-05, "loss": 1.3983, "step": 39 }, { "epoch": 0.06, "learning_rate": 9.983071204120951e-05, "loss": 1.3725, "step": 40 }, { "epoch": 0.06, "learning_rate": 9.981139193709068e-05, "loss": 1.4223, "step": 41 }, { "epoch": 0.06, "learning_rate": 9.979103020126218e-05, "loss": 1.3844, "step": 42 }, { "epoch": 0.06, "learning_rate": 9.976962725951878e-05, "loss": 1.4282, "step": 43 }, { "epoch": 0.06, "learning_rate": 9.974718355942843e-05, "loss": 1.3989, "step": 44 }, { "epoch": 0.06, "learning_rate": 9.972369957032293e-05, "loss": 1.393, "step": 45 }, { "epoch": 0.06, "learning_rate": 9.969917578328808e-05, "loss": 1.4606, "step": 46 }, { "epoch": 0.07, "learning_rate": 9.967361271115343e-05, "loss": 1.4384, "step": 47 }, { "epoch": 0.07, "learning_rate": 9.964701088848158e-05, "loss": 1.478, "step": 48 }, { "epoch": 0.07, "learning_rate": 9.961937087155697e-05, "loss": 1.4254, "step": 49 }, { "epoch": 0.07, "learning_rate": 9.959069323837424e-05, "loss": 1.454, "step": 50 }, { "epoch": 0.07, "learning_rate": 9.956097858862619e-05, "loss": 1.4448, "step": 51 }, { "epoch": 0.07, "learning_rate": 9.953022754369114e-05, "loss": 1.4567, "step": 52 }, { "epoch": 0.07, "learning_rate": 9.94984407466201e-05, "loss": 1.4563, "step": 53 }, { "epoch": 0.08, "learning_rate": 9.946561886212315e-05, "loss": 1.438, "step": 54 }, { "epoch": 0.08, "learning_rate": 9.943176257655567e-05, "loss": 1.4096, "step": 55 }, { "epoch": 0.08, "learning_rate": 9.93968725979039e-05, "loss": 1.4472, "step": 56 }, { "epoch": 0.08, "learning_rate": 9.936094965577017e-05, "loss": 1.4223, "step": 57 }, { "epoch": 0.08, "learning_rate": 9.932399450135766e-05, "loss": 1.439, "step": 58 }, { "epoch": 0.08, "learning_rate": 9.928600790745466e-05, "loss": 1.4606, "step": 59 }, { "epoch": 0.08, "learning_rate": 9.924699066841845e-05, "loss": 1.5223, "step": 60 }, { "epoch": 0.09, "learning_rate": 9.920694360015863e-05, "loss": 1.4791, "step": 61 }, { "epoch": 0.09, "learning_rate": 9.916586754012008e-05, "loss": 1.4432, "step": 62 }, { "epoch": 0.09, "learning_rate": 9.91237633472655e-05, "loss": 1.4596, "step": 63 }, { "epoch": 0.09, "learning_rate": 9.908063190205738e-05, "loss": 1.4488, "step": 64 }, { "epoch": 0.09, "learning_rate": 9.903647410643963e-05, "loss": 1.4542, "step": 65 }, { "epoch": 0.09, "learning_rate": 9.899129088381866e-05, "loss": 1.443, "step": 66 }, { "epoch": 0.09, "learning_rate": 9.894508317904419e-05, "loss": 1.5506, "step": 67 }, { "epoch": 0.1, "learning_rate": 9.88978519583893e-05, "loss": 1.4963, "step": 68 }, { "epoch": 0.1, "learning_rate": 9.884959820953048e-05, "loss": 1.4135, "step": 69 }, { "epoch": 0.1, "learning_rate": 9.880032294152673e-05, "loss": 1.4813, "step": 70 }, { "epoch": 0.1, "learning_rate": 9.875002718479858e-05, "loss": 1.4471, "step": 71 }, { "epoch": 0.1, "learning_rate": 9.869871199110656e-05, "loss": 1.4165, "step": 72 }, { "epoch": 0.1, "learning_rate": 9.864637843352915e-05, "loss": 1.4395, "step": 73 }, { "epoch": 0.1, "learning_rate": 9.859302760644036e-05, "loss": 1.4375, "step": 74 }, { "epoch": 0.11, "learning_rate": 9.853866062548687e-05, "loss": 1.4995, "step": 75 }, { "epoch": 0.11, "learning_rate": 9.848327862756466e-05, "loss": 1.4465, "step": 76 }, { "epoch": 0.11, "learning_rate": 9.842688277079523e-05, "loss": 1.4374, "step": 77 }, { "epoch": 0.11, "learning_rate": 9.836947423450147e-05, "loss": 1.4476, "step": 78 }, { "epoch": 0.11, "learning_rate": 9.831105421918287e-05, "loss": 1.437, "step": 79 }, { "epoch": 0.11, "learning_rate": 9.825162394649048e-05, "loss": 1.41, "step": 80 }, { "epoch": 0.11, "learning_rate": 9.819118465920143e-05, "loss": 1.469, "step": 81 }, { "epoch": 0.12, "learning_rate": 9.812973762119281e-05, "loss": 1.4168, "step": 82 }, { "epoch": 0.12, "learning_rate": 9.806728411741533e-05, "loss": 1.5309, "step": 83 }, { "epoch": 0.12, "learning_rate": 9.800382545386641e-05, "loss": 1.4883, "step": 84 }, { "epoch": 0.12, "learning_rate": 9.79393629575629e-05, "loss": 1.4679, "step": 85 }, { "epoch": 0.12, "learning_rate": 9.787389797651334e-05, "loss": 1.5121, "step": 86 }, { "epoch": 0.12, "learning_rate": 9.780743187968968e-05, "loss": 1.4483, "step": 87 }, { "epoch": 0.12, "learning_rate": 9.773996605699875e-05, "loss": 1.4756, "step": 88 }, { "epoch": 0.13, "learning_rate": 9.767150191925321e-05, "loss": 1.4383, "step": 89 }, { "epoch": 0.13, "learning_rate": 9.760204089814192e-05, "loss": 1.4457, "step": 90 }, { "epoch": 0.13, "learning_rate": 9.753158444620013e-05, "loss": 1.4332, "step": 91 }, { "epoch": 0.13, "learning_rate": 9.746013403677905e-05, "loss": 1.4486, "step": 92 }, { "epoch": 0.13, "learning_rate": 9.738769116401505e-05, "loss": 1.4434, "step": 93 }, { "epoch": 0.13, "learning_rate": 9.73142573427984e-05, "loss": 1.4616, "step": 94 }, { "epoch": 0.13, "learning_rate": 9.723983410874163e-05, "loss": 1.4519, "step": 95 }, { "epoch": 0.14, "learning_rate": 9.716442301814735e-05, "loss": 1.4913, "step": 96 }, { "epoch": 0.14, "learning_rate": 9.708802564797578e-05, "loss": 1.4632, "step": 97 }, { "epoch": 0.14, "learning_rate": 9.701064359581176e-05, "loss": 1.4475, "step": 98 }, { "epoch": 0.14, "learning_rate": 9.693227847983126e-05, "loss": 1.4445, "step": 99 }, { "epoch": 0.14, "learning_rate": 9.685293193876765e-05, "loss": 1.458, "step": 100 }, { "epoch": 0.14, "learning_rate": 9.67726056318774e-05, "loss": 1.4125, "step": 101 }, { "epoch": 0.14, "learning_rate": 9.669130123890533e-05, "loss": 1.4348, "step": 102 }, { "epoch": 0.15, "learning_rate": 9.660902046004953e-05, "loss": 1.4535, "step": 103 }, { "epoch": 0.15, "learning_rate": 9.652576501592583e-05, "loss": 1.4669, "step": 104 }, { "epoch": 0.15, "learning_rate": 9.644153664753173e-05, "loss": 1.4212, "step": 105 }, { "epoch": 0.15, "learning_rate": 9.635633711621012e-05, "loss": 1.4407, "step": 106 }, { "epoch": 0.15, "learning_rate": 9.627016820361235e-05, "loss": 1.4285, "step": 107 }, { "epoch": 0.15, "learning_rate": 9.618303171166094e-05, "loss": 1.4433, "step": 108 }, { "epoch": 0.15, "learning_rate": 9.60949294625121e-05, "loss": 1.4629, "step": 109 }, { "epoch": 0.16, "learning_rate": 9.600586329851735e-05, "loss": 1.4309, "step": 110 }, { "epoch": 0.16, "learning_rate": 9.59158350821852e-05, "loss": 1.4424, "step": 111 }, { "epoch": 0.16, "learning_rate": 9.582484669614211e-05, "loss": 1.5388, "step": 112 }, { "epoch": 0.16, "learning_rate": 9.573290004309318e-05, "loss": 1.4657, "step": 113 }, { "epoch": 0.16, "learning_rate": 9.563999704578226e-05, "loss": 1.4654, "step": 114 }, { "epoch": 0.16, "learning_rate": 9.554613964695189e-05, "loss": 1.4174, "step": 115 }, { "epoch": 0.16, "learning_rate": 9.545132980930251e-05, "loss": 1.4004, "step": 116 }, { "epoch": 0.16, "learning_rate": 9.535556951545157e-05, "loss": 1.4183, "step": 117 }, { "epoch": 0.17, "learning_rate": 9.525886076789194e-05, "loss": 1.4468, "step": 118 }, { "epoch": 0.17, "learning_rate": 9.516120558895014e-05, "loss": 1.429, "step": 119 }, { "epoch": 0.17, "learning_rate": 9.506260602074398e-05, "loss": 1.3977, "step": 120 }, { "epoch": 0.17, "learning_rate": 9.496306412513988e-05, "loss": 1.433, "step": 121 }, { "epoch": 0.17, "learning_rate": 9.486258198370981e-05, "loss": 1.4687, "step": 122 }, { "epoch": 0.17, "learning_rate": 9.47611616976876e-05, "loss": 1.4017, "step": 123 }, { "epoch": 0.17, "learning_rate": 9.465880538792518e-05, "loss": 1.4283, "step": 124 }, { "epoch": 0.18, "learning_rate": 9.455551519484816e-05, "loss": 1.4018, "step": 125 }, { "epoch": 0.18, "learning_rate": 9.445129327841102e-05, "loss": 1.4522, "step": 126 }, { "epoch": 0.18, "learning_rate": 9.434614181805202e-05, "loss": 1.4229, "step": 127 }, { "epoch": 0.18, "learning_rate": 9.424006301264761e-05, "loss": 1.4334, "step": 128 }, { "epoch": 0.18, "learning_rate": 9.413305908046636e-05, "loss": 1.4241, "step": 129 }, { "epoch": 0.18, "learning_rate": 9.402513225912273e-05, "loss": 1.4142, "step": 130 }, { "epoch": 0.18, "learning_rate": 9.391628480553013e-05, "loss": 1.4467, "step": 131 }, { "epoch": 0.19, "learning_rate": 9.38065189958538e-05, "loss": 1.4676, "step": 132 }, { "epoch": 0.19, "learning_rate": 9.369583712546322e-05, "loss": 1.4513, "step": 133 }, { "epoch": 0.19, "learning_rate": 9.358424150888405e-05, "loss": 1.4245, "step": 134 }, { "epoch": 0.19, "learning_rate": 9.347173447974982e-05, "loss": 1.406, "step": 135 }, { "epoch": 0.19, "learning_rate": 9.335831839075304e-05, "loss": 1.3876, "step": 136 }, { "epoch": 0.19, "learning_rate": 9.324399561359602e-05, "loss": 1.352, "step": 137 }, { "epoch": 0.19, "learning_rate": 9.312876853894134e-05, "loss": 1.4428, "step": 138 }, { "epoch": 0.2, "learning_rate": 9.30126395763618e-05, "loss": 1.4213, "step": 139 }, { "epoch": 0.2, "learning_rate": 9.289561115429004e-05, "loss": 1.3948, "step": 140 }, { "epoch": 0.2, "learning_rate": 9.277768571996772e-05, "loss": 1.4475, "step": 141 }, { "epoch": 0.2, "learning_rate": 9.265886573939447e-05, "loss": 1.4342, "step": 142 }, { "epoch": 0.2, "learning_rate": 9.253915369727617e-05, "loss": 1.3901, "step": 143 }, { "epoch": 0.2, "learning_rate": 9.241855209697307e-05, "loss": 1.4319, "step": 144 }, { "epoch": 0.2, "learning_rate": 9.229706346044747e-05, "loss": 1.4263, "step": 145 }, { "epoch": 0.21, "learning_rate": 9.217469032821088e-05, "loss": 1.3893, "step": 146 }, { "epoch": 0.21, "learning_rate": 9.205143525927103e-05, "loss": 1.4633, "step": 147 }, { "epoch": 0.21, "learning_rate": 9.192730083107819e-05, "loss": 1.4429, "step": 148 }, { "epoch": 0.21, "learning_rate": 9.180228963947144e-05, "loss": 1.3981, "step": 149 }, { "epoch": 0.21, "learning_rate": 9.167640429862429e-05, "loss": 1.441, "step": 150 }, { "epoch": 0.21, "learning_rate": 9.154964744099006e-05, "loss": 1.4101, "step": 151 }, { "epoch": 0.21, "learning_rate": 9.142202171724674e-05, "loss": 1.4725, "step": 152 }, { "epoch": 0.22, "learning_rate": 9.129352979624169e-05, "loss": 1.3903, "step": 153 }, { "epoch": 0.22, "learning_rate": 9.116417436493574e-05, "loss": 1.4252, "step": 154 }, { "epoch": 0.22, "learning_rate": 9.103395812834705e-05, "loss": 1.4432, "step": 155 }, { "epoch": 0.22, "learning_rate": 9.09028838094945e-05, "loss": 1.4416, "step": 156 }, { "epoch": 0.22, "learning_rate": 9.077095414934075e-05, "loss": 1.3401, "step": 157 }, { "epoch": 0.22, "learning_rate": 9.063817190673503e-05, "loss": 1.4355, "step": 158 }, { "epoch": 0.22, "learning_rate": 9.050453985835527e-05, "loss": 1.4083, "step": 159 }, { "epoch": 0.23, "learning_rate": 9.037006079865016e-05, "loss": 1.4175, "step": 160 }, { "epoch": 0.23, "learning_rate": 9.023473753978069e-05, "loss": 1.4193, "step": 161 }, { "epoch": 0.23, "learning_rate": 9.009857291156134e-05, "loss": 1.4148, "step": 162 }, { "epoch": 0.23, "learning_rate": 8.996156976140086e-05, "loss": 1.4227, "step": 163 }, { "epoch": 0.23, "learning_rate": 8.98237309542428e-05, "loss": 1.3991, "step": 164 }, { "epoch": 0.23, "learning_rate": 8.968505937250555e-05, "loss": 1.4063, "step": 165 }, { "epoch": 0.23, "learning_rate": 8.954555791602211e-05, "loss": 1.4072, "step": 166 }, { "epoch": 0.24, "learning_rate": 8.940522950197935e-05, "loss": 1.4471, "step": 167 }, { "epoch": 0.24, "learning_rate": 8.926407706485713e-05, "loss": 1.3875, "step": 168 }, { "epoch": 0.24, "learning_rate": 8.91221035563669e-05, "loss": 1.469, "step": 169 }, { "epoch": 0.24, "learning_rate": 8.897931194538989e-05, "loss": 1.4398, "step": 170 }, { "epoch": 0.24, "learning_rate": 8.883570521791514e-05, "loss": 1.436, "step": 171 }, { "epoch": 0.24, "learning_rate": 8.869128637697702e-05, "loss": 1.4175, "step": 172 }, { "epoch": 0.24, "learning_rate": 8.854605844259237e-05, "loss": 1.3957, "step": 173 }, { "epoch": 0.25, "learning_rate": 8.840002445169746e-05, "loss": 1.4846, "step": 174 }, { "epoch": 0.25, "learning_rate": 8.82531874580844e-05, "loss": 1.3793, "step": 175 }, { "epoch": 0.25, "learning_rate": 8.810555053233729e-05, "loss": 1.4326, "step": 176 }, { "epoch": 0.25, "learning_rate": 8.795711676176803e-05, "loss": 1.4546, "step": 177 }, { "epoch": 0.25, "learning_rate": 8.780788925035178e-05, "loss": 1.4048, "step": 178 }, { "epoch": 0.25, "learning_rate": 8.765787111866198e-05, "loss": 1.3985, "step": 179 }, { "epoch": 0.25, "learning_rate": 8.750706550380518e-05, "loss": 1.3538, "step": 180 }, { "epoch": 0.26, "learning_rate": 8.735547555935537e-05, "loss": 1.402, "step": 181 }, { "epoch": 0.26, "learning_rate": 8.720310445528807e-05, "loss": 1.3701, "step": 182 }, { "epoch": 0.26, "learning_rate": 8.704995537791405e-05, "loss": 1.3698, "step": 183 }, { "epoch": 0.26, "learning_rate": 8.689603152981263e-05, "loss": 1.3827, "step": 184 }, { "epoch": 0.26, "learning_rate": 8.674133612976481e-05, "loss": 1.463, "step": 185 }, { "epoch": 0.26, "learning_rate": 8.658587241268587e-05, "loss": 1.363, "step": 186 }, { "epoch": 0.26, "learning_rate": 8.642964362955781e-05, "loss": 1.4031, "step": 187 }, { "epoch": 0.26, "learning_rate": 8.627265304736131e-05, "loss": 1.4026, "step": 188 }, { "epoch": 0.27, "learning_rate": 8.611490394900739e-05, "loss": 1.45, "step": 189 }, { "epoch": 0.27, "learning_rate": 8.595639963326881e-05, "loss": 1.3965, "step": 190 }, { "epoch": 0.27, "learning_rate": 8.579714341471106e-05, "loss": 1.4117, "step": 191 }, { "epoch": 0.27, "learning_rate": 8.56371386236231e-05, "loss": 1.3525, "step": 192 }, { "epoch": 0.27, "learning_rate": 8.547638860594764e-05, "loss": 1.4094, "step": 193 }, { "epoch": 0.27, "learning_rate": 8.531489672321122e-05, "loss": 1.3721, "step": 194 }, { "epoch": 0.27, "learning_rate": 8.515266635245389e-05, "loss": 1.3894, "step": 195 }, { "epoch": 0.28, "learning_rate": 8.498970088615861e-05, "loss": 1.3716, "step": 196 }, { "epoch": 0.28, "learning_rate": 8.48260037321803e-05, "loss": 1.4026, "step": 197 }, { "epoch": 0.28, "learning_rate": 8.46615783136746e-05, "loss": 1.3507, "step": 198 }, { "epoch": 0.28, "learning_rate": 8.449642806902623e-05, "loss": 1.3839, "step": 199 }, { "epoch": 0.28, "learning_rate": 8.433055645177714e-05, "loss": 1.3451, "step": 200 }, { "epoch": 0.28, "learning_rate": 8.416396693055429e-05, "loss": 1.3518, "step": 201 }, { "epoch": 0.28, "learning_rate": 8.399666298899706e-05, "loss": 1.3893, "step": 202 }, { "epoch": 0.29, "learning_rate": 8.382864812568452e-05, "loss": 1.3508, "step": 203 }, { "epoch": 0.29, "learning_rate": 8.365992585406207e-05, "loss": 1.335, "step": 204 }, { "epoch": 0.29, "learning_rate": 8.34904997023682e-05, "loss": 1.3734, "step": 205 }, { "epoch": 0.29, "learning_rate": 8.332037321356057e-05, "loss": 1.3876, "step": 206 }, { "epoch": 0.29, "learning_rate": 8.31495499452419e-05, "loss": 1.398, "step": 207 }, { "epoch": 0.29, "learning_rate": 8.297803346958571e-05, "loss": 1.4288, "step": 208 }, { "epoch": 0.29, "learning_rate": 8.280582737326146e-05, "loss": 1.389, "step": 209 }, { "epoch": 0.3, "learning_rate": 8.263293525735967e-05, "loss": 1.3819, "step": 210 }, { "epoch": 0.3, "learning_rate": 8.245936073731653e-05, "loss": 1.3584, "step": 211 }, { "epoch": 0.3, "learning_rate": 8.228510744283837e-05, "loss": 1.3803, "step": 212 }, { "epoch": 0.3, "learning_rate": 8.211017901782574e-05, "loss": 1.4161, "step": 213 }, { "epoch": 0.3, "learning_rate": 8.193457912029713e-05, "loss": 1.3701, "step": 214 }, { "epoch": 0.3, "learning_rate": 8.175831142231258e-05, "loss": 1.4184, "step": 215 }, { "epoch": 0.3, "learning_rate": 8.158137960989685e-05, "loss": 1.4, "step": 216 }, { "epoch": 0.31, "learning_rate": 8.140378738296233e-05, "loss": 1.3785, "step": 217 }, { "epoch": 0.31, "learning_rate": 8.122553845523166e-05, "loss": 1.4217, "step": 218 }, { "epoch": 0.31, "learning_rate": 8.104663655416014e-05, "loss": 1.3014, "step": 219 }, { "epoch": 0.31, "learning_rate": 8.086708542085768e-05, "loss": 1.3709, "step": 220 }, { "epoch": 0.31, "learning_rate": 8.068688881001065e-05, "loss": 1.3432, "step": 221 }, { "epoch": 0.31, "learning_rate": 8.050605048980333e-05, "loss": 1.3728, "step": 222 }, { "epoch": 0.31, "learning_rate": 8.03245742418391e-05, "loss": 1.4366, "step": 223 }, { "epoch": 0.32, "learning_rate": 8.014246386106138e-05, "loss": 1.3944, "step": 224 }, { "epoch": 0.32, "learning_rate": 7.995972315567431e-05, "loss": 1.4022, "step": 225 }, { "epoch": 0.32, "learning_rate": 7.977635594706299e-05, "loss": 1.3549, "step": 226 }, { "epoch": 0.32, "learning_rate": 7.959236606971375e-05, "loss": 1.4002, "step": 227 }, { "epoch": 0.32, "learning_rate": 7.940775737113378e-05, "loss": 1.3788, "step": 228 }, { "epoch": 0.32, "learning_rate": 7.922253371177082e-05, "loss": 1.3339, "step": 229 }, { "epoch": 0.32, "learning_rate": 7.903669896493233e-05, "loss": 1.3297, "step": 230 }, { "epoch": 0.33, "learning_rate": 7.885025701670457e-05, "loss": 1.2987, "step": 231 }, { "epoch": 0.33, "learning_rate": 7.866321176587129e-05, "loss": 1.4247, "step": 232 }, { "epoch": 0.33, "learning_rate": 7.84755671238322e-05, "loss": 1.3754, "step": 233 }, { "epoch": 0.33, "learning_rate": 7.828732701452119e-05, "loss": 1.3807, "step": 234 }, { "epoch": 0.33, "learning_rate": 7.809849537432432e-05, "loss": 1.3654, "step": 235 }, { "epoch": 0.33, "learning_rate": 7.790907615199736e-05, "loss": 1.3311, "step": 236 }, { "epoch": 0.33, "learning_rate": 7.771907330858341e-05, "loss": 1.3283, "step": 237 }, { "epoch": 0.34, "learning_rate": 7.752849081732993e-05, "loss": 1.3423, "step": 238 }, { "epoch": 0.34, "learning_rate": 7.733733266360568e-05, "loss": 1.37, "step": 239 }, { "epoch": 0.34, "learning_rate": 7.714560284481742e-05, "loss": 1.355, "step": 240 }, { "epoch": 0.34, "learning_rate": 7.695330537032628e-05, "loss": 1.397, "step": 241 }, { "epoch": 0.34, "learning_rate": 7.676044426136397e-05, "loss": 1.3589, "step": 242 }, { "epoch": 0.34, "learning_rate": 7.656702355094859e-05, "loss": 1.3804, "step": 243 }, { "epoch": 0.34, "learning_rate": 7.637304728380036e-05, "loss": 1.3695, "step": 244 }, { "epoch": 0.35, "learning_rate": 7.61785195162571e-05, "loss": 1.3546, "step": 245 }, { "epoch": 0.35, "learning_rate": 7.598344431618926e-05, "loss": 1.33, "step": 246 }, { "epoch": 0.35, "learning_rate": 7.578782576291501e-05, "loss": 1.3519, "step": 247 }, { "epoch": 0.35, "learning_rate": 7.559166794711476e-05, "loss": 1.3526, "step": 248 }, { "epoch": 0.35, "learning_rate": 7.539497497074584e-05, "loss": 1.37, "step": 249 }, { "epoch": 0.35, "learning_rate": 7.519775094695649e-05, "loss": 1.343, "step": 250 }, { "epoch": 0.35, "learning_rate": 7.500000000000001e-05, "loss": 1.3729, "step": 251 }, { "epoch": 0.36, "learning_rate": 7.480172626514845e-05, "loss": 1.344, "step": 252 }, { "epoch": 0.36, "learning_rate": 7.460293388860615e-05, "loss": 1.3071, "step": 253 }, { "epoch": 0.36, "learning_rate": 7.440362702742305e-05, "loss": 1.3789, "step": 254 }, { "epoch": 0.36, "learning_rate": 7.420380984940773e-05, "loss": 1.3207, "step": 255 }, { "epoch": 0.36, "learning_rate": 7.400348653304022e-05, "loss": 1.3365, "step": 256 }, { "epoch": 0.36, "learning_rate": 7.380266126738476e-05, "loss": 1.3616, "step": 257 }, { "epoch": 0.36, "learning_rate": 7.360133825200205e-05, "loss": 1.3094, "step": 258 }, { "epoch": 0.37, "learning_rate": 7.339952169686151e-05, "loss": 1.3491, "step": 259 }, { "epoch": 0.37, "learning_rate": 7.319721582225323e-05, "loss": 1.3591, "step": 260 }, { "epoch": 0.37, "learning_rate": 7.29944248586997e-05, "loss": 1.336, "step": 261 }, { "epoch": 0.37, "learning_rate": 7.279115304686735e-05, "loss": 1.3376, "step": 262 }, { "epoch": 0.37, "learning_rate": 7.258740463747788e-05, "loss": 1.3373, "step": 263 }, { "epoch": 0.37, "learning_rate": 7.238318389121939e-05, "loss": 1.3632, "step": 264 }, { "epoch": 0.37, "learning_rate": 7.217849507865724e-05, "loss": 1.3784, "step": 265 }, { "epoch": 0.37, "learning_rate": 7.197334248014477e-05, "loss": 1.3099, "step": 266 }, { "epoch": 0.38, "learning_rate": 7.176773038573377e-05, "loss": 1.3208, "step": 267 }, { "epoch": 0.38, "learning_rate": 7.156166309508482e-05, "loss": 1.3293, "step": 268 }, { "epoch": 0.38, "learning_rate": 7.13551449173773e-05, "loss": 1.3885, "step": 269 }, { "epoch": 0.38, "learning_rate": 7.114818017121939e-05, "loss": 1.354, "step": 270 }, { "epoch": 0.38, "learning_rate": 7.094077318455762e-05, "loss": 1.3362, "step": 271 }, { "epoch": 0.38, "learning_rate": 7.073292829458645e-05, "loss": 1.3203, "step": 272 }, { "epoch": 0.38, "learning_rate": 7.052464984765764e-05, "loss": 1.3777, "step": 273 }, { "epoch": 0.39, "learning_rate": 7.031594219918916e-05, "loss": 1.3297, "step": 274 }, { "epoch": 0.39, "learning_rate": 7.010680971357434e-05, "loss": 1.3249, "step": 275 }, { "epoch": 0.39, "learning_rate": 6.989725676409044e-05, "loss": 1.3271, "step": 276 }, { "epoch": 0.39, "learning_rate": 6.96872877328073e-05, "loss": 1.3234, "step": 277 }, { "epoch": 0.39, "learning_rate": 6.94769070104956e-05, "loss": 1.3597, "step": 278 }, { "epoch": 0.39, "learning_rate": 6.926611899653516e-05, "loss": 1.3421, "step": 279 }, { "epoch": 0.39, "learning_rate": 6.905492809882286e-05, "loss": 1.3048, "step": 280 }, { "epoch": 0.4, "learning_rate": 6.88433387336805e-05, "loss": 1.3737, "step": 281 }, { "epoch": 0.4, "learning_rate": 6.863135532576241e-05, "loss": 1.3015, "step": 282 }, { "epoch": 0.4, "learning_rate": 6.841898230796302e-05, "loss": 1.325, "step": 283 }, { "epoch": 0.4, "learning_rate": 6.820622412132402e-05, "loss": 1.3183, "step": 284 }, { "epoch": 0.4, "learning_rate": 6.799308521494156e-05, "loss": 1.3447, "step": 285 }, { "epoch": 0.4, "learning_rate": 6.777957004587331e-05, "loss": 1.2967, "step": 286 }, { "epoch": 0.4, "learning_rate": 6.756568307904508e-05, "loss": 1.3522, "step": 287 }, { "epoch": 0.41, "learning_rate": 6.735142878715754e-05, "loss": 1.3333, "step": 288 }, { "epoch": 0.41, "learning_rate": 6.713681165059271e-05, "loss": 1.4079, "step": 289 }, { "epoch": 0.41, "learning_rate": 6.692183615732025e-05, "loss": 1.2946, "step": 290 }, { "epoch": 0.41, "learning_rate": 6.670650680280358e-05, "loss": 1.3045, "step": 291 }, { "epoch": 0.41, "learning_rate": 6.649082808990586e-05, "loss": 1.358, "step": 292 }, { "epoch": 0.41, "learning_rate": 6.627480452879593e-05, "loss": 1.2442, "step": 293 }, { "epoch": 0.41, "learning_rate": 6.605844063685392e-05, "loss": 1.3063, "step": 294 }, { "epoch": 0.42, "learning_rate": 6.584174093857675e-05, "loss": 1.3009, "step": 295 }, { "epoch": 0.42, "learning_rate": 6.562470996548361e-05, "loss": 1.2866, "step": 296 }, { "epoch": 0.42, "learning_rate": 6.54073522560211e-05, "loss": 1.3432, "step": 297 }, { "epoch": 0.42, "learning_rate": 6.518967235546841e-05, "loss": 1.363, "step": 298 }, { "epoch": 0.42, "learning_rate": 6.497167481584221e-05, "loss": 1.3525, "step": 299 }, { "epoch": 0.42, "learning_rate": 6.475336419580151e-05, "loss": 1.3155, "step": 300 }, { "epoch": 0.42, "learning_rate": 6.453474506055228e-05, "loss": 1.3143, "step": 301 }, { "epoch": 0.43, "learning_rate": 6.431582198175203e-05, "loss": 1.3338, "step": 302 }, { "epoch": 0.43, "learning_rate": 6.409659953741416e-05, "loss": 1.3271, "step": 303 }, { "epoch": 0.43, "learning_rate": 6.387708231181229e-05, "loss": 1.2591, "step": 304 }, { "epoch": 0.43, "learning_rate": 6.365727489538437e-05, "loss": 1.2911, "step": 305 }, { "epoch": 0.43, "learning_rate": 6.343718188463663e-05, "loss": 1.2664, "step": 306 }, { "epoch": 0.43, "learning_rate": 6.321680788204758e-05, "loss": 1.2654, "step": 307 }, { "epoch": 0.43, "learning_rate": 6.299615749597165e-05, "loss": 1.256, "step": 308 }, { "epoch": 0.44, "learning_rate": 6.277523534054284e-05, "loss": 1.2852, "step": 309 }, { "epoch": 0.44, "learning_rate": 6.255404603557833e-05, "loss": 1.2917, "step": 310 }, { "epoch": 0.44, "learning_rate": 6.233259420648175e-05, "loss": 1.2603, "step": 311 }, { "epoch": 0.44, "learning_rate": 6.211088448414653e-05, "loss": 1.324, "step": 312 }, { "epoch": 0.44, "learning_rate": 6.188892150485903e-05, "loss": 1.3535, "step": 313 }, { "epoch": 0.44, "learning_rate": 6.166670991020162e-05, "loss": 1.2928, "step": 314 }, { "epoch": 0.44, "learning_rate": 6.144425434695551e-05, "loss": 1.3231, "step": 315 }, { "epoch": 0.45, "learning_rate": 6.122155946700381e-05, "loss": 1.313, "step": 316 }, { "epoch": 0.45, "learning_rate": 6.099862992723397e-05, "loss": 1.3226, "step": 317 }, { "epoch": 0.45, "learning_rate": 6.077547038944058e-05, "loss": 1.3328, "step": 318 }, { "epoch": 0.45, "learning_rate": 6.0552085520227875e-05, "loss": 1.2803, "step": 319 }, { "epoch": 0.45, "learning_rate": 6.032847999091206e-05, "loss": 1.3747, "step": 320 }, { "epoch": 0.45, "learning_rate": 6.010465847742368e-05, "loss": 1.2863, "step": 321 }, { "epoch": 0.45, "learning_rate": 5.988062566020987e-05, "loss": 1.3153, "step": 322 }, { "epoch": 0.46, "learning_rate": 5.9656386224136426e-05, "loss": 1.2917, "step": 323 }, { "epoch": 0.46, "learning_rate": 5.943194485838985e-05, "loss": 1.2688, "step": 324 }, { "epoch": 0.46, "learning_rate": 5.920730625637934e-05, "loss": 1.3114, "step": 325 }, { "epoch": 0.46, "learning_rate": 5.8982475115638515e-05, "loss": 1.3089, "step": 326 }, { "epoch": 0.46, "learning_rate": 5.875745613772736e-05, "loss": 1.3035, "step": 327 }, { "epoch": 0.46, "learning_rate": 5.85322540281338e-05, "loss": 1.3158, "step": 328 }, { "epoch": 0.46, "learning_rate": 5.830687349617529e-05, "loss": 1.2819, "step": 329 }, { "epoch": 0.47, "learning_rate": 5.808131925490039e-05, "loss": 1.3261, "step": 330 }, { "epoch": 0.47, "learning_rate": 5.7855596020990186e-05, "loss": 1.2925, "step": 331 }, { "epoch": 0.47, "learning_rate": 5.7629708514659655e-05, "loss": 1.2977, "step": 332 }, { "epoch": 0.47, "learning_rate": 5.740366145955893e-05, "loss": 1.2672, "step": 333 }, { "epoch": 0.47, "learning_rate": 5.7177459582674595e-05, "loss": 1.2812, "step": 334 }, { "epoch": 0.47, "learning_rate": 5.6951107614230783e-05, "loss": 1.343, "step": 335 }, { "epoch": 0.47, "learning_rate": 5.672461028759024e-05, "loss": 1.2741, "step": 336 }, { "epoch": 0.47, "learning_rate": 5.649797233915539e-05, "loss": 1.2426, "step": 337 }, { "epoch": 0.48, "learning_rate": 5.627119850826931e-05, "loss": 1.2673, "step": 338 }, { "epoch": 0.48, "learning_rate": 5.6044293537116496e-05, "loss": 1.2921, "step": 339 }, { "epoch": 0.48, "learning_rate": 5.5817262170623865e-05, "loss": 1.3118, "step": 340 }, { "epoch": 0.48, "learning_rate": 5.559010915636143e-05, "loss": 1.2766, "step": 341 }, { "epoch": 0.48, "learning_rate": 5.5362839244443034e-05, "loss": 1.311, "step": 342 }, { "epoch": 0.48, "learning_rate": 5.513545718742702e-05, "loss": 1.327, "step": 343 }, { "epoch": 0.48, "learning_rate": 5.490796774021687e-05, "loss": 1.3586, "step": 344 }, { "epoch": 0.49, "learning_rate": 5.468037565996177e-05, "loss": 1.3274, "step": 345 }, { "epoch": 0.49, "learning_rate": 5.445268570595708e-05, "loss": 1.2875, "step": 346 }, { "epoch": 0.49, "learning_rate": 5.42249026395449e-05, "loss": 1.2926, "step": 347 }, { "epoch": 0.49, "learning_rate": 5.399703122401441e-05, "loss": 1.3126, "step": 348 }, { "epoch": 0.49, "learning_rate": 5.376907622450229e-05, "loss": 1.3025, "step": 349 }, { "epoch": 0.49, "learning_rate": 5.3541042407893164e-05, "loss": 1.3226, "step": 350 }, { "epoch": 0.49, "learning_rate": 5.331293454271974e-05, "loss": 1.3268, "step": 351 }, { "epoch": 0.5, "learning_rate": 5.308475739906329e-05, "loss": 1.328, "step": 352 }, { "epoch": 0.5, "learning_rate": 5.285651574845374e-05, "loss": 1.3183, "step": 353 }, { "epoch": 0.5, "learning_rate": 5.262821436376998e-05, "loss": 1.2694, "step": 354 }, { "epoch": 0.5, "learning_rate": 5.239985801914e-05, "loss": 1.3136, "step": 355 }, { "epoch": 0.5, "learning_rate": 5.217145148984114e-05, "loss": 1.2293, "step": 356 }, { "epoch": 0.5, "learning_rate": 5.1942999552200136e-05, "loss": 1.2772, "step": 357 }, { "epoch": 0.5, "learning_rate": 5.171450698349329e-05, "loss": 1.3034, "step": 358 }, { "epoch": 0.51, "learning_rate": 5.148597856184656e-05, "loss": 1.3186, "step": 359 }, { "epoch": 0.51, "learning_rate": 5.125741906613565e-05, "loss": 1.2917, "step": 360 }, { "epoch": 0.51, "learning_rate": 5.102883327588608e-05, "loss": 1.2597, "step": 361 }, { "epoch": 0.51, "learning_rate": 5.080022597117318e-05, "loss": 1.2747, "step": 362 }, { "epoch": 0.51, "learning_rate": 5.057160193252225e-05, "loss": 1.3008, "step": 363 }, { "epoch": 0.51, "learning_rate": 5.0342965940808486e-05, "loss": 1.3098, "step": 364 }, { "epoch": 0.51, "learning_rate": 5.011432277715702e-05, "loss": 1.2622, "step": 365 }, { "epoch": 0.52, "learning_rate": 4.9885677222842984e-05, "loss": 1.2856, "step": 366 }, { "epoch": 0.52, "learning_rate": 4.965703405919154e-05, "loss": 1.2775, "step": 367 }, { "epoch": 0.52, "learning_rate": 4.942839806747775e-05, "loss": 1.2891, "step": 368 }, { "epoch": 0.52, "learning_rate": 4.919977402882682e-05, "loss": 1.3406, "step": 369 }, { "epoch": 0.52, "learning_rate": 4.897116672411395e-05, "loss": 1.302, "step": 370 }, { "epoch": 0.52, "learning_rate": 4.8742580933864356e-05, "loss": 1.3238, "step": 371 }, { "epoch": 0.52, "learning_rate": 4.851402143815345e-05, "loss": 1.2748, "step": 372 }, { "epoch": 0.53, "learning_rate": 4.828549301650673e-05, "loss": 1.2535, "step": 373 }, { "epoch": 0.53, "learning_rate": 4.8057000447799876e-05, "loss": 1.26, "step": 374 }, { "epoch": 0.53, "learning_rate": 4.782854851015886e-05, "loss": 1.2667, "step": 375 }, { "epoch": 0.53, "learning_rate": 4.760014198086002e-05, "loss": 1.2761, "step": 376 }, { "epoch": 0.53, "learning_rate": 4.737178563623004e-05, "loss": 1.2922, "step": 377 }, { "epoch": 0.53, "learning_rate": 4.714348425154627e-05, "loss": 1.2724, "step": 378 }, { "epoch": 0.53, "learning_rate": 4.691524260093672e-05, "loss": 1.2786, "step": 379 }, { "epoch": 0.54, "learning_rate": 4.668706545728026e-05, "loss": 1.3129, "step": 380 }, { "epoch": 0.54, "learning_rate": 4.6458957592106855e-05, "loss": 1.2888, "step": 381 }, { "epoch": 0.54, "learning_rate": 4.623092377549772e-05, "loss": 1.2741, "step": 382 }, { "epoch": 0.54, "learning_rate": 4.600296877598561e-05, "loss": 1.2683, "step": 383 }, { "epoch": 0.54, "learning_rate": 4.577509736045511e-05, "loss": 1.2782, "step": 384 }, { "epoch": 0.54, "learning_rate": 4.554731429404293e-05, "loss": 1.2592, "step": 385 }, { "epoch": 0.54, "learning_rate": 4.5319624340038244e-05, "loss": 1.3419, "step": 386 }, { "epoch": 0.55, "learning_rate": 4.509203225978314e-05, "loss": 1.2586, "step": 387 }, { "epoch": 0.55, "learning_rate": 4.486454281257299e-05, "loss": 1.3275, "step": 388 }, { "epoch": 0.55, "learning_rate": 4.4637160755557e-05, "loss": 1.2152, "step": 389 }, { "epoch": 0.55, "learning_rate": 4.4409890843638584e-05, "loss": 1.3169, "step": 390 }, { "epoch": 0.55, "learning_rate": 4.418273782937613e-05, "loss": 1.2855, "step": 391 }, { "epoch": 0.55, "learning_rate": 4.395570646288352e-05, "loss": 1.2351, "step": 392 }, { "epoch": 0.55, "learning_rate": 4.372880149173071e-05, "loss": 1.3066, "step": 393 }, { "epoch": 0.56, "learning_rate": 4.35020276608446e-05, "loss": 1.2788, "step": 394 }, { "epoch": 0.56, "learning_rate": 4.327538971240978e-05, "loss": 1.3019, "step": 395 }, { "epoch": 0.56, "learning_rate": 4.304889238576922e-05, "loss": 1.2754, "step": 396 }, { "epoch": 0.56, "learning_rate": 4.2822540417325396e-05, "loss": 1.2644, "step": 397 }, { "epoch": 0.56, "learning_rate": 4.2596338540441086e-05, "loss": 1.2097, "step": 398 }, { "epoch": 0.56, "learning_rate": 4.237029148534036e-05, "loss": 1.2849, "step": 399 }, { "epoch": 0.56, "learning_rate": 4.2144403979009826e-05, "loss": 1.2622, "step": 400 }, { "epoch": 0.57, "learning_rate": 4.1918680745099614e-05, "loss": 1.2837, "step": 401 }, { "epoch": 0.57, "learning_rate": 4.169312650382471e-05, "loss": 1.2705, "step": 402 }, { "epoch": 0.57, "learning_rate": 4.1467745971866216e-05, "loss": 1.274, "step": 403 }, { "epoch": 0.57, "learning_rate": 4.124254386227264e-05, "loss": 1.2877, "step": 404 }, { "epoch": 0.57, "learning_rate": 4.101752488436149e-05, "loss": 1.2715, "step": 405 }, { "epoch": 0.57, "learning_rate": 4.0792693743620695e-05, "loss": 1.2333, "step": 406 }, { "epoch": 0.57, "learning_rate": 4.056805514161015e-05, "loss": 1.3097, "step": 407 }, { "epoch": 0.58, "learning_rate": 4.034361377586357e-05, "loss": 1.3334, "step": 408 }, { "epoch": 0.58, "learning_rate": 4.0119374339790136e-05, "loss": 1.2569, "step": 409 }, { "epoch": 0.58, "learning_rate": 3.989534152257632e-05, "loss": 1.2795, "step": 410 }, { "epoch": 0.58, "learning_rate": 3.967152000908796e-05, "loss": 1.3314, "step": 411 }, { "epoch": 0.58, "learning_rate": 3.944791447977214e-05, "loss": 1.3027, "step": 412 }, { "epoch": 0.58, "learning_rate": 3.922452961055941e-05, "loss": 1.2614, "step": 413 }, { "epoch": 0.58, "learning_rate": 3.900137007276605e-05, "loss": 1.2581, "step": 414 }, { "epoch": 0.58, "learning_rate": 3.8778440532996204e-05, "loss": 1.2899, "step": 415 }, { "epoch": 0.59, "learning_rate": 3.855574565304448e-05, "loss": 1.2968, "step": 416 }, { "epoch": 0.59, "learning_rate": 3.8333290089798415e-05, "loss": 1.2594, "step": 417 }, { "epoch": 0.59, "learning_rate": 3.811107849514098e-05, "loss": 1.2745, "step": 418 }, { "epoch": 0.59, "learning_rate": 3.788911551585348e-05, "loss": 1.2817, "step": 419 }, { "epoch": 0.59, "learning_rate": 3.7667405793518264e-05, "loss": 1.2884, "step": 420 }, { "epoch": 0.59, "learning_rate": 3.744595396442169e-05, "loss": 1.2748, "step": 421 }, { "epoch": 0.59, "learning_rate": 3.722476465945718e-05, "loss": 1.309, "step": 422 }, { "epoch": 0.6, "learning_rate": 3.7003842504028366e-05, "loss": 1.3223, "step": 423 }, { "epoch": 0.6, "learning_rate": 3.678319211795242e-05, "loss": 1.3418, "step": 424 }, { "epoch": 0.6, "learning_rate": 3.656281811536337e-05, "loss": 1.274, "step": 425 }, { "epoch": 0.6, "learning_rate": 3.634272510461564e-05, "loss": 1.2765, "step": 426 }, { "epoch": 0.6, "learning_rate": 3.612291768818772e-05, "loss": 1.2968, "step": 427 }, { "epoch": 0.6, "learning_rate": 3.590340046258586e-05, "loss": 1.2538, "step": 428 }, { "epoch": 0.6, "learning_rate": 3.5684178018247996e-05, "loss": 1.3016, "step": 429 }, { "epoch": 0.61, "learning_rate": 3.546525493944773e-05, "loss": 1.287, "step": 430 }, { "epoch": 0.61, "learning_rate": 3.52466358041985e-05, "loss": 1.2595, "step": 431 }, { "epoch": 0.61, "learning_rate": 3.502832518415778e-05, "loss": 1.2606, "step": 432 }, { "epoch": 0.61, "learning_rate": 3.48103276445316e-05, "loss": 1.2743, "step": 433 }, { "epoch": 0.61, "learning_rate": 3.459264774397891e-05, "loss": 1.2596, "step": 434 }, { "epoch": 0.61, "learning_rate": 3.437529003451639e-05, "loss": 1.2515, "step": 435 }, { "epoch": 0.61, "learning_rate": 3.415825906142326e-05, "loss": 1.2812, "step": 436 }, { "epoch": 0.62, "learning_rate": 3.394155936314609e-05, "loss": 1.3057, "step": 437 }, { "epoch": 0.62, "learning_rate": 3.372519547120407e-05, "loss": 1.2646, "step": 438 }, { "epoch": 0.62, "learning_rate": 3.350917191009416e-05, "loss": 1.2425, "step": 439 }, { "epoch": 0.62, "learning_rate": 3.329349319719644e-05, "loss": 1.2911, "step": 440 }, { "epoch": 0.62, "learning_rate": 3.307816384267975e-05, "loss": 1.2944, "step": 441 }, { "epoch": 0.62, "learning_rate": 3.286318834940729e-05, "loss": 1.2845, "step": 442 }, { "epoch": 0.62, "learning_rate": 3.264857121284246e-05, "loss": 1.2854, "step": 443 }, { "epoch": 0.63, "learning_rate": 3.2434316920954935e-05, "loss": 1.2476, "step": 444 }, { "epoch": 0.63, "learning_rate": 3.222042995412669e-05, "loss": 1.3087, "step": 445 }, { "epoch": 0.63, "learning_rate": 3.200691478505843e-05, "loss": 1.2395, "step": 446 }, { "epoch": 0.63, "learning_rate": 3.1793775878676e-05, "loss": 1.2305, "step": 447 }, { "epoch": 0.63, "learning_rate": 3.1581017692036985e-05, "loss": 1.2651, "step": 448 }, { "epoch": 0.63, "learning_rate": 3.136864467423758e-05, "loss": 1.2375, "step": 449 }, { "epoch": 0.63, "learning_rate": 3.115666126631952e-05, "loss": 1.2829, "step": 450 }, { "epoch": 0.64, "learning_rate": 3.094507190117715e-05, "loss": 1.3191, "step": 451 }, { "epoch": 0.64, "learning_rate": 3.073388100346484e-05, "loss": 1.2975, "step": 452 }, { "epoch": 0.64, "learning_rate": 3.0523092989504415e-05, "loss": 1.2755, "step": 453 }, { "epoch": 0.64, "learning_rate": 3.0312712267192713e-05, "loss": 1.2873, "step": 454 }, { "epoch": 0.64, "learning_rate": 3.010274323590956e-05, "loss": 1.2662, "step": 455 }, { "epoch": 0.64, "learning_rate": 2.989319028642567e-05, "loss": 1.3002, "step": 456 }, { "epoch": 0.64, "learning_rate": 2.9684057800810845e-05, "loss": 1.2369, "step": 457 }, { "epoch": 0.65, "learning_rate": 2.9475350152342378e-05, "loss": 1.2125, "step": 458 }, { "epoch": 0.65, "learning_rate": 2.9267071705413552e-05, "loss": 1.296, "step": 459 }, { "epoch": 0.65, "learning_rate": 2.9059226815442385e-05, "loss": 1.2878, "step": 460 }, { "epoch": 0.65, "learning_rate": 2.8851819828780623e-05, "loss": 1.308, "step": 461 }, { "epoch": 0.65, "learning_rate": 2.8644855082622695e-05, "loss": 1.2083, "step": 462 }, { "epoch": 0.65, "learning_rate": 2.8438336904915185e-05, "loss": 1.2431, "step": 463 }, { "epoch": 0.65, "learning_rate": 2.823226961426625e-05, "loss": 1.2315, "step": 464 }, { "epoch": 0.66, "learning_rate": 2.802665751985525e-05, "loss": 1.3051, "step": 465 }, { "epoch": 0.66, "learning_rate": 2.7821504921342777e-05, "loss": 1.2527, "step": 466 }, { "epoch": 0.66, "learning_rate": 2.7616816108780623e-05, "loss": 1.295, "step": 467 }, { "epoch": 0.66, "learning_rate": 2.741259536252213e-05, "loss": 1.2856, "step": 468 }, { "epoch": 0.66, "learning_rate": 2.7208846953132682e-05, "loss": 1.28, "step": 469 }, { "epoch": 0.66, "learning_rate": 2.700557514130032e-05, "loss": 1.2658, "step": 470 }, { "epoch": 0.66, "learning_rate": 2.6802784177746777e-05, "loss": 1.2511, "step": 471 }, { "epoch": 0.67, "learning_rate": 2.6600478303138503e-05, "loss": 1.3012, "step": 472 }, { "epoch": 0.67, "learning_rate": 2.6398661747997955e-05, "loss": 1.2492, "step": 473 }, { "epoch": 0.67, "learning_rate": 2.619733873261524e-05, "loss": 1.2535, "step": 474 }, { "epoch": 0.67, "learning_rate": 2.5996513466959794e-05, "loss": 1.261, "step": 475 }, { "epoch": 0.67, "learning_rate": 2.579619015059229e-05, "loss": 1.2847, "step": 476 }, { "epoch": 0.67, "learning_rate": 2.5596372972576967e-05, "loss": 1.2955, "step": 477 }, { "epoch": 0.67, "learning_rate": 2.5397066111393853e-05, "loss": 1.2465, "step": 478 }, { "epoch": 0.68, "learning_rate": 2.5198273734851553e-05, "loss": 1.2523, "step": 479 }, { "epoch": 0.68, "learning_rate": 2.500000000000001e-05, "loss": 1.3038, "step": 480 }, { "epoch": 0.68, "learning_rate": 2.4802249053043526e-05, "loss": 1.3138, "step": 481 }, { "epoch": 0.68, "learning_rate": 2.4605025029254164e-05, "loss": 1.2282, "step": 482 }, { "epoch": 0.68, "learning_rate": 2.4408332052885246e-05, "loss": 1.2731, "step": 483 }, { "epoch": 0.68, "learning_rate": 2.4212174237085007e-05, "loss": 1.3086, "step": 484 }, { "epoch": 0.68, "learning_rate": 2.401655568381074e-05, "loss": 1.3227, "step": 485 }, { "epoch": 0.68, "learning_rate": 2.382148048374292e-05, "loss": 1.2396, "step": 486 }, { "epoch": 0.69, "learning_rate": 2.3626952716199647e-05, "loss": 1.2487, "step": 487 }, { "epoch": 0.69, "learning_rate": 2.3432976449051442e-05, "loss": 1.2149, "step": 488 }, { "epoch": 0.69, "learning_rate": 2.3239555738636044e-05, "loss": 1.2986, "step": 489 }, { "epoch": 0.69, "learning_rate": 2.3046694629673716e-05, "loss": 1.2814, "step": 490 }, { "epoch": 0.69, "learning_rate": 2.28543971551826e-05, "loss": 1.2465, "step": 491 }, { "epoch": 0.69, "learning_rate": 2.266266733639434e-05, "loss": 1.307, "step": 492 }, { "epoch": 0.69, "learning_rate": 2.247150918267008e-05, "loss": 1.2532, "step": 493 }, { "epoch": 0.7, "learning_rate": 2.2280926691416603e-05, "loss": 1.261, "step": 494 }, { "epoch": 0.7, "learning_rate": 2.209092384800265e-05, "loss": 1.2771, "step": 495 }, { "epoch": 0.7, "learning_rate": 2.190150462567569e-05, "loss": 1.2589, "step": 496 }, { "epoch": 0.7, "learning_rate": 2.1712672985478815e-05, "loss": 1.278, "step": 497 }, { "epoch": 0.7, "learning_rate": 2.1524432876167812e-05, "loss": 1.2595, "step": 498 }, { "epoch": 0.7, "learning_rate": 2.133678823412873e-05, "loss": 1.299, "step": 499 }, { "epoch": 0.7, "learning_rate": 2.1149742983295446e-05, "loss": 1.3015, "step": 500 }, { "epoch": 0.71, "learning_rate": 2.0963301035067685e-05, "loss": 1.2777, "step": 501 }, { "epoch": 0.71, "learning_rate": 2.0777466288229207e-05, "loss": 1.2536, "step": 502 }, { "epoch": 0.71, "learning_rate": 2.0592242628866236e-05, "loss": 1.2498, "step": 503 }, { "epoch": 0.71, "learning_rate": 2.040763393028627e-05, "loss": 1.2639, "step": 504 }, { "epoch": 0.71, "learning_rate": 2.022364405293703e-05, "loss": 1.2522, "step": 505 }, { "epoch": 0.71, "learning_rate": 2.0040276844325718e-05, "loss": 1.263, "step": 506 }, { "epoch": 0.71, "learning_rate": 1.9857536138938627e-05, "loss": 1.2423, "step": 507 }, { "epoch": 0.72, "learning_rate": 1.9675425758160925e-05, "loss": 1.2513, "step": 508 }, { "epoch": 0.72, "learning_rate": 1.949394951019669e-05, "loss": 1.2495, "step": 509 }, { "epoch": 0.72, "learning_rate": 1.9313111189989375e-05, "loss": 1.2178, "step": 510 }, { "epoch": 0.72, "learning_rate": 1.913291457914234e-05, "loss": 1.2509, "step": 511 }, { "epoch": 0.72, "learning_rate": 1.8953363445839877e-05, "loss": 1.2768, "step": 512 }, { "epoch": 0.72, "learning_rate": 1.8774461544768347e-05, "loss": 1.2517, "step": 513 }, { "epoch": 0.72, "learning_rate": 1.8596212617037694e-05, "loss": 1.2702, "step": 514 }, { "epoch": 0.73, "learning_rate": 1.8418620390103163e-05, "loss": 1.2566, "step": 515 }, { "epoch": 0.73, "learning_rate": 1.8241688577687426e-05, "loss": 1.2464, "step": 516 }, { "epoch": 0.73, "learning_rate": 1.806542087970289e-05, "loss": 1.2356, "step": 517 }, { "epoch": 0.73, "learning_rate": 1.788982098217427e-05, "loss": 1.2719, "step": 518 }, { "epoch": 0.73, "learning_rate": 1.7714892557161624e-05, "loss": 1.2667, "step": 519 }, { "epoch": 0.73, "learning_rate": 1.754063926268349e-05, "loss": 1.2486, "step": 520 }, { "epoch": 0.73, "learning_rate": 1.7367064742640348e-05, "loss": 1.2685, "step": 521 }, { "epoch": 0.74, "learning_rate": 1.719417262673854e-05, "loss": 1.3077, "step": 522 }, { "epoch": 0.74, "learning_rate": 1.7021966530414303e-05, "loss": 1.2647, "step": 523 }, { "epoch": 0.74, "learning_rate": 1.6850450054758092e-05, "loss": 1.2924, "step": 524 }, { "epoch": 0.74, "learning_rate": 1.667962678643943e-05, "loss": 1.2512, "step": 525 }, { "epoch": 0.74, "learning_rate": 1.6509500297631787e-05, "loss": 1.2396, "step": 526 }, { "epoch": 0.74, "learning_rate": 1.6340074145937934e-05, "loss": 1.2432, "step": 527 }, { "epoch": 0.74, "learning_rate": 1.6171351874315494e-05, "loss": 1.2462, "step": 528 }, { "epoch": 0.75, "learning_rate": 1.600333701100293e-05, "loss": 1.2697, "step": 529 }, { "epoch": 0.75, "learning_rate": 1.583603306944572e-05, "loss": 1.2251, "step": 530 }, { "epoch": 0.75, "learning_rate": 1.566944354822286e-05, "loss": 1.2481, "step": 531 }, { "epoch": 0.75, "learning_rate": 1.5503571930973786e-05, "loss": 1.2048, "step": 532 }, { "epoch": 0.75, "learning_rate": 1.533842168632541e-05, "loss": 1.277, "step": 533 }, { "epoch": 0.75, "learning_rate": 1.5173996267819695e-05, "loss": 1.2525, "step": 534 }, { "epoch": 0.75, "learning_rate": 1.5010299113841397e-05, "loss": 1.2449, "step": 535 }, { "epoch": 0.76, "learning_rate": 1.4847333647546113e-05, "loss": 1.2571, "step": 536 }, { "epoch": 0.76, "learning_rate": 1.468510327678877e-05, "loss": 1.2618, "step": 537 }, { "epoch": 0.76, "learning_rate": 1.4523611394052356e-05, "loss": 1.2877, "step": 538 }, { "epoch": 0.76, "learning_rate": 1.4362861376376896e-05, "loss": 1.2563, "step": 539 }, { "epoch": 0.76, "learning_rate": 1.4202856585288954e-05, "loss": 1.3038, "step": 540 }, { "epoch": 0.76, "learning_rate": 1.4043600366731213e-05, "loss": 1.2746, "step": 541 }, { "epoch": 0.76, "learning_rate": 1.3885096050992624e-05, "loss": 1.2297, "step": 542 }, { "epoch": 0.77, "learning_rate": 1.3727346952638703e-05, "loss": 1.2217, "step": 543 }, { "epoch": 0.77, "learning_rate": 1.3570356370442188e-05, "loss": 1.2506, "step": 544 }, { "epoch": 0.77, "learning_rate": 1.341412758731413e-05, "loss": 1.2465, "step": 545 }, { "epoch": 0.77, "learning_rate": 1.325866387023521e-05, "loss": 1.2689, "step": 546 }, { "epoch": 0.77, "learning_rate": 1.3103968470187384e-05, "loss": 1.235, "step": 547 }, { "epoch": 0.77, "learning_rate": 1.2950044622085955e-05, "loss": 1.2777, "step": 548 }, { "epoch": 0.77, "learning_rate": 1.2796895544711929e-05, "loss": 1.2638, "step": 549 }, { "epoch": 0.78, "learning_rate": 1.2644524440644628e-05, "loss": 1.2612, "step": 550 }, { "epoch": 0.78, "learning_rate": 1.249293449619483e-05, "loss": 1.269, "step": 551 }, { "epoch": 0.78, "learning_rate": 1.2342128881338027e-05, "loss": 1.2453, "step": 552 }, { "epoch": 0.78, "learning_rate": 1.2192110749648233e-05, "loss": 1.2294, "step": 553 }, { "epoch": 0.78, "learning_rate": 1.2042883238231984e-05, "loss": 1.2449, "step": 554 }, { "epoch": 0.78, "learning_rate": 1.1894449467662728e-05, "loss": 1.2523, "step": 555 }, { "epoch": 0.78, "learning_rate": 1.1746812541915608e-05, "loss": 1.2162, "step": 556 }, { "epoch": 0.79, "learning_rate": 1.1599975548302549e-05, "loss": 1.2533, "step": 557 }, { "epoch": 0.79, "learning_rate": 1.1453941557407638e-05, "loss": 1.2307, "step": 558 }, { "epoch": 0.79, "learning_rate": 1.1308713623022987e-05, "loss": 1.2713, "step": 559 }, { "epoch": 0.79, "learning_rate": 1.1164294782084866e-05, "loss": 1.2636, "step": 560 }, { "epoch": 0.79, "learning_rate": 1.1020688054610118e-05, "loss": 1.2408, "step": 561 }, { "epoch": 0.79, "learning_rate": 1.0877896443633117e-05, "loss": 1.2865, "step": 562 }, { "epoch": 0.79, "learning_rate": 1.0735922935142873e-05, "loss": 1.2493, "step": 563 }, { "epoch": 0.79, "learning_rate": 1.0594770498020657e-05, "loss": 1.2406, "step": 564 }, { "epoch": 0.8, "learning_rate": 1.0454442083977912e-05, "loss": 1.263, "step": 565 }, { "epoch": 0.8, "learning_rate": 1.0314940627494451e-05, "loss": 1.2701, "step": 566 }, { "epoch": 0.8, "learning_rate": 1.0176269045757202e-05, "loss": 1.2196, "step": 567 }, { "epoch": 0.8, "learning_rate": 1.0038430238599156e-05, "loss": 1.2103, "step": 568 }, { "epoch": 0.8, "learning_rate": 9.901427088438675e-06, "loss": 1.2255, "step": 569 }, { "epoch": 0.8, "learning_rate": 9.76526246021931e-06, "loss": 1.2491, "step": 570 }, { "epoch": 0.8, "learning_rate": 9.629939201349853e-06, "loss": 1.2494, "step": 571 }, { "epoch": 0.81, "learning_rate": 9.49546014164474e-06, "loss": 1.2868, "step": 572 }, { "epoch": 0.81, "learning_rate": 9.361828093264984e-06, "loss": 1.2383, "step": 573 }, { "epoch": 0.81, "learning_rate": 9.229045850659252e-06, "loss": 1.2227, "step": 574 }, { "epoch": 0.81, "learning_rate": 9.097116190505516e-06, "loss": 1.2423, "step": 575 }, { "epoch": 0.81, "learning_rate": 8.966041871652969e-06, "loss": 1.233, "step": 576 }, { "epoch": 0.81, "learning_rate": 8.835825635064266e-06, "loss": 1.2144, "step": 577 }, { "epoch": 0.81, "learning_rate": 8.706470203758316e-06, "loss": 1.2529, "step": 578 }, { "epoch": 0.82, "learning_rate": 8.577978282753274e-06, "loss": 1.2622, "step": 579 }, { "epoch": 0.82, "learning_rate": 8.45035255900995e-06, "loss": 1.2398, "step": 580 }, { "epoch": 0.82, "learning_rate": 8.323595701375702e-06, "loss": 1.2622, "step": 581 }, { "epoch": 0.82, "learning_rate": 8.197710360528571e-06, "loss": 1.2585, "step": 582 }, { "epoch": 0.82, "learning_rate": 8.072699168921826e-06, "loss": 1.2459, "step": 583 }, { "epoch": 0.82, "learning_rate": 7.948564740728998e-06, "loss": 1.2625, "step": 584 }, { "epoch": 0.82, "learning_rate": 7.825309671789128e-06, "loss": 1.2628, "step": 585 }, { "epoch": 0.83, "learning_rate": 7.70293653955254e-06, "loss": 1.2116, "step": 586 }, { "epoch": 0.83, "learning_rate": 7.581447903026939e-06, "loss": 1.2787, "step": 587 }, { "epoch": 0.83, "learning_rate": 7.460846302723845e-06, "loss": 1.2558, "step": 588 }, { "epoch": 0.83, "learning_rate": 7.341134260605536e-06, "loss": 1.2558, "step": 589 }, { "epoch": 0.83, "learning_rate": 7.2223142800322775e-06, "loss": 1.2198, "step": 590 }, { "epoch": 0.83, "learning_rate": 7.104388845709981e-06, "loss": 1.2856, "step": 591 }, { "epoch": 0.83, "learning_rate": 6.987360423638206e-06, "loss": 1.1944, "step": 592 }, { "epoch": 0.84, "learning_rate": 6.871231461058658e-06, "loss": 1.2197, "step": 593 }, { "epoch": 0.84, "learning_rate": 6.756004386403996e-06, "loss": 1.2229, "step": 594 }, { "epoch": 0.84, "learning_rate": 6.64168160924698e-06, "loss": 1.2093, "step": 595 }, { "epoch": 0.84, "learning_rate": 6.528265520250182e-06, "loss": 1.2403, "step": 596 }, { "epoch": 0.84, "learning_rate": 6.415758491115953e-06, "loss": 1.2749, "step": 597 }, { "epoch": 0.84, "learning_rate": 6.304162874536796e-06, "loss": 1.2668, "step": 598 }, { "epoch": 0.84, "learning_rate": 6.1934810041462066e-06, "loss": 1.2744, "step": 599 }, { "epoch": 0.85, "learning_rate": 6.08371519446988e-06, "loss": 1.2197, "step": 600 }, { "epoch": 0.85, "learning_rate": 5.974867740877283e-06, "loss": 1.248, "step": 601 }, { "epoch": 0.85, "learning_rate": 5.866940919533642e-06, "loss": 1.227, "step": 602 }, { "epoch": 0.85, "learning_rate": 5.759936987352399e-06, "loss": 1.2888, "step": 603 }, { "epoch": 0.85, "learning_rate": 5.65385818194798e-06, "loss": 1.2776, "step": 604 }, { "epoch": 0.85, "learning_rate": 5.548706721588986e-06, "loss": 1.2825, "step": 605 }, { "epoch": 0.85, "learning_rate": 5.444484805151856e-06, "loss": 1.2633, "step": 606 }, { "epoch": 0.86, "learning_rate": 5.341194612074824e-06, "loss": 1.2443, "step": 607 }, { "epoch": 0.86, "learning_rate": 5.23883830231241e-06, "loss": 1.247, "step": 608 }, { "epoch": 0.86, "learning_rate": 5.137418016290207e-06, "loss": 1.2748, "step": 609 }, { "epoch": 0.86, "learning_rate": 5.036935874860111e-06, "loss": 1.2337, "step": 610 }, { "epoch": 0.86, "learning_rate": 4.937393979256016e-06, "loss": 1.2413, "step": 611 }, { "epoch": 0.86, "learning_rate": 4.8387944110498685e-06, "loss": 1.2295, "step": 612 }, { "epoch": 0.86, "learning_rate": 4.7411392321080605e-06, "loss": 1.2466, "step": 613 }, { "epoch": 0.87, "learning_rate": 4.644430484548428e-06, "loss": 1.247, "step": 614 }, { "epoch": 0.87, "learning_rate": 4.548670190697485e-06, "loss": 1.2306, "step": 615 }, { "epoch": 0.87, "learning_rate": 4.453860353048112e-06, "loss": 1.2751, "step": 616 }, { "epoch": 0.87, "learning_rate": 4.360002954217734e-06, "loss": 1.2279, "step": 617 }, { "epoch": 0.87, "learning_rate": 4.267099956906828e-06, "loss": 1.2359, "step": 618 }, { "epoch": 0.87, "learning_rate": 4.175153303857887e-06, "loss": 1.2383, "step": 619 }, { "epoch": 0.87, "learning_rate": 4.084164917814815e-06, "loss": 1.2658, "step": 620 }, { "epoch": 0.88, "learning_rate": 3.994136701482659e-06, "loss": 1.2115, "step": 621 }, { "epoch": 0.88, "learning_rate": 3.905070537487909e-06, "loss": 1.2615, "step": 622 }, { "epoch": 0.88, "learning_rate": 3.8169682883390565e-06, "loss": 1.1924, "step": 623 }, { "epoch": 0.88, "learning_rate": 3.729831796387667e-06, "loss": 1.2593, "step": 624 }, { "epoch": 0.88, "learning_rate": 3.643662883789878e-06, "loss": 1.2781, "step": 625 }, { "epoch": 0.88, "learning_rate": 3.558463352468272e-06, "loss": 1.1864, "step": 626 }, { "epoch": 0.88, "learning_rate": 3.474234984074182e-06, "loss": 1.2902, "step": 627 }, { "epoch": 0.89, "learning_rate": 3.3909795399504783e-06, "loss": 1.2285, "step": 628 }, { "epoch": 0.89, "learning_rate": 3.3086987610946807e-06, "loss": 1.2597, "step": 629 }, { "epoch": 0.89, "learning_rate": 3.2273943681225992e-06, "loss": 1.2546, "step": 630 }, { "epoch": 0.89, "learning_rate": 3.14706806123235e-06, "loss": 1.2337, "step": 631 }, { "epoch": 0.89, "learning_rate": 3.067721520168748e-06, "loss": 1.214, "step": 632 }, { "epoch": 0.89, "learning_rate": 2.9893564041882484e-06, "loss": 1.2604, "step": 633 }, { "epoch": 0.89, "learning_rate": 2.9119743520242217e-06, "loss": 1.2261, "step": 634 }, { "epoch": 0.89, "learning_rate": 2.835576981852656e-06, "loss": 1.2825, "step": 635 }, { "epoch": 0.9, "learning_rate": 2.7601658912583763e-06, "loss": 1.2606, "step": 636 }, { "epoch": 0.9, "learning_rate": 2.685742657201601e-06, "loss": 1.2474, "step": 637 }, { "epoch": 0.9, "learning_rate": 2.61230883598495e-06, "loss": 1.2088, "step": 638 }, { "epoch": 0.9, "learning_rate": 2.5398659632209552e-06, "loss": 1.2612, "step": 639 }, { "epoch": 0.9, "learning_rate": 2.4684155537998743e-06, "loss": 1.2175, "step": 640 }, { "epoch": 0.9, "learning_rate": 2.397959101858083e-06, "loss": 1.2022, "step": 641 }, { "epoch": 0.9, "learning_rate": 2.3284980807467994e-06, "loss": 1.219, "step": 642 }, { "epoch": 0.91, "learning_rate": 2.2600339430012442e-06, "loss": 1.2495, "step": 643 }, { "epoch": 0.91, "learning_rate": 2.1925681203103287e-06, "loss": 1.2155, "step": 644 }, { "epoch": 0.91, "learning_rate": 2.12610202348667e-06, "loss": 1.2359, "step": 645 }, { "epoch": 0.91, "learning_rate": 2.060637042437097e-06, "loss": 1.2291, "step": 646 }, { "epoch": 0.91, "learning_rate": 1.9961745461335947e-06, "loss": 1.2033, "step": 647 }, { "epoch": 0.91, "learning_rate": 1.9327158825846848e-06, "loss": 1.2563, "step": 648 }, { "epoch": 0.91, "learning_rate": 1.8702623788072027e-06, "loss": 1.2112, "step": 649 }, { "epoch": 0.92, "learning_rate": 1.8088153407985809e-06, "loss": 1.2475, "step": 650 }, { "epoch": 0.92, "learning_rate": 1.7483760535095262e-06, "loss": 1.2246, "step": 651 }, { "epoch": 0.92, "learning_rate": 1.6889457808171472e-06, "loss": 1.2597, "step": 652 }, { "epoch": 0.92, "learning_rate": 1.6305257654985361e-06, "loss": 1.2363, "step": 653 }, { "epoch": 0.92, "learning_rate": 1.5731172292047625e-06, "loss": 1.2366, "step": 654 }, { "epoch": 0.92, "learning_rate": 1.5167213724353424e-06, "loss": 1.2134, "step": 655 }, { "epoch": 0.92, "learning_rate": 1.4613393745131321e-06, "loss": 1.2548, "step": 656 }, { "epoch": 0.93, "learning_rate": 1.4069723935596412e-06, "loss": 1.2399, "step": 657 }, { "epoch": 0.93, "learning_rate": 1.3536215664708586e-06, "loss": 1.242, "step": 658 }, { "epoch": 0.93, "learning_rate": 1.3012880088934532e-06, "loss": 1.2886, "step": 659 }, { "epoch": 0.93, "learning_rate": 1.2499728152014334e-06, "loss": 1.2667, "step": 660 }, { "epoch": 0.93, "learning_rate": 1.1996770584732919e-06, "loss": 1.28, "step": 661 }, { "epoch": 0.93, "learning_rate": 1.1504017904695296e-06, "loss": 1.2598, "step": 662 }, { "epoch": 0.93, "learning_rate": 1.1021480416106956e-06, "loss": 1.2444, "step": 663 }, { "epoch": 0.94, "learning_rate": 1.0549168209558312e-06, "loss": 1.2365, "step": 664 }, { "epoch": 0.94, "learning_rate": 1.008709116181339e-06, "loss": 1.235, "step": 665 }, { "epoch": 0.94, "learning_rate": 9.635258935603796e-07, "loss": 1.2456, "step": 666 }, { "epoch": 0.94, "learning_rate": 9.19368097942619e-07, "loss": 1.2448, "step": 667 }, { "epoch": 0.94, "learning_rate": 8.762366527345022e-07, "loss": 1.284, "step": 668 }, { "epoch": 0.94, "learning_rate": 8.341324598799216e-07, "loss": 1.2056, "step": 669 }, { "epoch": 0.94, "learning_rate": 7.930563998413798e-07, "loss": 1.175, "step": 670 }, { "epoch": 0.95, "learning_rate": 7.530093315815557e-07, "loss": 1.2085, "step": 671 }, { "epoch": 0.95, "learning_rate": 7.139920925453347e-07, "loss": 1.2548, "step": 672 }, { "epoch": 0.95, "learning_rate": 6.760054986423459e-07, "loss": 1.1877, "step": 673 }, { "epoch": 0.95, "learning_rate": 6.390503442298413e-07, "loss": 1.2495, "step": 674 }, { "epoch": 0.95, "learning_rate": 6.031274020961152e-07, "loss": 1.2678, "step": 675 }, { "epoch": 0.95, "learning_rate": 5.682374234443344e-07, "loss": 1.2285, "step": 676 }, { "epoch": 0.95, "learning_rate": 5.343811378768492e-07, "loss": 1.2401, "step": 677 }, { "epoch": 0.96, "learning_rate": 5.015592533799074e-07, "loss": 1.1871, "step": 678 }, { "epoch": 0.96, "learning_rate": 4.6977245630886455e-07, "loss": 1.2252, "step": 679 }, { "epoch": 0.96, "learning_rate": 4.3902141137382444e-07, "loss": 1.2074, "step": 680 }, { "epoch": 0.96, "learning_rate": 4.0930676162576063e-07, "loss": 1.2778, "step": 681 }, { "epoch": 0.96, "learning_rate": 3.8062912844302746e-07, "loss": 1.2601, "step": 682 }, { "epoch": 0.96, "learning_rate": 3.5298911151841475e-07, "loss": 1.239, "step": 683 }, { "epoch": 0.96, "learning_rate": 3.263872888465691e-07, "loss": 1.231, "step": 684 }, { "epoch": 0.97, "learning_rate": 3.0082421671192575e-07, "loss": 1.2737, "step": 685 }, { "epoch": 0.97, "learning_rate": 2.7630042967707327e-07, "loss": 1.29, "step": 686 }, { "epoch": 0.97, "learning_rate": 2.5281644057156826e-07, "loss": 1.248, "step": 687 }, { "epoch": 0.97, "learning_rate": 2.303727404812217e-07, "loss": 1.2008, "step": 688 }, { "epoch": 0.97, "learning_rate": 2.0896979873782918e-07, "loss": 1.1855, "step": 689 }, { "epoch": 0.97, "learning_rate": 1.8860806290932897e-07, "loss": 1.1995, "step": 690 }, { "epoch": 0.97, "learning_rate": 1.692879587904983e-07, "loss": 1.245, "step": 691 }, { "epoch": 0.98, "learning_rate": 1.5100989039399939e-07, "loss": 1.2523, "step": 692 }, { "epoch": 0.98, "learning_rate": 1.337742399419306e-07, "loss": 1.2624, "step": 693 }, { "epoch": 0.98, "learning_rate": 1.1758136785788854e-07, "loss": 1.2609, "step": 694 }, { "epoch": 0.98, "learning_rate": 1.0243161275936274e-07, "loss": 1.2369, "step": 695 }, { "epoch": 0.98, "learning_rate": 8.832529145070267e-08, "loss": 1.2945, "step": 696 }, { "epoch": 0.98, "learning_rate": 7.526269891646176e-08, "loss": 1.2309, "step": 697 }, { "epoch": 0.98, "learning_rate": 6.324410831525795e-08, "loss": 1.2671, "step": 698 }, { "epoch": 0.99, "learning_rate": 5.226977097403385e-08, "loss": 1.2309, "step": 699 }, { "epoch": 0.99, "learning_rate": 4.233991638281642e-08, "loss": 1.1994, "step": 700 }, { "epoch": 0.99, "learning_rate": 3.3454752189926444e-08, "loss": 1.234, "step": 701 }, { "epoch": 0.99, "learning_rate": 2.561446419760416e-08, "loss": 1.2064, "step": 702 }, { "epoch": 0.99, "learning_rate": 1.8819216358156864e-08, "loss": 1.1986, "step": 703 }, { "epoch": 0.99, "learning_rate": 1.3069150770528282e-08, "loss": 1.2388, "step": 704 }, { "epoch": 0.99, "learning_rate": 8.364387677295415e-09, "loss": 1.2003, "step": 705 }, { "epoch": 1.0, "learning_rate": 4.705025462187207e-09, "loss": 1.2338, "step": 706 }, { "epoch": 1.0, "learning_rate": 2.091140648013967e-09, "loss": 1.264, "step": 707 }, { "epoch": 1.0, "learning_rate": 5.22787895074206e-10, "loss": 1.2477, "step": 708 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.208, "step": 709 }, { "epoch": 1.0, "step": 709, "total_flos": 0.0, "train_loss": 0.7294200115714995, "train_runtime": 11040.3966, "train_samples_per_second": 7.201, "train_steps_per_second": 0.064 } ], "logging_steps": 1.0, "max_steps": 709, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }