{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998989184271707, "eval_steps": 500, "global_step": 4946, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.7114093959731546e-06, "loss": 6.5081, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3422818791946309e-05, "loss": 6.5167, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.013422818791946e-05, "loss": 6.4218, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.6845637583892618e-05, "loss": 5.3159, "step": 4 }, { "epoch": 0.0, "learning_rate": 3.3557046979865775e-05, "loss": 5.0078, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.026845637583892e-05, "loss": 4.4732, "step": 6 }, { "epoch": 0.0, "learning_rate": 4.697986577181208e-05, "loss": 3.6586, "step": 7 }, { "epoch": 0.0, "learning_rate": 5.3691275167785237e-05, "loss": 3.3308, "step": 8 }, { "epoch": 0.0, "learning_rate": 6.040268456375839e-05, "loss": 3.0749, "step": 9 }, { "epoch": 0.0, "learning_rate": 6.711409395973155e-05, "loss": 2.9314, "step": 10 }, { "epoch": 0.0, "learning_rate": 7.38255033557047e-05, "loss": 2.8257, "step": 11 }, { "epoch": 0.0, "learning_rate": 8.053691275167784e-05, "loss": 2.7712, "step": 12 }, { "epoch": 0.0, "learning_rate": 8.7248322147651e-05, "loss": 2.6402, "step": 13 }, { "epoch": 0.0, "learning_rate": 9.395973154362417e-05, "loss": 2.5781, "step": 14 }, { "epoch": 0.0, "learning_rate": 0.00010067114093959731, "loss": 2.4845, "step": 15 }, { "epoch": 0.0, "learning_rate": 0.00010738255033557047, "loss": 2.4082, "step": 16 }, { "epoch": 0.0, "learning_rate": 0.00011409395973154363, "loss": 2.3542, "step": 17 }, { "epoch": 0.0, "learning_rate": 0.00012080536912751678, "loss": 2.3931, "step": 18 }, { "epoch": 0.0, "learning_rate": 0.00012751677852348994, "loss": 2.3453, "step": 19 }, { "epoch": 0.0, "learning_rate": 0.0001342281879194631, "loss": 2.2771, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00014093959731543624, "loss": 2.2842, "step": 21 }, { "epoch": 0.0, "learning_rate": 0.0001476510067114094, "loss": 2.2795, "step": 22 }, { "epoch": 0.0, "learning_rate": 0.00015436241610738256, "loss": 2.2922, "step": 23 }, { "epoch": 0.0, "learning_rate": 0.0001610738255033557, "loss": 2.2773, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.00016778523489932888, "loss": 2.246, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.000174496644295302, "loss": 2.1612, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.00018120805369127517, "loss": 2.1943, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.00018791946308724833, "loss": 2.1769, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.00019463087248322146, "loss": 2.2003, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.00020134228187919463, "loss": 2.213, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00020805369127516779, "loss": 2.1629, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.00021476510067114095, "loss": 2.1677, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.0002214765100671141, "loss": 2.2235, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.00022818791946308727, "loss": 2.1486, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.0002348993288590604, "loss": 2.1535, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.00024161073825503356, "loss": 2.111, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.0002483221476510067, "loss": 2.1341, "step": 37 }, { "epoch": 0.01, "learning_rate": 0.0002550335570469799, "loss": 2.1604, "step": 38 }, { "epoch": 0.01, "learning_rate": 0.000261744966442953, "loss": 2.1548, "step": 39 }, { "epoch": 0.01, "learning_rate": 0.0002684563758389262, "loss": 2.1062, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00027516778523489934, "loss": 2.1495, "step": 41 }, { "epoch": 0.01, "learning_rate": 0.00028187919463087247, "loss": 2.1688, "step": 42 }, { "epoch": 0.01, "learning_rate": 0.00028859060402684566, "loss": 2.0777, "step": 43 }, { "epoch": 0.01, "learning_rate": 0.0002953020134228188, "loss": 2.0693, "step": 44 }, { "epoch": 0.01, "learning_rate": 0.000302013422818792, "loss": 2.0645, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.0003087248322147651, "loss": 2.0279, "step": 46 }, { "epoch": 0.01, "learning_rate": 0.00031543624161073825, "loss": 2.1481, "step": 47 }, { "epoch": 0.01, "learning_rate": 0.0003221476510067114, "loss": 2.0765, "step": 48 }, { "epoch": 0.01, "learning_rate": 0.0003288590604026846, "loss": 2.0709, "step": 49 }, { "epoch": 0.01, "learning_rate": 0.00033557046979865775, "loss": 2.0834, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0003422818791946309, "loss": 2.0597, "step": 51 }, { "epoch": 0.01, "learning_rate": 0.000348993288590604, "loss": 2.0304, "step": 52 }, { "epoch": 0.01, "learning_rate": 0.00035570469798657715, "loss": 2.042, "step": 53 }, { "epoch": 0.01, "learning_rate": 0.00036241610738255034, "loss": 2.0856, "step": 54 }, { "epoch": 0.01, "learning_rate": 0.00036912751677852353, "loss": 2.1084, "step": 55 }, { "epoch": 0.01, "learning_rate": 0.00037583892617449666, "loss": 2.0576, "step": 56 }, { "epoch": 0.01, "learning_rate": 0.0003825503355704698, "loss": 2.072, "step": 57 }, { "epoch": 0.01, "learning_rate": 0.00038926174496644293, "loss": 2.0896, "step": 58 }, { "epoch": 0.01, "learning_rate": 0.0003959731543624161, "loss": 2.0074, "step": 59 }, { "epoch": 0.01, "learning_rate": 0.00040268456375838925, "loss": 2.0437, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.00040939597315436244, "loss": 2.0408, "step": 61 }, { "epoch": 0.01, "learning_rate": 0.00041610738255033557, "loss": 2.0581, "step": 62 }, { "epoch": 0.01, "learning_rate": 0.00042281879194630876, "loss": 2.0622, "step": 63 }, { "epoch": 0.01, "learning_rate": 0.0004295302013422819, "loss": 2.0788, "step": 64 }, { "epoch": 0.01, "learning_rate": 0.000436241610738255, "loss": 2.0516, "step": 65 }, { "epoch": 0.01, "learning_rate": 0.0004429530201342282, "loss": 2.062, "step": 66 }, { "epoch": 0.01, "learning_rate": 0.00044966442953020135, "loss": 2.0968, "step": 67 }, { "epoch": 0.01, "learning_rate": 0.00045637583892617453, "loss": 2.0695, "step": 68 }, { "epoch": 0.01, "learning_rate": 0.00046308724832214767, "loss": 2.0023, "step": 69 }, { "epoch": 0.01, "learning_rate": 0.0004697986577181208, "loss": 2.0383, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.00047651006711409394, "loss": 2.0434, "step": 71 }, { "epoch": 0.01, "learning_rate": 0.0004832214765100671, "loss": 2.0139, "step": 72 }, { "epoch": 0.01, "learning_rate": 0.0004899328859060403, "loss": 2.065, "step": 73 }, { "epoch": 0.01, "learning_rate": 0.0004966442953020134, "loss": 2.0446, "step": 74 }, { "epoch": 0.02, "learning_rate": 0.0005033557046979866, "loss": 2.0108, "step": 75 }, { "epoch": 0.02, "learning_rate": 0.0005100671140939598, "loss": 2.0048, "step": 76 }, { "epoch": 0.02, "learning_rate": 0.0005167785234899329, "loss": 2.0606, "step": 77 }, { "epoch": 0.02, "learning_rate": 0.000523489932885906, "loss": 2.0048, "step": 78 }, { "epoch": 0.02, "learning_rate": 0.0005302013422818792, "loss": 2.0257, "step": 79 }, { "epoch": 0.02, "learning_rate": 0.0005369127516778524, "loss": 1.9665, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.0005436241610738255, "loss": 2.0459, "step": 81 }, { "epoch": 0.02, "learning_rate": 0.0005503355704697987, "loss": 2.024, "step": 82 }, { "epoch": 0.02, "learning_rate": 0.0005570469798657718, "loss": 2.0347, "step": 83 }, { "epoch": 0.02, "learning_rate": 0.0005637583892617449, "loss": 2.0128, "step": 84 }, { "epoch": 0.02, "learning_rate": 0.0005704697986577181, "loss": 1.9931, "step": 85 }, { "epoch": 0.02, "learning_rate": 0.0005771812080536913, "loss": 2.0367, "step": 86 }, { "epoch": 0.02, "learning_rate": 0.0005838926174496644, "loss": 2.035, "step": 87 }, { "epoch": 0.02, "learning_rate": 0.0005906040268456376, "loss": 2.0506, "step": 88 }, { "epoch": 0.02, "learning_rate": 0.0005973154362416108, "loss": 2.0223, "step": 89 }, { "epoch": 0.02, "learning_rate": 0.000604026845637584, "loss": 2.0345, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.0006107382550335571, "loss": 2.0479, "step": 91 }, { "epoch": 0.02, "learning_rate": 0.0006174496644295302, "loss": 2.0201, "step": 92 }, { "epoch": 0.02, "learning_rate": 0.0006241610738255034, "loss": 2.0141, "step": 93 }, { "epoch": 0.02, "learning_rate": 0.0006308724832214765, "loss": 2.0105, "step": 94 }, { "epoch": 0.02, "learning_rate": 0.0006375838926174496, "loss": 2.0691, "step": 95 }, { "epoch": 0.02, "learning_rate": 0.0006442953020134228, "loss": 2.0247, "step": 96 }, { "epoch": 0.02, "learning_rate": 0.0006510067114093959, "loss": 2.0333, "step": 97 }, { "epoch": 0.02, "learning_rate": 0.0006577181208053692, "loss": 2.041, "step": 98 }, { "epoch": 0.02, "learning_rate": 0.0006644295302013424, "loss": 2.0171, "step": 99 }, { "epoch": 0.02, "learning_rate": 0.0006711409395973155, "loss": 2.009, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0006778523489932886, "loss": 1.9819, "step": 101 }, { "epoch": 0.02, "learning_rate": 0.0006845637583892618, "loss": 1.9484, "step": 102 }, { "epoch": 0.02, "learning_rate": 0.0006912751677852349, "loss": 2.0053, "step": 103 }, { "epoch": 0.02, "learning_rate": 0.000697986577181208, "loss": 2.031, "step": 104 }, { "epoch": 0.02, "learning_rate": 0.0007046979865771812, "loss": 2.013, "step": 105 }, { "epoch": 0.02, "learning_rate": 0.0007114093959731543, "loss": 2.0107, "step": 106 }, { "epoch": 0.02, "learning_rate": 0.0007181208053691274, "loss": 2.0004, "step": 107 }, { "epoch": 0.02, "learning_rate": 0.0007248322147651007, "loss": 1.957, "step": 108 }, { "epoch": 0.02, "learning_rate": 0.0007315436241610739, "loss": 2.0122, "step": 109 }, { "epoch": 0.02, "learning_rate": 0.0007382550335570471, "loss": 2.0139, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.0007449664429530202, "loss": 1.9701, "step": 111 }, { "epoch": 0.02, "learning_rate": 0.0007516778523489933, "loss": 1.9604, "step": 112 }, { "epoch": 0.02, "learning_rate": 0.0007583892617449665, "loss": 2.0031, "step": 113 }, { "epoch": 0.02, "learning_rate": 0.0007651006711409396, "loss": 2.0365, "step": 114 }, { "epoch": 0.02, "learning_rate": 0.0007718120805369127, "loss": 1.9908, "step": 115 }, { "epoch": 0.02, "learning_rate": 0.0007785234899328859, "loss": 1.9759, "step": 116 }, { "epoch": 0.02, "learning_rate": 0.0007852348993288591, "loss": 1.9766, "step": 117 }, { "epoch": 0.02, "learning_rate": 0.0007919463087248322, "loss": 1.9981, "step": 118 }, { "epoch": 0.02, "learning_rate": 0.0007986577181208054, "loss": 2.0232, "step": 119 }, { "epoch": 0.02, "learning_rate": 0.0008053691275167785, "loss": 1.9637, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.0008120805369127517, "loss": 1.9471, "step": 121 }, { "epoch": 0.02, "learning_rate": 0.0008187919463087249, "loss": 2.0101, "step": 122 }, { "epoch": 0.02, "learning_rate": 0.000825503355704698, "loss": 1.9532, "step": 123 }, { "epoch": 0.03, "learning_rate": 0.0008322147651006711, "loss": 1.958, "step": 124 }, { "epoch": 0.03, "learning_rate": 0.0008389261744966443, "loss": 2.0223, "step": 125 }, { "epoch": 0.03, "learning_rate": 0.0008456375838926175, "loss": 1.9738, "step": 126 }, { "epoch": 0.03, "learning_rate": 0.0008523489932885907, "loss": 1.9769, "step": 127 }, { "epoch": 0.03, "learning_rate": 0.0008590604026845638, "loss": 1.9681, "step": 128 }, { "epoch": 0.03, "learning_rate": 0.0008657718120805369, "loss": 1.9992, "step": 129 }, { "epoch": 0.03, "learning_rate": 0.00087248322147651, "loss": 2.0236, "step": 130 }, { "epoch": 0.03, "learning_rate": 0.0008791946308724832, "loss": 1.9902, "step": 131 }, { "epoch": 0.03, "learning_rate": 0.0008859060402684564, "loss": 1.9624, "step": 132 }, { "epoch": 0.03, "learning_rate": 0.0008926174496644296, "loss": 1.9921, "step": 133 }, { "epoch": 0.03, "learning_rate": 0.0008993288590604027, "loss": 1.9378, "step": 134 }, { "epoch": 0.03, "learning_rate": 0.0009060402684563759, "loss": 1.9522, "step": 135 }, { "epoch": 0.03, "learning_rate": 0.0009127516778523491, "loss": 2.0026, "step": 136 }, { "epoch": 0.03, "learning_rate": 0.0009194630872483222, "loss": 2.0072, "step": 137 }, { "epoch": 0.03, "learning_rate": 0.0009261744966442953, "loss": 1.9815, "step": 138 }, { "epoch": 0.03, "learning_rate": 0.0009328859060402685, "loss": 1.9662, "step": 139 }, { "epoch": 0.03, "learning_rate": 0.0009395973154362416, "loss": 1.9571, "step": 140 }, { "epoch": 0.03, "learning_rate": 0.0009463087248322147, "loss": 1.9718, "step": 141 }, { "epoch": 0.03, "learning_rate": 0.0009530201342281879, "loss": 1.9573, "step": 142 }, { "epoch": 0.03, "learning_rate": 0.000959731543624161, "loss": 1.9602, "step": 143 }, { "epoch": 0.03, "learning_rate": 0.0009664429530201342, "loss": 1.9107, "step": 144 }, { "epoch": 0.03, "learning_rate": 0.0009731543624161075, "loss": 1.9404, "step": 145 }, { "epoch": 0.03, "learning_rate": 0.0009798657718120805, "loss": 1.9445, "step": 146 }, { "epoch": 0.03, "learning_rate": 0.0009865771812080538, "loss": 1.9775, "step": 147 }, { "epoch": 0.03, "learning_rate": 0.0009932885906040268, "loss": 1.9759, "step": 148 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 1.9526, "step": 149 }, { "epoch": 0.03, "learning_rate": 0.0009999998927739515, "loss": 1.9265, "step": 150 }, { "epoch": 0.03, "learning_rate": 0.0009999995710958521, "loss": 1.9413, "step": 151 }, { "epoch": 0.03, "learning_rate": 0.0009999990349658396, "loss": 1.907, "step": 152 }, { "epoch": 0.03, "learning_rate": 0.000999998284384144, "loss": 1.935, "step": 153 }, { "epoch": 0.03, "learning_rate": 0.000999997319351087, "loss": 1.9273, "step": 154 }, { "epoch": 0.03, "learning_rate": 0.0009999961398670834, "loss": 1.9635, "step": 155 }, { "epoch": 0.03, "learning_rate": 0.0009999947459326378, "loss": 1.8989, "step": 156 }, { "epoch": 0.03, "learning_rate": 0.000999993137548349, "loss": 1.9327, "step": 157 }, { "epoch": 0.03, "learning_rate": 0.0009999913147149067, "loss": 1.9149, "step": 158 }, { "epoch": 0.03, "learning_rate": 0.0009999892774330923, "loss": 1.9641, "step": 159 }, { "epoch": 0.03, "learning_rate": 0.00099998702570378, "loss": 1.9029, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.0009999845595279355, "loss": 1.9133, "step": 161 }, { "epoch": 0.03, "learning_rate": 0.0009999818789066164, "loss": 1.8718, "step": 162 }, { "epoch": 0.03, "learning_rate": 0.0009999789838409726, "loss": 1.9595, "step": 163 }, { "epoch": 0.03, "learning_rate": 0.0009999758743322457, "loss": 1.9446, "step": 164 }, { "epoch": 0.03, "learning_rate": 0.0009999725503817693, "loss": 1.8969, "step": 165 }, { "epoch": 0.03, "learning_rate": 0.0009999690119909693, "loss": 1.9231, "step": 166 }, { "epoch": 0.03, "learning_rate": 0.000999965259161363, "loss": 1.9635, "step": 167 }, { "epoch": 0.03, "learning_rate": 0.0009999612918945606, "loss": 1.8788, "step": 168 }, { "epoch": 0.03, "learning_rate": 0.000999957110192263, "loss": 1.8874, "step": 169 }, { "epoch": 0.03, "learning_rate": 0.0009999527140562641, "loss": 1.9123, "step": 170 }, { "epoch": 0.03, "learning_rate": 0.0009999481034884494, "loss": 1.8927, "step": 171 }, { "epoch": 0.03, "learning_rate": 0.0009999432784907963, "loss": 1.8692, "step": 172 }, { "epoch": 0.03, "learning_rate": 0.0009999382390653744, "loss": 1.8893, "step": 173 }, { "epoch": 0.04, "learning_rate": 0.0009999329852143448, "loss": 1.9054, "step": 174 }, { "epoch": 0.04, "learning_rate": 0.0009999275169399614, "loss": 1.8664, "step": 175 }, { "epoch": 0.04, "learning_rate": 0.0009999218342445692, "loss": 1.936, "step": 176 }, { "epoch": 0.04, "learning_rate": 0.0009999159371306056, "loss": 1.8473, "step": 177 }, { "epoch": 0.04, "learning_rate": 0.0009999098256006, "loss": 1.8801, "step": 178 }, { "epoch": 0.04, "learning_rate": 0.0009999034996571736, "loss": 1.9026, "step": 179 }, { "epoch": 0.04, "learning_rate": 0.0009998969593030397, "loss": 1.8645, "step": 180 }, { "epoch": 0.04, "learning_rate": 0.0009998902045410033, "loss": 1.8665, "step": 181 }, { "epoch": 0.04, "learning_rate": 0.0009998832353739614, "loss": 1.9017, "step": 182 }, { "epoch": 0.04, "learning_rate": 0.0009998760518049037, "loss": 1.8621, "step": 183 }, { "epoch": 0.04, "learning_rate": 0.000999868653836911, "loss": 1.9002, "step": 184 }, { "epoch": 0.04, "learning_rate": 0.000999861041473156, "loss": 1.8738, "step": 185 }, { "epoch": 0.04, "learning_rate": 0.000999853214716904, "loss": 1.8815, "step": 186 }, { "epoch": 0.04, "learning_rate": 0.0009998451735715116, "loss": 1.8518, "step": 187 }, { "epoch": 0.04, "learning_rate": 0.0009998369180404282, "loss": 1.8291, "step": 188 }, { "epoch": 0.04, "learning_rate": 0.0009998284481271943, "loss": 1.8793, "step": 189 }, { "epoch": 0.04, "learning_rate": 0.0009998197638354427, "loss": 1.8673, "step": 190 }, { "epoch": 0.04, "learning_rate": 0.0009998108651688982, "loss": 1.8596, "step": 191 }, { "epoch": 0.04, "learning_rate": 0.0009998017521313774, "loss": 1.8752, "step": 192 }, { "epoch": 0.04, "learning_rate": 0.0009997924247267891, "loss": 1.8468, "step": 193 }, { "epoch": 0.04, "learning_rate": 0.0009997828829591336, "loss": 1.8513, "step": 194 }, { "epoch": 0.04, "learning_rate": 0.0009997731268325035, "loss": 1.8084, "step": 195 }, { "epoch": 0.04, "learning_rate": 0.0009997631563510832, "loss": 1.8522, "step": 196 }, { "epoch": 0.04, "learning_rate": 0.0009997529715191494, "loss": 1.9125, "step": 197 }, { "epoch": 0.04, "learning_rate": 0.00099974257234107, "loss": 1.8737, "step": 198 }, { "epoch": 0.04, "learning_rate": 0.0009997319588213054, "loss": 1.8628, "step": 199 }, { "epoch": 0.04, "learning_rate": 0.0009997211309644078, "loss": 1.8286, "step": 200 }, { "epoch": 0.04, "learning_rate": 0.0009997100887750214, "loss": 1.8457, "step": 201 }, { "epoch": 0.04, "learning_rate": 0.0009996988322578822, "loss": 1.8623, "step": 202 }, { "epoch": 0.04, "learning_rate": 0.000999687361417818, "loss": 1.8129, "step": 203 }, { "epoch": 0.04, "learning_rate": 0.0009996756762597487, "loss": 1.8357, "step": 204 }, { "epoch": 0.04, "learning_rate": 0.0009996637767886866, "loss": 1.8504, "step": 205 }, { "epoch": 0.04, "learning_rate": 0.0009996516630097348, "loss": 1.8338, "step": 206 }, { "epoch": 0.04, "learning_rate": 0.0009996393349280893, "loss": 1.8022, "step": 207 }, { "epoch": 0.04, "learning_rate": 0.0009996267925490375, "loss": 1.8386, "step": 208 }, { "epoch": 0.04, "learning_rate": 0.000999614035877959, "loss": 1.8044, "step": 209 }, { "epoch": 0.04, "learning_rate": 0.0009996010649203251, "loss": 1.7704, "step": 210 }, { "epoch": 0.04, "learning_rate": 0.0009995878796816991, "loss": 1.8068, "step": 211 }, { "epoch": 0.04, "learning_rate": 0.0009995744801677363, "loss": 1.833, "step": 212 }, { "epoch": 0.04, "learning_rate": 0.000999560866384184, "loss": 1.8079, "step": 213 }, { "epoch": 0.04, "learning_rate": 0.0009995470383368807, "loss": 1.819, "step": 214 }, { "epoch": 0.04, "learning_rate": 0.0009995329960317575, "loss": 1.801, "step": 215 }, { "epoch": 0.04, "learning_rate": 0.0009995187394748375, "loss": 1.8745, "step": 216 }, { "epoch": 0.04, "learning_rate": 0.000999504268672235, "loss": 1.7976, "step": 217 }, { "epoch": 0.04, "learning_rate": 0.000999489583630157, "loss": 1.8153, "step": 218 }, { "epoch": 0.04, "learning_rate": 0.0009994746843549017, "loss": 1.7774, "step": 219 }, { "epoch": 0.04, "learning_rate": 0.0009994595708528594, "loss": 1.7987, "step": 220 }, { "epoch": 0.04, "learning_rate": 0.0009994442431305122, "loss": 1.8244, "step": 221 }, { "epoch": 0.04, "learning_rate": 0.0009994287011944347, "loss": 1.8026, "step": 222 }, { "epoch": 0.05, "learning_rate": 0.0009994129450512927, "loss": 1.8118, "step": 223 }, { "epoch": 0.05, "learning_rate": 0.0009993969747078442, "loss": 1.8418, "step": 224 }, { "epoch": 0.05, "learning_rate": 0.0009993807901709385, "loss": 1.7652, "step": 225 }, { "epoch": 0.05, "learning_rate": 0.0009993643914475176, "loss": 1.7513, "step": 226 }, { "epoch": 0.05, "learning_rate": 0.000999347778544615, "loss": 1.8353, "step": 227 }, { "epoch": 0.05, "learning_rate": 0.0009993309514693559, "loss": 1.7968, "step": 228 }, { "epoch": 0.05, "learning_rate": 0.0009993139102289575, "loss": 1.7863, "step": 229 }, { "epoch": 0.05, "learning_rate": 0.000999296654830729, "loss": 1.81, "step": 230 }, { "epoch": 0.05, "learning_rate": 0.0009992791852820708, "loss": 1.7832, "step": 231 }, { "epoch": 0.05, "learning_rate": 0.0009992615015904764, "loss": 1.7726, "step": 232 }, { "epoch": 0.05, "learning_rate": 0.0009992436037635302, "loss": 1.7518, "step": 233 }, { "epoch": 0.05, "learning_rate": 0.0009992254918089083, "loss": 1.7747, "step": 234 }, { "epoch": 0.05, "learning_rate": 0.0009992071657343793, "loss": 1.7721, "step": 235 }, { "epoch": 0.05, "learning_rate": 0.0009991886255478033, "loss": 1.7776, "step": 236 }, { "epoch": 0.05, "learning_rate": 0.0009991698712571323, "loss": 1.7783, "step": 237 }, { "epoch": 0.05, "learning_rate": 0.0009991509028704098, "loss": 1.7804, "step": 238 }, { "epoch": 0.05, "learning_rate": 0.0009991317203957718, "loss": 1.7491, "step": 239 }, { "epoch": 0.05, "learning_rate": 0.0009991123238414455, "loss": 1.7423, "step": 240 }, { "epoch": 0.05, "learning_rate": 0.0009990927132157504, "loss": 1.8294, "step": 241 }, { "epoch": 0.05, "learning_rate": 0.000999072888527097, "loss": 1.7857, "step": 242 }, { "epoch": 0.05, "learning_rate": 0.0009990528497839891, "loss": 1.7919, "step": 243 }, { "epoch": 0.05, "learning_rate": 0.0009990325969950207, "loss": 1.7839, "step": 244 }, { "epoch": 0.05, "learning_rate": 0.0009990121301688786, "loss": 1.772, "step": 245 }, { "epoch": 0.05, "learning_rate": 0.000998991449314341, "loss": 1.749, "step": 246 }, { "epoch": 0.05, "learning_rate": 0.000998970554440278, "loss": 1.7704, "step": 247 }, { "epoch": 0.05, "learning_rate": 0.0009989494455556517, "loss": 1.7584, "step": 248 }, { "epoch": 0.05, "learning_rate": 0.0009989281226695154, "loss": 1.8027, "step": 249 }, { "epoch": 0.05, "learning_rate": 0.0009989065857910148, "loss": 1.7719, "step": 250 }, { "epoch": 0.05, "learning_rate": 0.0009988848349293874, "loss": 1.7167, "step": 251 }, { "epoch": 0.05, "learning_rate": 0.0009988628700939616, "loss": 1.7283, "step": 252 }, { "epoch": 0.05, "learning_rate": 0.000998840691294159, "loss": 1.7778, "step": 253 }, { "epoch": 0.05, "learning_rate": 0.0009988182985394916, "loss": 1.7692, "step": 254 }, { "epoch": 0.05, "learning_rate": 0.000998795691839564, "loss": 1.7376, "step": 255 }, { "epoch": 0.05, "learning_rate": 0.000998772871204072, "loss": 1.7571, "step": 256 }, { "epoch": 0.05, "learning_rate": 0.000998749836642804, "loss": 1.7836, "step": 257 }, { "epoch": 0.05, "learning_rate": 0.0009987265881656392, "loss": 1.7532, "step": 258 }, { "epoch": 0.05, "learning_rate": 0.000998703125782549, "loss": 1.7422, "step": 259 }, { "epoch": 0.05, "learning_rate": 0.0009986794495035968, "loss": 1.7295, "step": 260 }, { "epoch": 0.05, "learning_rate": 0.0009986555593389373, "loss": 1.7469, "step": 261 }, { "epoch": 0.05, "learning_rate": 0.000998631455298817, "loss": 1.7163, "step": 262 }, { "epoch": 0.05, "learning_rate": 0.000998607137393574, "loss": 1.7355, "step": 263 }, { "epoch": 0.05, "learning_rate": 0.000998582605633639, "loss": 1.7526, "step": 264 }, { "epoch": 0.05, "learning_rate": 0.000998557860029533, "loss": 1.7162, "step": 265 }, { "epoch": 0.05, "learning_rate": 0.0009985329005918703, "loss": 1.7068, "step": 266 }, { "epoch": 0.05, "learning_rate": 0.0009985077273313553, "loss": 1.7544, "step": 267 }, { "epoch": 0.05, "learning_rate": 0.0009984823402587857, "loss": 1.7292, "step": 268 }, { "epoch": 0.05, "learning_rate": 0.0009984567393850497, "loss": 1.7766, "step": 269 }, { "epoch": 0.05, "learning_rate": 0.0009984309247211276, "loss": 1.7529, "step": 270 }, { "epoch": 0.05, "learning_rate": 0.0009984048962780913, "loss": 1.7264, "step": 271 }, { "epoch": 0.05, "learning_rate": 0.000998378654067105, "loss": 1.7021, "step": 272 }, { "epoch": 0.06, "learning_rate": 0.0009983521980994237, "loss": 1.7337, "step": 273 }, { "epoch": 0.06, "learning_rate": 0.0009983255283863945, "loss": 1.7234, "step": 274 }, { "epoch": 0.06, "learning_rate": 0.0009982986449394561, "loss": 1.8016, "step": 275 }, { "epoch": 0.06, "learning_rate": 0.0009982715477701394, "loss": 1.6971, "step": 276 }, { "epoch": 0.06, "learning_rate": 0.0009982442368900656, "loss": 1.6974, "step": 277 }, { "epoch": 0.06, "learning_rate": 0.0009982167123109495, "loss": 1.6988, "step": 278 }, { "epoch": 0.06, "learning_rate": 0.0009981889740445957, "loss": 1.6865, "step": 279 }, { "epoch": 0.06, "learning_rate": 0.0009981610221029016, "loss": 1.686, "step": 280 }, { "epoch": 0.06, "learning_rate": 0.0009981328564978558, "loss": 1.706, "step": 281 }, { "epoch": 0.06, "learning_rate": 0.000998104477241539, "loss": 1.7252, "step": 282 }, { "epoch": 0.06, "learning_rate": 0.0009980758843461223, "loss": 1.7231, "step": 283 }, { "epoch": 0.06, "learning_rate": 0.0009980470778238703, "loss": 1.7304, "step": 284 }, { "epoch": 0.06, "learning_rate": 0.0009980180576871379, "loss": 1.7082, "step": 285 }, { "epoch": 0.06, "learning_rate": 0.0009979888239483716, "loss": 1.7073, "step": 286 }, { "epoch": 0.06, "learning_rate": 0.0009979593766201102, "loss": 1.6992, "step": 287 }, { "epoch": 0.06, "learning_rate": 0.000997929715714984, "loss": 1.7154, "step": 288 }, { "epoch": 0.06, "learning_rate": 0.0009978998412457142, "loss": 1.6996, "step": 289 }, { "epoch": 0.06, "learning_rate": 0.0009978697532251144, "loss": 1.6998, "step": 290 }, { "epoch": 0.06, "learning_rate": 0.0009978394516660894, "loss": 1.684, "step": 291 }, { "epoch": 0.06, "learning_rate": 0.0009978089365816356, "loss": 1.6753, "step": 292 }, { "epoch": 0.06, "learning_rate": 0.000997778207984841, "loss": 1.7114, "step": 293 }, { "epoch": 0.06, "learning_rate": 0.0009977472658888856, "loss": 1.7357, "step": 294 }, { "epoch": 0.06, "learning_rate": 0.0009977161103070402, "loss": 1.7019, "step": 295 }, { "epoch": 0.06, "learning_rate": 0.0009976847412526677, "loss": 1.6701, "step": 296 }, { "epoch": 0.06, "learning_rate": 0.0009976531587392223, "loss": 1.6703, "step": 297 }, { "epoch": 0.06, "learning_rate": 0.0009976213627802503, "loss": 1.6886, "step": 298 }, { "epoch": 0.06, "learning_rate": 0.0009975893533893884, "loss": 1.7077, "step": 299 }, { "epoch": 0.06, "learning_rate": 0.0009975571305803662, "loss": 1.7072, "step": 300 }, { "epoch": 0.06, "learning_rate": 0.0009975246943670038, "loss": 1.6897, "step": 301 }, { "epoch": 0.06, "learning_rate": 0.0009974920447632135, "loss": 1.6816, "step": 302 }, { "epoch": 0.06, "learning_rate": 0.0009974591817829986, "loss": 1.6706, "step": 303 }, { "epoch": 0.06, "learning_rate": 0.0009974261054404544, "loss": 1.6649, "step": 304 }, { "epoch": 0.06, "learning_rate": 0.0009973928157497674, "loss": 1.6646, "step": 305 }, { "epoch": 0.06, "learning_rate": 0.0009973593127252157, "loss": 1.6807, "step": 306 }, { "epoch": 0.06, "learning_rate": 0.0009973255963811687, "loss": 1.6763, "step": 307 }, { "epoch": 0.06, "learning_rate": 0.000997291666732088, "loss": 1.7083, "step": 308 }, { "epoch": 0.06, "learning_rate": 0.0009972575237925255, "loss": 1.6756, "step": 309 }, { "epoch": 0.06, "learning_rate": 0.0009972231675771256, "loss": 1.7091, "step": 310 }, { "epoch": 0.06, "learning_rate": 0.0009971885981006237, "loss": 1.6445, "step": 311 }, { "epoch": 0.06, "learning_rate": 0.000997153815377847, "loss": 1.6516, "step": 312 }, { "epoch": 0.06, "learning_rate": 0.000997118819423714, "loss": 1.6648, "step": 313 }, { "epoch": 0.06, "learning_rate": 0.0009970836102532344, "loss": 1.6639, "step": 314 }, { "epoch": 0.06, "learning_rate": 0.0009970481878815093, "loss": 1.6534, "step": 315 }, { "epoch": 0.06, "learning_rate": 0.0009970125523237321, "loss": 1.6773, "step": 316 }, { "epoch": 0.06, "learning_rate": 0.0009969767035951866, "loss": 1.6888, "step": 317 }, { "epoch": 0.06, "learning_rate": 0.0009969406417112488, "loss": 1.6818, "step": 318 }, { "epoch": 0.06, "learning_rate": 0.0009969043666873856, "loss": 1.6691, "step": 319 }, { "epoch": 0.06, "learning_rate": 0.0009968678785391553, "loss": 1.6745, "step": 320 }, { "epoch": 0.06, "learning_rate": 0.0009968311772822083, "loss": 1.6725, "step": 321 }, { "epoch": 0.07, "learning_rate": 0.0009967942629322856, "loss": 1.6692, "step": 322 }, { "epoch": 0.07, "learning_rate": 0.00099675713550522, "loss": 1.6329, "step": 323 }, { "epoch": 0.07, "learning_rate": 0.0009967197950169356, "loss": 1.714, "step": 324 }, { "epoch": 0.07, "learning_rate": 0.0009966822414834479, "loss": 1.627, "step": 325 }, { "epoch": 0.07, "learning_rate": 0.0009966444749208636, "loss": 1.6394, "step": 326 }, { "epoch": 0.07, "learning_rate": 0.0009966064953453812, "loss": 1.6461, "step": 327 }, { "epoch": 0.07, "learning_rate": 0.0009965683027732901, "loss": 1.6282, "step": 328 }, { "epoch": 0.07, "learning_rate": 0.0009965298972209715, "loss": 1.6257, "step": 329 }, { "epoch": 0.07, "learning_rate": 0.0009964912787048977, "loss": 1.6376, "step": 330 }, { "epoch": 0.07, "learning_rate": 0.0009964524472416319, "loss": 1.6908, "step": 331 }, { "epoch": 0.07, "learning_rate": 0.0009964134028478296, "loss": 1.6317, "step": 332 }, { "epoch": 0.07, "learning_rate": 0.0009963741455402367, "loss": 1.6345, "step": 333 }, { "epoch": 0.07, "learning_rate": 0.000996334675335691, "loss": 1.6571, "step": 334 }, { "epoch": 0.07, "learning_rate": 0.0009962949922511215, "loss": 1.6057, "step": 335 }, { "epoch": 0.07, "learning_rate": 0.0009962550963035484, "loss": 1.6405, "step": 336 }, { "epoch": 0.07, "learning_rate": 0.0009962149875100833, "loss": 1.6664, "step": 337 }, { "epoch": 0.07, "learning_rate": 0.0009961746658879286, "loss": 1.6316, "step": 338 }, { "epoch": 0.07, "learning_rate": 0.0009961341314543791, "loss": 1.689, "step": 339 }, { "epoch": 0.07, "learning_rate": 0.0009960933842268198, "loss": 1.6617, "step": 340 }, { "epoch": 0.07, "learning_rate": 0.0009960524242227274, "loss": 1.6677, "step": 341 }, { "epoch": 0.07, "learning_rate": 0.0009960112514596697, "loss": 1.6499, "step": 342 }, { "epoch": 0.07, "learning_rate": 0.0009959698659553061, "loss": 1.6413, "step": 343 }, { "epoch": 0.07, "learning_rate": 0.0009959282677273868, "loss": 1.6193, "step": 344 }, { "epoch": 0.07, "learning_rate": 0.0009958864567937537, "loss": 1.6198, "step": 345 }, { "epoch": 0.07, "learning_rate": 0.0009958444331723394, "loss": 1.669, "step": 346 }, { "epoch": 0.07, "learning_rate": 0.0009958021968811683, "loss": 1.6407, "step": 347 }, { "epoch": 0.07, "learning_rate": 0.0009957597479383557, "loss": 1.6771, "step": 348 }, { "epoch": 0.07, "learning_rate": 0.0009957170863621077, "loss": 1.6439, "step": 349 }, { "epoch": 0.07, "learning_rate": 0.0009956742121707226, "loss": 1.6256, "step": 350 }, { "epoch": 0.07, "learning_rate": 0.000995631125382589, "loss": 1.6186, "step": 351 }, { "epoch": 0.07, "learning_rate": 0.0009955878260161871, "loss": 1.6559, "step": 352 }, { "epoch": 0.07, "learning_rate": 0.000995544314090088, "loss": 1.6252, "step": 353 }, { "epoch": 0.07, "learning_rate": 0.0009955005896229543, "loss": 1.642, "step": 354 }, { "epoch": 0.07, "learning_rate": 0.0009954566526335395, "loss": 1.6125, "step": 355 }, { "epoch": 0.07, "learning_rate": 0.0009954125031406886, "loss": 1.6952, "step": 356 }, { "epoch": 0.07, "learning_rate": 0.0009953681411633375, "loss": 1.6447, "step": 357 }, { "epoch": 0.07, "learning_rate": 0.000995323566720513, "loss": 1.6373, "step": 358 }, { "epoch": 0.07, "learning_rate": 0.0009952787798313332, "loss": 1.6339, "step": 359 }, { "epoch": 0.07, "learning_rate": 0.0009952337805150078, "loss": 1.6527, "step": 360 }, { "epoch": 0.07, "learning_rate": 0.0009951885687908368, "loss": 1.6225, "step": 361 }, { "epoch": 0.07, "learning_rate": 0.0009951431446782117, "loss": 1.6137, "step": 362 }, { "epoch": 0.07, "learning_rate": 0.0009950975081966156, "loss": 1.6054, "step": 363 }, { "epoch": 0.07, "learning_rate": 0.0009950516593656214, "loss": 1.6075, "step": 364 }, { "epoch": 0.07, "learning_rate": 0.0009950055982048945, "loss": 1.6111, "step": 365 }, { "epoch": 0.07, "learning_rate": 0.0009949593247341904, "loss": 1.6334, "step": 366 }, { "epoch": 0.07, "learning_rate": 0.0009949128389733562, "loss": 1.6569, "step": 367 }, { "epoch": 0.07, "learning_rate": 0.0009948661409423297, "loss": 1.6069, "step": 368 }, { "epoch": 0.07, "learning_rate": 0.0009948192306611398, "loss": 1.6161, "step": 369 }, { "epoch": 0.07, "learning_rate": 0.0009947721081499067, "loss": 1.5909, "step": 370 }, { "epoch": 0.08, "learning_rate": 0.0009947247734288414, "loss": 1.6224, "step": 371 }, { "epoch": 0.08, "learning_rate": 0.0009946772265182458, "loss": 1.5997, "step": 372 }, { "epoch": 0.08, "learning_rate": 0.0009946294674385132, "loss": 1.6496, "step": 373 }, { "epoch": 0.08, "learning_rate": 0.0009945814962101275, "loss": 1.6125, "step": 374 }, { "epoch": 0.08, "learning_rate": 0.0009945333128536637, "loss": 1.6568, "step": 375 }, { "epoch": 0.08, "learning_rate": 0.0009944849173897882, "loss": 1.6211, "step": 376 }, { "epoch": 0.08, "learning_rate": 0.0009944363098392576, "loss": 1.6334, "step": 377 }, { "epoch": 0.08, "learning_rate": 0.00099438749022292, "loss": 1.6538, "step": 378 }, { "epoch": 0.08, "learning_rate": 0.0009943384585617146, "loss": 1.6011, "step": 379 }, { "epoch": 0.08, "learning_rate": 0.000994289214876671, "loss": 1.5974, "step": 380 }, { "epoch": 0.08, "learning_rate": 0.0009942397591889101, "loss": 1.6235, "step": 381 }, { "epoch": 0.08, "learning_rate": 0.0009941900915196435, "loss": 1.6655, "step": 382 }, { "epoch": 0.08, "learning_rate": 0.0009941402118901744, "loss": 1.596, "step": 383 }, { "epoch": 0.08, "learning_rate": 0.0009940901203218957, "loss": 1.618, "step": 384 }, { "epoch": 0.08, "learning_rate": 0.0009940398168362923, "loss": 1.6549, "step": 385 }, { "epoch": 0.08, "learning_rate": 0.0009939893014549394, "loss": 1.6573, "step": 386 }, { "epoch": 0.08, "learning_rate": 0.0009939385741995035, "loss": 1.646, "step": 387 }, { "epoch": 0.08, "learning_rate": 0.0009938876350917415, "loss": 1.6898, "step": 388 }, { "epoch": 0.08, "learning_rate": 0.0009938364841535013, "loss": 1.632, "step": 389 }, { "epoch": 0.08, "learning_rate": 0.000993785121406722, "loss": 1.6056, "step": 390 }, { "epoch": 0.08, "learning_rate": 0.0009937335468734334, "loss": 1.6083, "step": 391 }, { "epoch": 0.08, "learning_rate": 0.0009936817605757555, "loss": 1.6061, "step": 392 }, { "epoch": 0.08, "learning_rate": 0.0009936297625359003, "loss": 1.5867, "step": 393 }, { "epoch": 0.08, "learning_rate": 0.0009935775527761697, "loss": 1.62, "step": 394 }, { "epoch": 0.08, "learning_rate": 0.0009935251313189565, "loss": 1.6134, "step": 395 }, { "epoch": 0.08, "learning_rate": 0.0009934724981867447, "loss": 1.6306, "step": 396 }, { "epoch": 0.08, "learning_rate": 0.0009934196534021086, "loss": 1.6174, "step": 397 }, { "epoch": 0.08, "learning_rate": 0.0009933665969877141, "loss": 1.6279, "step": 398 }, { "epoch": 0.08, "learning_rate": 0.000993313328966317, "loss": 1.5923, "step": 399 }, { "epoch": 0.08, "learning_rate": 0.000993259849360764, "loss": 1.6256, "step": 400 }, { "epoch": 0.08, "learning_rate": 0.0009932061581939928, "loss": 1.5959, "step": 401 }, { "epoch": 0.08, "learning_rate": 0.000993152255489032, "loss": 1.6333, "step": 402 }, { "epoch": 0.08, "learning_rate": 0.0009930981412690006, "loss": 1.6108, "step": 403 }, { "epoch": 0.08, "learning_rate": 0.0009930438155571085, "loss": 1.6181, "step": 404 }, { "epoch": 0.08, "learning_rate": 0.0009929892783766558, "loss": 1.6132, "step": 405 }, { "epoch": 0.08, "learning_rate": 0.000992934529751034, "loss": 1.6148, "step": 406 }, { "epoch": 0.08, "learning_rate": 0.0009928795697037255, "loss": 1.6021, "step": 407 }, { "epoch": 0.08, "learning_rate": 0.000992824398258302, "loss": 1.6116, "step": 408 }, { "epoch": 0.08, "learning_rate": 0.0009927690154384271, "loss": 1.621, "step": 409 }, { "epoch": 0.08, "learning_rate": 0.0009927134212678553, "loss": 1.6273, "step": 410 }, { "epoch": 0.08, "learning_rate": 0.0009926576157704304, "loss": 1.5907, "step": 411 }, { "epoch": 0.08, "learning_rate": 0.000992601598970088, "loss": 1.5878, "step": 412 }, { "epoch": 0.08, "learning_rate": 0.0009925453708908537, "loss": 1.6383, "step": 413 }, { "epoch": 0.08, "learning_rate": 0.0009924889315568442, "loss": 1.5887, "step": 414 }, { "epoch": 0.08, "learning_rate": 0.0009924322809922665, "loss": 1.6573, "step": 415 }, { "epoch": 0.08, "learning_rate": 0.0009923754192214182, "loss": 1.6493, "step": 416 }, { "epoch": 0.08, "learning_rate": 0.0009923183462686876, "loss": 1.6291, "step": 417 }, { "epoch": 0.08, "learning_rate": 0.0009922610621585534, "loss": 1.6488, "step": 418 }, { "epoch": 0.08, "learning_rate": 0.0009922035669155853, "loss": 1.6103, "step": 419 }, { "epoch": 0.08, "learning_rate": 0.000992145860564443, "loss": 1.6298, "step": 420 }, { "epoch": 0.09, "learning_rate": 0.0009920879431298772, "loss": 1.5927, "step": 421 }, { "epoch": 0.09, "learning_rate": 0.0009920298146367287, "loss": 1.6031, "step": 422 }, { "epoch": 0.09, "learning_rate": 0.000991971475109929, "loss": 1.5964, "step": 423 }, { "epoch": 0.09, "learning_rate": 0.0009919129245745005, "loss": 1.5631, "step": 424 }, { "epoch": 0.09, "learning_rate": 0.0009918541630555555, "loss": 1.6171, "step": 425 }, { "epoch": 0.09, "learning_rate": 0.000991795190578297, "loss": 1.6381, "step": 426 }, { "epoch": 0.09, "learning_rate": 0.0009917360071680188, "loss": 1.6313, "step": 427 }, { "epoch": 0.09, "learning_rate": 0.000991676612850105, "loss": 1.5553, "step": 428 }, { "epoch": 0.09, "learning_rate": 0.0009916170076500297, "loss": 1.5707, "step": 429 }, { "epoch": 0.09, "learning_rate": 0.0009915571915933578, "loss": 1.606, "step": 430 }, { "epoch": 0.09, "learning_rate": 0.0009914971647057452, "loss": 1.5753, "step": 431 }, { "epoch": 0.09, "learning_rate": 0.0009914369270129372, "loss": 1.5833, "step": 432 }, { "epoch": 0.09, "learning_rate": 0.0009913764785407701, "loss": 1.6306, "step": 433 }, { "epoch": 0.09, "learning_rate": 0.0009913158193151706, "loss": 1.5911, "step": 434 }, { "epoch": 0.09, "learning_rate": 0.0009912549493621555, "loss": 1.6143, "step": 435 }, { "epoch": 0.09, "learning_rate": 0.0009911938687078323, "loss": 1.6072, "step": 436 }, { "epoch": 0.09, "learning_rate": 0.000991132577378399, "loss": 1.5957, "step": 437 }, { "epoch": 0.09, "learning_rate": 0.0009910710754001432, "loss": 1.6091, "step": 438 }, { "epoch": 0.09, "learning_rate": 0.0009910093627994439, "loss": 1.6076, "step": 439 }, { "epoch": 0.09, "learning_rate": 0.0009909474396027694, "loss": 1.6117, "step": 440 }, { "epoch": 0.09, "learning_rate": 0.000990885305836679, "loss": 1.6064, "step": 441 }, { "epoch": 0.09, "learning_rate": 0.0009908229615278224, "loss": 1.5946, "step": 442 }, { "epoch": 0.09, "learning_rate": 0.000990760406702939, "loss": 1.5975, "step": 443 }, { "epoch": 0.09, "learning_rate": 0.000990697641388859, "loss": 1.6034, "step": 444 }, { "epoch": 0.09, "learning_rate": 0.0009906346656125024, "loss": 1.6227, "step": 445 }, { "epoch": 0.09, "learning_rate": 0.0009905714794008803, "loss": 1.5954, "step": 446 }, { "epoch": 0.09, "learning_rate": 0.0009905080827810931, "loss": 1.594, "step": 447 }, { "epoch": 0.09, "learning_rate": 0.000990444475780332, "loss": 1.5796, "step": 448 }, { "epoch": 0.09, "learning_rate": 0.0009903806584258784, "loss": 1.5983, "step": 449 }, { "epoch": 0.09, "learning_rate": 0.0009903166307451037, "loss": 1.6189, "step": 450 }, { "epoch": 0.09, "learning_rate": 0.00099025239276547, "loss": 1.592, "step": 451 }, { "epoch": 0.09, "learning_rate": 0.0009901879445145284, "loss": 1.6399, "step": 452 }, { "epoch": 0.09, "learning_rate": 0.0009901232860199218, "loss": 1.6583, "step": 453 }, { "epoch": 0.09, "learning_rate": 0.0009900584173093824, "loss": 1.6162, "step": 454 }, { "epoch": 0.09, "learning_rate": 0.0009899933384107324, "loss": 1.5462, "step": 455 }, { "epoch": 0.09, "learning_rate": 0.0009899280493518847, "loss": 1.6128, "step": 456 }, { "epoch": 0.09, "learning_rate": 0.0009898625501608418, "loss": 1.6049, "step": 457 }, { "epoch": 0.09, "learning_rate": 0.0009897968408656966, "loss": 1.6487, "step": 458 }, { "epoch": 0.09, "learning_rate": 0.0009897309214946322, "loss": 1.6348, "step": 459 }, { "epoch": 0.09, "learning_rate": 0.0009896647920759217, "loss": 1.5641, "step": 460 }, { "epoch": 0.09, "learning_rate": 0.000989598452637928, "loss": 1.5633, "step": 461 }, { "epoch": 0.09, "learning_rate": 0.0009895319032091048, "loss": 1.5874, "step": 462 }, { "epoch": 0.09, "learning_rate": 0.0009894651438179954, "loss": 1.568, "step": 463 }, { "epoch": 0.09, "learning_rate": 0.0009893981744932329, "loss": 1.6366, "step": 464 }, { "epoch": 0.09, "learning_rate": 0.0009893309952635407, "loss": 1.6235, "step": 465 }, { "epoch": 0.09, "learning_rate": 0.0009892636061577325, "loss": 1.6132, "step": 466 }, { "epoch": 0.09, "learning_rate": 0.0009891960072047116, "loss": 1.6143, "step": 467 }, { "epoch": 0.09, "learning_rate": 0.0009891281984334716, "loss": 1.5894, "step": 468 }, { "epoch": 0.09, "learning_rate": 0.0009890601798730957, "loss": 1.6046, "step": 469 }, { "epoch": 0.1, "learning_rate": 0.0009889919515527579, "loss": 1.5593, "step": 470 }, { "epoch": 0.1, "learning_rate": 0.0009889235135017212, "loss": 1.6162, "step": 471 }, { "epoch": 0.1, "learning_rate": 0.0009888548657493387, "loss": 1.6182, "step": 472 }, { "epoch": 0.1, "learning_rate": 0.0009887860083250544, "loss": 1.5807, "step": 473 }, { "epoch": 0.1, "learning_rate": 0.000988716941258401, "loss": 1.5741, "step": 474 }, { "epoch": 0.1, "learning_rate": 0.000988647664579002, "loss": 1.5593, "step": 475 }, { "epoch": 0.1, "learning_rate": 0.0009885781783165702, "loss": 1.6052, "step": 476 }, { "epoch": 0.1, "learning_rate": 0.0009885084825009085, "loss": 1.618, "step": 477 }, { "epoch": 0.1, "learning_rate": 0.00098843857716191, "loss": 1.6329, "step": 478 }, { "epoch": 0.1, "learning_rate": 0.0009883684623295573, "loss": 1.5917, "step": 479 }, { "epoch": 0.1, "learning_rate": 0.000988298138033923, "loss": 1.6056, "step": 480 }, { "epoch": 0.1, "learning_rate": 0.000988227604305169, "loss": 1.6155, "step": 481 }, { "epoch": 0.1, "learning_rate": 0.0009881568611735483, "loss": 1.6052, "step": 482 }, { "epoch": 0.1, "learning_rate": 0.0009880859086694022, "loss": 1.5913, "step": 483 }, { "epoch": 0.1, "learning_rate": 0.0009880147468231632, "loss": 1.5798, "step": 484 }, { "epoch": 0.1, "learning_rate": 0.0009879433756653524, "loss": 1.5903, "step": 485 }, { "epoch": 0.1, "learning_rate": 0.0009878717952265812, "loss": 1.5835, "step": 486 }, { "epoch": 0.1, "learning_rate": 0.0009878000055375512, "loss": 1.6372, "step": 487 }, { "epoch": 0.1, "learning_rate": 0.0009877280066290528, "loss": 1.6102, "step": 488 }, { "epoch": 0.1, "learning_rate": 0.000987655798531967, "loss": 1.5875, "step": 489 }, { "epoch": 0.1, "learning_rate": 0.0009875833812772639, "loss": 1.5885, "step": 490 }, { "epoch": 0.1, "learning_rate": 0.0009875107548960036, "loss": 1.6073, "step": 491 }, { "epoch": 0.1, "learning_rate": 0.000987437919419336, "loss": 1.5999, "step": 492 }, { "epoch": 0.1, "learning_rate": 0.0009873648748785005, "loss": 1.5516, "step": 493 }, { "epoch": 0.1, "learning_rate": 0.0009872916213048261, "loss": 1.6187, "step": 494 }, { "epoch": 0.1, "learning_rate": 0.0009872181587297318, "loss": 1.5678, "step": 495 }, { "epoch": 0.1, "learning_rate": 0.0009871444871847256, "loss": 1.5558, "step": 496 }, { "epoch": 0.1, "learning_rate": 0.000987070606701406, "loss": 1.601, "step": 497 }, { "epoch": 0.1, "learning_rate": 0.0009869965173114601, "loss": 1.613, "step": 498 }, { "epoch": 0.1, "learning_rate": 0.0009869222190466659, "loss": 1.5662, "step": 499 }, { "epoch": 0.1, "learning_rate": 0.0009868477119388895, "loss": 1.5413, "step": 500 }, { "epoch": 0.1, "learning_rate": 0.0009867729960200877, "loss": 1.6158, "step": 501 }, { "epoch": 0.1, "learning_rate": 0.0009866980713223066, "loss": 1.5974, "step": 502 }, { "epoch": 0.1, "learning_rate": 0.0009866229378776813, "loss": 1.6139, "step": 503 }, { "epoch": 0.1, "learning_rate": 0.0009865475957184372, "loss": 1.6038, "step": 504 }, { "epoch": 0.1, "learning_rate": 0.0009864720448768886, "loss": 1.6037, "step": 505 }, { "epoch": 0.1, "learning_rate": 0.0009863962853854397, "loss": 1.5838, "step": 506 }, { "epoch": 0.1, "learning_rate": 0.0009863203172765843, "loss": 1.6403, "step": 507 }, { "epoch": 0.1, "learning_rate": 0.000986244140582905, "loss": 1.5944, "step": 508 }, { "epoch": 0.1, "learning_rate": 0.0009861677553370744, "loss": 1.6114, "step": 509 }, { "epoch": 0.1, "learning_rate": 0.0009860911615718548, "loss": 1.6033, "step": 510 }, { "epoch": 0.1, "learning_rate": 0.0009860143593200972, "loss": 1.5516, "step": 511 }, { "epoch": 0.1, "learning_rate": 0.0009859373486147426, "loss": 1.6273, "step": 512 }, { "epoch": 0.1, "learning_rate": 0.0009858601294888212, "loss": 1.5562, "step": 513 }, { "epoch": 0.1, "learning_rate": 0.0009857827019754525, "loss": 1.582, "step": 514 }, { "epoch": 0.1, "learning_rate": 0.0009857050661078457, "loss": 1.6031, "step": 515 }, { "epoch": 0.1, "learning_rate": 0.000985627221919299, "loss": 1.5598, "step": 516 }, { "epoch": 0.1, "learning_rate": 0.0009855491694432002, "loss": 1.5933, "step": 517 }, { "epoch": 0.1, "learning_rate": 0.000985470908713026, "loss": 1.5677, "step": 518 }, { "epoch": 0.1, "learning_rate": 0.0009853924397623432, "loss": 1.5778, "step": 519 }, { "epoch": 0.11, "learning_rate": 0.0009853137626248073, "loss": 1.5669, "step": 520 }, { "epoch": 0.11, "learning_rate": 0.000985234877334163, "loss": 1.585, "step": 521 }, { "epoch": 0.11, "learning_rate": 0.0009851557839242449, "loss": 1.6115, "step": 522 }, { "epoch": 0.11, "learning_rate": 0.0009850764824289762, "loss": 1.587, "step": 523 }, { "epoch": 0.11, "learning_rate": 0.00098499697288237, "loss": 1.6415, "step": 524 }, { "epoch": 0.11, "learning_rate": 0.000984917255318528, "loss": 1.5761, "step": 525 }, { "epoch": 0.11, "learning_rate": 0.0009848373297716413, "loss": 1.5585, "step": 526 }, { "epoch": 0.11, "learning_rate": 0.0009847571962759906, "loss": 1.5745, "step": 527 }, { "epoch": 0.11, "learning_rate": 0.0009846768548659455, "loss": 1.6034, "step": 528 }, { "epoch": 0.11, "learning_rate": 0.0009845963055759646, "loss": 1.5846, "step": 529 }, { "epoch": 0.11, "learning_rate": 0.0009845155484405957, "loss": 1.5549, "step": 530 }, { "epoch": 0.11, "learning_rate": 0.0009844345834944762, "loss": 1.5691, "step": 531 }, { "epoch": 0.11, "learning_rate": 0.000984353410772332, "loss": 1.5836, "step": 532 }, { "epoch": 0.11, "learning_rate": 0.0009842720303089784, "loss": 1.5943, "step": 533 }, { "epoch": 0.11, "learning_rate": 0.0009841904421393204, "loss": 1.6178, "step": 534 }, { "epoch": 0.11, "learning_rate": 0.0009841086462983508, "loss": 1.5673, "step": 535 }, { "epoch": 0.11, "learning_rate": 0.0009840266428211525, "loss": 1.6006, "step": 536 }, { "epoch": 0.11, "learning_rate": 0.0009839444317428971, "loss": 1.577, "step": 537 }, { "epoch": 0.11, "learning_rate": 0.0009838620130988454, "loss": 1.569, "step": 538 }, { "epoch": 0.11, "learning_rate": 0.0009837793869243467, "loss": 1.546, "step": 539 }, { "epoch": 0.11, "learning_rate": 0.0009836965532548402, "loss": 1.5808, "step": 540 }, { "epoch": 0.11, "learning_rate": 0.0009836135121258536, "loss": 1.6005, "step": 541 }, { "epoch": 0.11, "learning_rate": 0.000983530263573003, "loss": 1.5904, "step": 542 }, { "epoch": 0.11, "learning_rate": 0.0009834468076319947, "loss": 1.5779, "step": 543 }, { "epoch": 0.11, "learning_rate": 0.000983363144338623, "loss": 1.5887, "step": 544 }, { "epoch": 0.11, "learning_rate": 0.0009832792737287715, "loss": 1.537, "step": 545 }, { "epoch": 0.11, "learning_rate": 0.0009831951958384126, "loss": 1.5745, "step": 546 }, { "epoch": 0.11, "learning_rate": 0.0009831109107036075, "loss": 1.589, "step": 547 }, { "epoch": 0.11, "learning_rate": 0.000983026418360507, "loss": 1.5868, "step": 548 }, { "epoch": 0.11, "learning_rate": 0.0009829417188453498, "loss": 1.5573, "step": 549 }, { "epoch": 0.11, "learning_rate": 0.0009828568121944639, "loss": 1.5669, "step": 550 }, { "epoch": 0.11, "learning_rate": 0.000982771698444266, "loss": 1.5984, "step": 551 }, { "epoch": 0.11, "learning_rate": 0.000982686377631262, "loss": 1.5937, "step": 552 }, { "epoch": 0.11, "learning_rate": 0.0009826008497920463, "loss": 1.5783, "step": 553 }, { "epoch": 0.11, "learning_rate": 0.0009825151149633021, "loss": 1.5782, "step": 554 }, { "epoch": 0.11, "learning_rate": 0.0009824291731818015, "loss": 1.5631, "step": 555 }, { "epoch": 0.11, "learning_rate": 0.000982343024484405, "loss": 1.5475, "step": 556 }, { "epoch": 0.11, "learning_rate": 0.0009822566689080628, "loss": 1.5481, "step": 557 }, { "epoch": 0.11, "learning_rate": 0.0009821701064898125, "loss": 1.6072, "step": 558 }, { "epoch": 0.11, "learning_rate": 0.0009820833372667813, "loss": 1.5396, "step": 559 }, { "epoch": 0.11, "learning_rate": 0.000981996361276185, "loss": 1.6054, "step": 560 }, { "epoch": 0.11, "learning_rate": 0.0009819091785553276, "loss": 1.583, "step": 561 }, { "epoch": 0.11, "learning_rate": 0.0009818217891416027, "loss": 1.5615, "step": 562 }, { "epoch": 0.11, "learning_rate": 0.0009817341930724915, "loss": 1.612, "step": 563 }, { "epoch": 0.11, "learning_rate": 0.0009816463903855647, "loss": 1.5659, "step": 564 }, { "epoch": 0.11, "learning_rate": 0.0009815583811184807, "loss": 1.5818, "step": 565 }, { "epoch": 0.11, "learning_rate": 0.0009814701653089878, "loss": 1.5405, "step": 566 }, { "epoch": 0.11, "learning_rate": 0.0009813817429949216, "loss": 1.6069, "step": 567 }, { "epoch": 0.11, "learning_rate": 0.000981293114214207, "loss": 1.5678, "step": 568 }, { "epoch": 0.12, "learning_rate": 0.000981204279004857, "loss": 1.5951, "step": 569 }, { "epoch": 0.12, "learning_rate": 0.0009811152374049736, "loss": 1.5687, "step": 570 }, { "epoch": 0.12, "learning_rate": 0.0009810259894527473, "loss": 1.5826, "step": 571 }, { "epoch": 0.12, "learning_rate": 0.0009809365351864565, "loss": 1.5635, "step": 572 }, { "epoch": 0.12, "learning_rate": 0.000980846874644469, "loss": 1.5787, "step": 573 }, { "epoch": 0.12, "learning_rate": 0.00098075700786524, "loss": 1.5725, "step": 574 }, { "epoch": 0.12, "learning_rate": 0.000980666934887314, "loss": 1.5828, "step": 575 }, { "epoch": 0.12, "learning_rate": 0.0009805766557493241, "loss": 1.5687, "step": 576 }, { "epoch": 0.12, "learning_rate": 0.0009804861704899909, "loss": 1.6073, "step": 577 }, { "epoch": 0.12, "learning_rate": 0.000980395479148124, "loss": 1.5666, "step": 578 }, { "epoch": 0.12, "learning_rate": 0.0009803045817626211, "loss": 1.5997, "step": 579 }, { "epoch": 0.12, "learning_rate": 0.000980213478372469, "loss": 1.5545, "step": 580 }, { "epoch": 0.12, "learning_rate": 0.000980122169016742, "loss": 1.5506, "step": 581 }, { "epoch": 0.12, "learning_rate": 0.000980030653734603, "loss": 1.6492, "step": 582 }, { "epoch": 0.12, "learning_rate": 0.0009799389325653033, "loss": 1.6018, "step": 583 }, { "epoch": 0.12, "learning_rate": 0.0009798470055481828, "loss": 1.5486, "step": 584 }, { "epoch": 0.12, "learning_rate": 0.0009797548727226689, "loss": 1.5742, "step": 585 }, { "epoch": 0.12, "learning_rate": 0.0009796625341282781, "loss": 1.5973, "step": 586 }, { "epoch": 0.12, "learning_rate": 0.0009795699898046148, "loss": 1.5831, "step": 587 }, { "epoch": 0.12, "learning_rate": 0.0009794772397913715, "loss": 1.5577, "step": 588 }, { "epoch": 0.12, "learning_rate": 0.0009793842841283292, "loss": 1.5796, "step": 589 }, { "epoch": 0.12, "learning_rate": 0.000979291122855357, "loss": 1.5329, "step": 590 }, { "epoch": 0.12, "learning_rate": 0.000979197756012412, "loss": 1.5612, "step": 591 }, { "epoch": 0.12, "learning_rate": 0.0009791041836395395, "loss": 1.595, "step": 592 }, { "epoch": 0.12, "learning_rate": 0.0009790104057768737, "loss": 1.575, "step": 593 }, { "epoch": 0.12, "learning_rate": 0.0009789164224646356, "loss": 1.5734, "step": 594 }, { "epoch": 0.12, "learning_rate": 0.0009788222337431354, "loss": 1.584, "step": 595 }, { "epoch": 0.12, "learning_rate": 0.000978727839652771, "loss": 1.5928, "step": 596 }, { "epoch": 0.12, "learning_rate": 0.0009786332402340284, "loss": 1.6086, "step": 597 }, { "epoch": 0.12, "learning_rate": 0.0009785384355274819, "loss": 1.5609, "step": 598 }, { "epoch": 0.12, "learning_rate": 0.000978443425573793, "loss": 1.5385, "step": 599 }, { "epoch": 0.12, "learning_rate": 0.0009783482104137127, "loss": 1.5641, "step": 600 }, { "epoch": 0.12, "learning_rate": 0.0009782527900880785, "loss": 1.5038, "step": 601 }, { "epoch": 0.12, "learning_rate": 0.000978157164637817, "loss": 1.5868, "step": 602 }, { "epoch": 0.12, "learning_rate": 0.0009780613341039421, "loss": 1.5801, "step": 603 }, { "epoch": 0.12, "learning_rate": 0.0009779652985275563, "loss": 1.5794, "step": 604 }, { "epoch": 0.12, "learning_rate": 0.000977869057949849, "loss": 1.5929, "step": 605 }, { "epoch": 0.12, "learning_rate": 0.000977772612412099, "loss": 1.5958, "step": 606 }, { "epoch": 0.12, "learning_rate": 0.0009776759619556715, "loss": 1.6075, "step": 607 }, { "epoch": 0.12, "learning_rate": 0.0009775791066220206, "loss": 1.5543, "step": 608 }, { "epoch": 0.12, "learning_rate": 0.0009774820464526878, "loss": 1.5368, "step": 609 }, { "epoch": 0.12, "learning_rate": 0.000977384781489303, "loss": 1.5872, "step": 610 }, { "epoch": 0.12, "learning_rate": 0.000977287311773583, "loss": 1.5711, "step": 611 }, { "epoch": 0.12, "learning_rate": 0.0009771896373473334, "loss": 1.5424, "step": 612 }, { "epoch": 0.12, "learning_rate": 0.000977091758252447, "loss": 1.6008, "step": 613 }, { "epoch": 0.12, "learning_rate": 0.0009769936745309047, "loss": 1.5966, "step": 614 }, { "epoch": 0.12, "learning_rate": 0.0009768953862247748, "loss": 1.5478, "step": 615 }, { "epoch": 0.12, "learning_rate": 0.0009767968933762138, "loss": 1.5576, "step": 616 }, { "epoch": 0.12, "learning_rate": 0.0009766981960274653, "loss": 1.5403, "step": 617 }, { "epoch": 0.12, "learning_rate": 0.0009765992942208615, "loss": 1.6011, "step": 618 }, { "epoch": 0.13, "learning_rate": 0.0009765001879988213, "loss": 1.6004, "step": 619 }, { "epoch": 0.13, "learning_rate": 0.0009764008774038521, "loss": 1.5474, "step": 620 }, { "epoch": 0.13, "learning_rate": 0.0009763013624785489, "loss": 1.603, "step": 621 }, { "epoch": 0.13, "learning_rate": 0.0009762016432655935, "loss": 1.5821, "step": 622 }, { "epoch": 0.13, "learning_rate": 0.0009761017198077562, "loss": 1.5936, "step": 623 }, { "epoch": 0.13, "learning_rate": 0.0009760015921478944, "loss": 1.5823, "step": 624 }, { "epoch": 0.13, "learning_rate": 0.0009759012603289534, "loss": 1.561, "step": 625 }, { "epoch": 0.13, "learning_rate": 0.000975800724393966, "loss": 1.5832, "step": 626 }, { "epoch": 0.13, "learning_rate": 0.0009756999843860525, "loss": 1.5702, "step": 627 }, { "epoch": 0.13, "learning_rate": 0.0009755990403484205, "loss": 1.57, "step": 628 }, { "epoch": 0.13, "learning_rate": 0.0009754978923243655, "loss": 1.573, "step": 629 }, { "epoch": 0.13, "learning_rate": 0.0009753965403572702, "loss": 1.5224, "step": 630 }, { "epoch": 0.13, "learning_rate": 0.000975294984490605, "loss": 1.5477, "step": 631 }, { "epoch": 0.13, "learning_rate": 0.0009751932247679276, "loss": 1.5391, "step": 632 }, { "epoch": 0.13, "learning_rate": 0.0009750912612328831, "loss": 1.5499, "step": 633 }, { "epoch": 0.13, "learning_rate": 0.0009749890939292041, "loss": 1.5455, "step": 634 }, { "epoch": 0.13, "learning_rate": 0.0009748867229007108, "loss": 1.5425, "step": 635 }, { "epoch": 0.13, "learning_rate": 0.0009747841481913103, "loss": 1.5404, "step": 636 }, { "epoch": 0.13, "learning_rate": 0.0009746813698449973, "loss": 1.615, "step": 637 }, { "epoch": 0.13, "learning_rate": 0.0009745783879058541, "loss": 1.5459, "step": 638 }, { "epoch": 0.13, "learning_rate": 0.0009744752024180499, "loss": 1.5646, "step": 639 }, { "epoch": 0.13, "learning_rate": 0.0009743718134258415, "loss": 1.5297, "step": 640 }, { "epoch": 0.13, "learning_rate": 0.0009742682209735727, "loss": 1.5568, "step": 641 }, { "epoch": 0.13, "learning_rate": 0.0009741644251056751, "loss": 1.6104, "step": 642 }, { "epoch": 0.13, "learning_rate": 0.0009740604258666668, "loss": 1.5378, "step": 643 }, { "epoch": 0.13, "learning_rate": 0.0009739562233011536, "loss": 1.568, "step": 644 }, { "epoch": 0.13, "learning_rate": 0.0009738518174538284, "loss": 1.5814, "step": 645 }, { "epoch": 0.13, "learning_rate": 0.0009737472083694716, "loss": 1.5552, "step": 646 }, { "epoch": 0.13, "learning_rate": 0.0009736423960929501, "loss": 1.5531, "step": 647 }, { "epoch": 0.13, "learning_rate": 0.0009735373806692186, "loss": 1.5356, "step": 648 }, { "epoch": 0.13, "learning_rate": 0.0009734321621433184, "loss": 1.5112, "step": 649 }, { "epoch": 0.13, "learning_rate": 0.0009733267405603784, "loss": 1.5518, "step": 650 }, { "epoch": 0.13, "learning_rate": 0.0009732211159656142, "loss": 1.5558, "step": 651 }, { "epoch": 0.13, "learning_rate": 0.0009731152884043287, "loss": 1.5407, "step": 652 }, { "epoch": 0.13, "learning_rate": 0.0009730092579219118, "loss": 1.5803, "step": 653 }, { "epoch": 0.13, "learning_rate": 0.0009729030245638404, "loss": 1.5468, "step": 654 }, { "epoch": 0.13, "learning_rate": 0.0009727965883756784, "loss": 1.4974, "step": 655 }, { "epoch": 0.13, "learning_rate": 0.0009726899494030768, "loss": 1.5813, "step": 656 }, { "epoch": 0.13, "learning_rate": 0.0009725831076917733, "loss": 1.5422, "step": 657 }, { "epoch": 0.13, "learning_rate": 0.000972476063287593, "loss": 1.5434, "step": 658 }, { "epoch": 0.13, "learning_rate": 0.0009723688162364477, "loss": 1.5686, "step": 659 }, { "epoch": 0.13, "learning_rate": 0.0009722613665843358, "loss": 1.5738, "step": 660 }, { "epoch": 0.13, "learning_rate": 0.0009721537143773433, "loss": 1.6303, "step": 661 }, { "epoch": 0.13, "learning_rate": 0.0009720458596616426, "loss": 1.5926, "step": 662 }, { "epoch": 0.13, "learning_rate": 0.0009719378024834927, "loss": 1.5368, "step": 663 }, { "epoch": 0.13, "learning_rate": 0.0009718295428892402, "loss": 1.5663, "step": 664 }, { "epoch": 0.13, "learning_rate": 0.0009717210809253178, "loss": 1.5809, "step": 665 }, { "epoch": 0.13, "learning_rate": 0.0009716124166382456, "loss": 1.5666, "step": 666 }, { "epoch": 0.13, "learning_rate": 0.0009715035500746298, "loss": 1.565, "step": 667 }, { "epoch": 0.14, "learning_rate": 0.0009713944812811639, "loss": 1.5708, "step": 668 }, { "epoch": 0.14, "learning_rate": 0.000971285210304628, "loss": 1.5799, "step": 669 }, { "epoch": 0.14, "learning_rate": 0.0009711757371918889, "loss": 1.5707, "step": 670 }, { "epoch": 0.14, "learning_rate": 0.0009710660619899, "loss": 1.5847, "step": 671 }, { "epoch": 0.14, "learning_rate": 0.0009709561847457013, "loss": 1.5699, "step": 672 }, { "epoch": 0.14, "learning_rate": 0.0009708461055064202, "loss": 1.5973, "step": 673 }, { "epoch": 0.14, "learning_rate": 0.0009707358243192694, "loss": 1.5755, "step": 674 }, { "epoch": 0.14, "learning_rate": 0.0009706253412315493, "loss": 1.5512, "step": 675 }, { "epoch": 0.14, "learning_rate": 0.0009705146562906467, "loss": 1.538, "step": 676 }, { "epoch": 0.14, "learning_rate": 0.0009704037695440346, "loss": 1.5708, "step": 677 }, { "epoch": 0.14, "learning_rate": 0.0009702926810392729, "loss": 1.5521, "step": 678 }, { "epoch": 0.14, "learning_rate": 0.000970181390824008, "loss": 1.5384, "step": 679 }, { "epoch": 0.14, "learning_rate": 0.0009700698989459726, "loss": 1.5762, "step": 680 }, { "epoch": 0.14, "learning_rate": 0.000969958205452986, "loss": 1.5415, "step": 681 }, { "epoch": 0.14, "learning_rate": 0.0009698463103929542, "loss": 1.569, "step": 682 }, { "epoch": 0.14, "learning_rate": 0.0009697342138138694, "loss": 1.5677, "step": 683 }, { "epoch": 0.14, "learning_rate": 0.0009696219157638101, "loss": 1.5078, "step": 684 }, { "epoch": 0.14, "learning_rate": 0.0009695094162909416, "loss": 1.5129, "step": 685 }, { "epoch": 0.14, "learning_rate": 0.0009693967154435154, "loss": 1.5462, "step": 686 }, { "epoch": 0.14, "learning_rate": 0.0009692838132698692, "loss": 1.5644, "step": 687 }, { "epoch": 0.14, "learning_rate": 0.0009691707098184274, "loss": 1.5391, "step": 688 }, { "epoch": 0.14, "learning_rate": 0.0009690574051377006, "loss": 1.5655, "step": 689 }, { "epoch": 0.14, "learning_rate": 0.0009689438992762854, "loss": 1.5744, "step": 690 }, { "epoch": 0.14, "learning_rate": 0.0009688301922828649, "loss": 1.6157, "step": 691 }, { "epoch": 0.14, "learning_rate": 0.0009687162842062089, "loss": 1.5501, "step": 692 }, { "epoch": 0.14, "learning_rate": 0.0009686021750951728, "loss": 1.5131, "step": 693 }, { "epoch": 0.14, "learning_rate": 0.0009684878649986983, "loss": 1.5717, "step": 694 }, { "epoch": 0.14, "learning_rate": 0.0009683733539658139, "loss": 1.5767, "step": 695 }, { "epoch": 0.14, "learning_rate": 0.0009682586420456336, "loss": 1.549, "step": 696 }, { "epoch": 0.14, "learning_rate": 0.0009681437292873578, "loss": 1.5036, "step": 697 }, { "epoch": 0.14, "learning_rate": 0.0009680286157402732, "loss": 1.5856, "step": 698 }, { "epoch": 0.14, "learning_rate": 0.0009679133014537524, "loss": 1.5627, "step": 699 }, { "epoch": 0.14, "learning_rate": 0.0009677977864772543, "loss": 1.5764, "step": 700 }, { "epoch": 0.14, "learning_rate": 0.0009676820708603235, "loss": 1.5514, "step": 701 }, { "epoch": 0.14, "learning_rate": 0.0009675661546525911, "loss": 1.5797, "step": 702 }, { "epoch": 0.14, "learning_rate": 0.000967450037903774, "loss": 1.5418, "step": 703 }, { "epoch": 0.14, "learning_rate": 0.0009673337206636752, "loss": 1.552, "step": 704 }, { "epoch": 0.14, "learning_rate": 0.0009672172029821835, "loss": 1.5953, "step": 705 }, { "epoch": 0.14, "learning_rate": 0.0009671004849092741, "loss": 1.5346, "step": 706 }, { "epoch": 0.14, "learning_rate": 0.0009669835664950077, "loss": 1.5265, "step": 707 }, { "epoch": 0.14, "learning_rate": 0.0009668664477895311, "loss": 1.5892, "step": 708 }, { "epoch": 0.14, "learning_rate": 0.000966749128843077, "loss": 1.5149, "step": 709 }, { "epoch": 0.14, "learning_rate": 0.000966631609705964, "loss": 1.5866, "step": 710 }, { "epoch": 0.14, "learning_rate": 0.0009665138904285966, "loss": 1.5777, "step": 711 }, { "epoch": 0.14, "learning_rate": 0.0009663959710614652, "loss": 1.529, "step": 712 }, { "epoch": 0.14, "learning_rate": 0.0009662778516551455, "loss": 1.5282, "step": 713 }, { "epoch": 0.14, "learning_rate": 0.0009661595322602997, "loss": 1.6049, "step": 714 }, { "epoch": 0.14, "learning_rate": 0.0009660410129276756, "loss": 1.5273, "step": 715 }, { "epoch": 0.14, "learning_rate": 0.0009659222937081065, "loss": 1.5461, "step": 716 }, { "epoch": 0.14, "learning_rate": 0.0009658033746525115, "loss": 1.5801, "step": 717 }, { "epoch": 0.15, "learning_rate": 0.0009656842558118955, "loss": 1.5669, "step": 718 }, { "epoch": 0.15, "learning_rate": 0.0009655649372373491, "loss": 1.5911, "step": 719 }, { "epoch": 0.15, "learning_rate": 0.0009654454189800486, "loss": 1.5728, "step": 720 }, { "epoch": 0.15, "learning_rate": 0.0009653257010912559, "loss": 1.5707, "step": 721 }, { "epoch": 0.15, "learning_rate": 0.0009652057836223183, "loss": 1.5389, "step": 722 }, { "epoch": 0.15, "learning_rate": 0.0009650856666246693, "loss": 1.5316, "step": 723 }, { "epoch": 0.15, "learning_rate": 0.000964965350149827, "loss": 1.5945, "step": 724 }, { "epoch": 0.15, "learning_rate": 0.0009648448342493962, "loss": 1.5242, "step": 725 }, { "epoch": 0.15, "learning_rate": 0.0009647241189750664, "loss": 1.5708, "step": 726 }, { "epoch": 0.15, "learning_rate": 0.0009646032043786128, "loss": 1.5259, "step": 727 }, { "epoch": 0.15, "learning_rate": 0.0009644820905118965, "loss": 1.5882, "step": 728 }, { "epoch": 0.15, "learning_rate": 0.0009643607774268635, "loss": 1.5437, "step": 729 }, { "epoch": 0.15, "learning_rate": 0.0009642392651755456, "loss": 1.5783, "step": 730 }, { "epoch": 0.15, "learning_rate": 0.0009641175538100597, "loss": 1.5253, "step": 731 }, { "epoch": 0.15, "learning_rate": 0.0009639956433826087, "loss": 1.5335, "step": 732 }, { "epoch": 0.15, "learning_rate": 0.00096387353394548, "loss": 1.57, "step": 733 }, { "epoch": 0.15, "learning_rate": 0.0009637512255510475, "loss": 1.5279, "step": 734 }, { "epoch": 0.15, "learning_rate": 0.000963628718251769, "loss": 1.5502, "step": 735 }, { "epoch": 0.15, "learning_rate": 0.0009635060121001891, "loss": 1.5993, "step": 736 }, { "epoch": 0.15, "learning_rate": 0.0009633831071489365, "loss": 1.5485, "step": 737 }, { "epoch": 0.15, "learning_rate": 0.0009632600034507259, "loss": 1.5504, "step": 738 }, { "epoch": 0.15, "learning_rate": 0.0009631367010583569, "loss": 1.5078, "step": 739 }, { "epoch": 0.15, "learning_rate": 0.0009630132000247144, "loss": 1.5566, "step": 740 }, { "epoch": 0.15, "learning_rate": 0.0009628895004027687, "loss": 1.5195, "step": 741 }, { "epoch": 0.15, "learning_rate": 0.0009627656022455748, "loss": 1.5658, "step": 742 }, { "epoch": 0.15, "learning_rate": 0.0009626415056062732, "loss": 1.5764, "step": 743 }, { "epoch": 0.15, "learning_rate": 0.0009625172105380898, "loss": 1.5726, "step": 744 }, { "epoch": 0.15, "learning_rate": 0.0009623927170943347, "loss": 1.5362, "step": 745 }, { "epoch": 0.15, "learning_rate": 0.0009622680253284042, "loss": 1.5629, "step": 746 }, { "epoch": 0.15, "learning_rate": 0.0009621431352937788, "loss": 1.5682, "step": 747 }, { "epoch": 0.15, "learning_rate": 0.0009620180470440245, "loss": 1.5077, "step": 748 }, { "epoch": 0.15, "learning_rate": 0.0009618927606327922, "loss": 1.5416, "step": 749 }, { "epoch": 0.15, "learning_rate": 0.0009617672761138177, "loss": 1.5608, "step": 750 }, { "epoch": 0.15, "learning_rate": 0.0009616415935409218, "loss": 1.5619, "step": 751 }, { "epoch": 0.15, "learning_rate": 0.0009615157129680102, "loss": 1.5367, "step": 752 }, { "epoch": 0.15, "learning_rate": 0.0009613896344490738, "loss": 1.5464, "step": 753 }, { "epoch": 0.15, "learning_rate": 0.0009612633580381883, "loss": 1.5494, "step": 754 }, { "epoch": 0.15, "learning_rate": 0.0009611368837895138, "loss": 1.5442, "step": 755 }, { "epoch": 0.15, "learning_rate": 0.0009610102117572958, "loss": 1.542, "step": 756 }, { "epoch": 0.15, "learning_rate": 0.0009608833419958647, "loss": 1.5348, "step": 757 }, { "epoch": 0.15, "learning_rate": 0.0009607562745596352, "loss": 1.5689, "step": 758 }, { "epoch": 0.15, "learning_rate": 0.0009606290095031071, "loss": 1.5129, "step": 759 }, { "epoch": 0.15, "learning_rate": 0.0009605015468808651, "loss": 1.5699, "step": 760 }, { "epoch": 0.15, "learning_rate": 0.0009603738867475783, "loss": 1.5389, "step": 761 }, { "epoch": 0.15, "learning_rate": 0.0009602460291580007, "loss": 1.5776, "step": 762 }, { "epoch": 0.15, "learning_rate": 0.0009601179741669707, "loss": 1.5828, "step": 763 }, { "epoch": 0.15, "learning_rate": 0.0009599897218294122, "loss": 1.5114, "step": 764 }, { "epoch": 0.15, "learning_rate": 0.0009598612722003326, "loss": 1.6012, "step": 765 }, { "epoch": 0.15, "learning_rate": 0.0009597326253348248, "loss": 1.5492, "step": 766 }, { "epoch": 0.16, "learning_rate": 0.0009596037812880658, "loss": 1.618, "step": 767 }, { "epoch": 0.16, "learning_rate": 0.0009594747401153174, "loss": 1.5399, "step": 768 }, { "epoch": 0.16, "learning_rate": 0.0009593455018719262, "loss": 1.561, "step": 769 }, { "epoch": 0.16, "learning_rate": 0.0009592160666133225, "loss": 1.5536, "step": 770 }, { "epoch": 0.16, "learning_rate": 0.0009590864343950221, "loss": 1.5404, "step": 771 }, { "epoch": 0.16, "learning_rate": 0.0009589566052726244, "loss": 1.523, "step": 772 }, { "epoch": 0.16, "learning_rate": 0.0009588265793018141, "loss": 1.5126, "step": 773 }, { "epoch": 0.16, "learning_rate": 0.0009586963565383594, "loss": 1.5213, "step": 774 }, { "epoch": 0.16, "learning_rate": 0.0009585659370381136, "loss": 1.5537, "step": 775 }, { "epoch": 0.16, "learning_rate": 0.0009584353208570145, "loss": 1.5198, "step": 776 }, { "epoch": 0.16, "learning_rate": 0.0009583045080510833, "loss": 1.5939, "step": 777 }, { "epoch": 0.16, "learning_rate": 0.0009581734986764265, "loss": 1.5222, "step": 778 }, { "epoch": 0.16, "learning_rate": 0.0009580422927892347, "loss": 1.5258, "step": 779 }, { "epoch": 0.16, "learning_rate": 0.0009579108904457825, "loss": 1.5692, "step": 780 }, { "epoch": 0.16, "learning_rate": 0.000957779291702429, "loss": 1.5466, "step": 781 }, { "epoch": 0.16, "learning_rate": 0.0009576474966156173, "loss": 1.5396, "step": 782 }, { "epoch": 0.16, "learning_rate": 0.000957515505241875, "loss": 1.4974, "step": 783 }, { "epoch": 0.16, "learning_rate": 0.0009573833176378135, "loss": 1.5288, "step": 784 }, { "epoch": 0.16, "learning_rate": 0.000957250933860129, "loss": 1.5423, "step": 785 }, { "epoch": 0.16, "learning_rate": 0.000957118353965601, "loss": 1.5318, "step": 786 }, { "epoch": 0.16, "learning_rate": 0.0009569855780110941, "loss": 1.5417, "step": 787 }, { "epoch": 0.16, "learning_rate": 0.0009568526060535562, "loss": 1.5157, "step": 788 }, { "epoch": 0.16, "learning_rate": 0.0009567194381500195, "loss": 1.5141, "step": 789 }, { "epoch": 0.16, "learning_rate": 0.0009565860743576002, "loss": 1.5244, "step": 790 }, { "epoch": 0.16, "learning_rate": 0.0009564525147334989, "loss": 1.5689, "step": 791 }, { "epoch": 0.16, "learning_rate": 0.0009563187593349996, "loss": 1.5691, "step": 792 }, { "epoch": 0.16, "learning_rate": 0.0009561848082194706, "loss": 1.5323, "step": 793 }, { "epoch": 0.16, "learning_rate": 0.0009560506614443643, "loss": 1.532, "step": 794 }, { "epoch": 0.16, "learning_rate": 0.0009559163190672165, "loss": 1.5579, "step": 795 }, { "epoch": 0.16, "learning_rate": 0.0009557817811456472, "loss": 1.5324, "step": 796 }, { "epoch": 0.16, "learning_rate": 0.0009556470477373606, "loss": 1.5552, "step": 797 }, { "epoch": 0.16, "learning_rate": 0.0009555121189001443, "loss": 1.5746, "step": 798 }, { "epoch": 0.16, "learning_rate": 0.0009553769946918697, "loss": 1.5635, "step": 799 }, { "epoch": 0.16, "learning_rate": 0.0009552416751704923, "loss": 1.5622, "step": 800 }, { "epoch": 0.16, "learning_rate": 0.0009551061603940512, "loss": 1.5574, "step": 801 }, { "epoch": 0.16, "learning_rate": 0.000954970450420669, "loss": 1.5659, "step": 802 }, { "epoch": 0.16, "learning_rate": 0.0009548345453085528, "loss": 1.4997, "step": 803 }, { "epoch": 0.16, "learning_rate": 0.0009546984451159924, "loss": 1.5496, "step": 804 }, { "epoch": 0.16, "learning_rate": 0.0009545621499013619, "loss": 1.5315, "step": 805 }, { "epoch": 0.16, "learning_rate": 0.0009544256597231189, "loss": 1.5356, "step": 806 }, { "epoch": 0.16, "learning_rate": 0.0009542889746398045, "loss": 1.519, "step": 807 }, { "epoch": 0.16, "learning_rate": 0.0009541520947100437, "loss": 1.5459, "step": 808 }, { "epoch": 0.16, "learning_rate": 0.0009540150199925448, "loss": 1.5235, "step": 809 }, { "epoch": 0.16, "learning_rate": 0.0009538777505460996, "loss": 1.5602, "step": 810 }, { "epoch": 0.16, "learning_rate": 0.0009537402864295836, "loss": 1.5278, "step": 811 }, { "epoch": 0.16, "learning_rate": 0.0009536026277019561, "loss": 1.551, "step": 812 }, { "epoch": 0.16, "learning_rate": 0.0009534647744222589, "loss": 1.5249, "step": 813 }, { "epoch": 0.16, "learning_rate": 0.0009533267266496182, "loss": 1.5253, "step": 814 }, { "epoch": 0.16, "learning_rate": 0.0009531884844432432, "loss": 1.559, "step": 815 }, { "epoch": 0.16, "learning_rate": 0.0009530500478624267, "loss": 1.5272, "step": 816 }, { "epoch": 0.17, "learning_rate": 0.0009529114169665443, "loss": 1.5348, "step": 817 }, { "epoch": 0.17, "learning_rate": 0.0009527725918150558, "loss": 1.5717, "step": 818 }, { "epoch": 0.17, "learning_rate": 0.0009526335724675039, "loss": 1.6046, "step": 819 }, { "epoch": 0.17, "learning_rate": 0.0009524943589835144, "loss": 1.5098, "step": 820 }, { "epoch": 0.17, "learning_rate": 0.0009523549514227965, "loss": 1.5635, "step": 821 }, { "epoch": 0.17, "learning_rate": 0.0009522153498451427, "loss": 1.5201, "step": 822 }, { "epoch": 0.17, "learning_rate": 0.0009520755543104289, "loss": 1.5486, "step": 823 }, { "epoch": 0.17, "learning_rate": 0.0009519355648786138, "loss": 1.551, "step": 824 }, { "epoch": 0.17, "learning_rate": 0.0009517953816097395, "loss": 1.5375, "step": 825 }, { "epoch": 0.17, "learning_rate": 0.0009516550045639314, "loss": 1.536, "step": 826 }, { "epoch": 0.17, "learning_rate": 0.0009515144338013975, "loss": 1.5212, "step": 827 }, { "epoch": 0.17, "learning_rate": 0.0009513736693824293, "loss": 1.5348, "step": 828 }, { "epoch": 0.17, "learning_rate": 0.0009512327113674012, "loss": 1.5541, "step": 829 }, { "epoch": 0.17, "learning_rate": 0.0009510915598167709, "loss": 1.5304, "step": 830 }, { "epoch": 0.17, "learning_rate": 0.0009509502147910785, "loss": 1.5625, "step": 831 }, { "epoch": 0.17, "learning_rate": 0.0009508086763509478, "loss": 1.5453, "step": 832 }, { "epoch": 0.17, "learning_rate": 0.0009506669445570853, "loss": 1.5342, "step": 833 }, { "epoch": 0.17, "learning_rate": 0.0009505250194702799, "loss": 1.5314, "step": 834 }, { "epoch": 0.17, "learning_rate": 0.0009503829011514044, "loss": 1.5004, "step": 835 }, { "epoch": 0.17, "learning_rate": 0.0009502405896614136, "loss": 1.5197, "step": 836 }, { "epoch": 0.17, "learning_rate": 0.0009500980850613457, "loss": 1.5601, "step": 837 }, { "epoch": 0.17, "learning_rate": 0.0009499553874123212, "loss": 1.5302, "step": 838 }, { "epoch": 0.17, "learning_rate": 0.0009498124967755442, "loss": 1.5447, "step": 839 }, { "epoch": 0.17, "learning_rate": 0.0009496694132123007, "loss": 1.5342, "step": 840 }, { "epoch": 0.17, "learning_rate": 0.00094952613678396, "loss": 1.6032, "step": 841 }, { "epoch": 0.17, "learning_rate": 0.0009493826675519739, "loss": 1.5281, "step": 842 }, { "epoch": 0.17, "learning_rate": 0.000949239005577877, "loss": 1.5791, "step": 843 }, { "epoch": 0.17, "learning_rate": 0.0009490951509232866, "loss": 1.5682, "step": 844 }, { "epoch": 0.17, "learning_rate": 0.0009489511036499024, "loss": 1.5534, "step": 845 }, { "epoch": 0.17, "learning_rate": 0.0009488068638195071, "loss": 1.5709, "step": 846 }, { "epoch": 0.17, "learning_rate": 0.0009486624314939654, "loss": 1.5293, "step": 847 }, { "epoch": 0.17, "learning_rate": 0.0009485178067352253, "loss": 1.558, "step": 848 }, { "epoch": 0.17, "learning_rate": 0.0009483729896053168, "loss": 1.5167, "step": 849 }, { "epoch": 0.17, "learning_rate": 0.0009482279801663525, "loss": 1.5108, "step": 850 }, { "epoch": 0.17, "learning_rate": 0.0009480827784805278, "loss": 1.4925, "step": 851 }, { "epoch": 0.17, "learning_rate": 0.0009479373846101202, "loss": 1.5475, "step": 852 }, { "epoch": 0.17, "learning_rate": 0.0009477917986174895, "loss": 1.5383, "step": 853 }, { "epoch": 0.17, "learning_rate": 0.0009476460205650785, "loss": 1.5266, "step": 854 }, { "epoch": 0.17, "learning_rate": 0.0009475000505154118, "loss": 1.5657, "step": 855 }, { "epoch": 0.17, "learning_rate": 0.0009473538885310967, "loss": 1.5758, "step": 856 }, { "epoch": 0.17, "learning_rate": 0.0009472075346748226, "loss": 1.5779, "step": 857 }, { "epoch": 0.17, "learning_rate": 0.0009470609890093613, "loss": 1.4989, "step": 858 }, { "epoch": 0.17, "learning_rate": 0.0009469142515975669, "loss": 1.5669, "step": 859 }, { "epoch": 0.17, "learning_rate": 0.0009467673225023756, "loss": 1.5268, "step": 860 }, { "epoch": 0.17, "learning_rate": 0.0009466202017868058, "loss": 1.5677, "step": 861 }, { "epoch": 0.17, "learning_rate": 0.0009464728895139585, "loss": 1.5386, "step": 862 }, { "epoch": 0.17, "learning_rate": 0.0009463253857470164, "loss": 1.5411, "step": 863 }, { "epoch": 0.17, "learning_rate": 0.0009461776905492445, "loss": 1.5525, "step": 864 }, { "epoch": 0.17, "learning_rate": 0.0009460298039839899, "loss": 1.5352, "step": 865 }, { "epoch": 0.18, "learning_rate": 0.0009458817261146816, "loss": 1.5756, "step": 866 }, { "epoch": 0.18, "learning_rate": 0.000945733457004831, "loss": 1.5498, "step": 867 }, { "epoch": 0.18, "learning_rate": 0.0009455849967180313, "loss": 1.5259, "step": 868 }, { "epoch": 0.18, "learning_rate": 0.0009454363453179577, "loss": 1.5603, "step": 869 }, { "epoch": 0.18, "learning_rate": 0.0009452875028683677, "loss": 1.5127, "step": 870 }, { "epoch": 0.18, "learning_rate": 0.0009451384694331, "loss": 1.5346, "step": 871 }, { "epoch": 0.18, "learning_rate": 0.0009449892450760758, "loss": 1.5633, "step": 872 }, { "epoch": 0.18, "learning_rate": 0.0009448398298612981, "loss": 1.5329, "step": 873 }, { "epoch": 0.18, "learning_rate": 0.0009446902238528518, "loss": 1.5565, "step": 874 }, { "epoch": 0.18, "learning_rate": 0.0009445404271149037, "loss": 1.5164, "step": 875 }, { "epoch": 0.18, "learning_rate": 0.0009443904397117018, "loss": 1.553, "step": 876 }, { "epoch": 0.18, "learning_rate": 0.0009442402617075765, "loss": 1.4972, "step": 877 }, { "epoch": 0.18, "learning_rate": 0.00094408989316694, "loss": 1.5198, "step": 878 }, { "epoch": 0.18, "learning_rate": 0.0009439393341542857, "loss": 1.5494, "step": 879 }, { "epoch": 0.18, "learning_rate": 0.000943788584734189, "loss": 1.5552, "step": 880 }, { "epoch": 0.18, "learning_rate": 0.0009436376449713073, "loss": 1.5868, "step": 881 }, { "epoch": 0.18, "learning_rate": 0.0009434865149303788, "loss": 1.5355, "step": 882 }, { "epoch": 0.18, "learning_rate": 0.0009433351946762243, "loss": 1.5336, "step": 883 }, { "epoch": 0.18, "learning_rate": 0.0009431836842737455, "loss": 1.5511, "step": 884 }, { "epoch": 0.18, "learning_rate": 0.0009430319837879258, "loss": 1.5668, "step": 885 }, { "epoch": 0.18, "learning_rate": 0.0009428800932838301, "loss": 1.5692, "step": 886 }, { "epoch": 0.18, "learning_rate": 0.0009427280128266049, "loss": 1.5522, "step": 887 }, { "epoch": 0.18, "learning_rate": 0.0009425757424814784, "loss": 1.592, "step": 888 }, { "epoch": 0.18, "learning_rate": 0.0009424232823137598, "loss": 1.5375, "step": 889 }, { "epoch": 0.18, "learning_rate": 0.0009422706323888397, "loss": 1.5428, "step": 890 }, { "epoch": 0.18, "learning_rate": 0.0009421177927721907, "loss": 1.5227, "step": 891 }, { "epoch": 0.18, "learning_rate": 0.000941964763529366, "loss": 1.5331, "step": 892 }, { "epoch": 0.18, "learning_rate": 0.0009418115447260007, "loss": 1.5362, "step": 893 }, { "epoch": 0.18, "learning_rate": 0.0009416581364278109, "loss": 1.5178, "step": 894 }, { "epoch": 0.18, "learning_rate": 0.0009415045387005942, "loss": 1.5331, "step": 895 }, { "epoch": 0.18, "learning_rate": 0.0009413507516102291, "loss": 1.5055, "step": 896 }, { "epoch": 0.18, "learning_rate": 0.0009411967752226757, "loss": 1.5551, "step": 897 }, { "epoch": 0.18, "learning_rate": 0.000941042609603975, "loss": 1.531, "step": 898 }, { "epoch": 0.18, "learning_rate": 0.0009408882548202493, "loss": 1.5015, "step": 899 }, { "epoch": 0.18, "learning_rate": 0.000940733710937702, "loss": 1.4979, "step": 900 }, { "epoch": 0.18, "learning_rate": 0.0009405789780226177, "loss": 1.5276, "step": 901 }, { "epoch": 0.18, "learning_rate": 0.000940424056141362, "loss": 1.5214, "step": 902 }, { "epoch": 0.18, "learning_rate": 0.0009402689453603814, "loss": 1.577, "step": 903 }, { "epoch": 0.18, "learning_rate": 0.0009401136457462037, "loss": 1.5292, "step": 904 }, { "epoch": 0.18, "learning_rate": 0.0009399581573654376, "loss": 1.5198, "step": 905 }, { "epoch": 0.18, "learning_rate": 0.0009398024802847726, "loss": 1.5142, "step": 906 }, { "epoch": 0.18, "learning_rate": 0.0009396466145709792, "loss": 1.5395, "step": 907 }, { "epoch": 0.18, "learning_rate": 0.000939490560290909, "loss": 1.5181, "step": 908 }, { "epoch": 0.18, "learning_rate": 0.0009393343175114943, "loss": 1.5515, "step": 909 }, { "epoch": 0.18, "learning_rate": 0.0009391778862997482, "loss": 1.5179, "step": 910 }, { "epoch": 0.18, "learning_rate": 0.0009390212667227649, "loss": 1.5338, "step": 911 }, { "epoch": 0.18, "learning_rate": 0.000938864458847719, "loss": 1.5237, "step": 912 }, { "epoch": 0.18, "learning_rate": 0.0009387074627418661, "loss": 1.5486, "step": 913 }, { "epoch": 0.18, "learning_rate": 0.0009385502784725425, "loss": 1.5085, "step": 914 }, { "epoch": 0.18, "learning_rate": 0.0009383929061071652, "loss": 1.5723, "step": 915 }, { "epoch": 0.19, "learning_rate": 0.0009382353457132317, "loss": 1.5354, "step": 916 }, { "epoch": 0.19, "learning_rate": 0.0009380775973583208, "loss": 1.5494, "step": 917 }, { "epoch": 0.19, "learning_rate": 0.000937919661110091, "loss": 1.5152, "step": 918 }, { "epoch": 0.19, "learning_rate": 0.0009377615370362818, "loss": 1.5371, "step": 919 }, { "epoch": 0.19, "learning_rate": 0.0009376032252047136, "loss": 1.5256, "step": 920 }, { "epoch": 0.19, "learning_rate": 0.0009374447256832867, "loss": 1.5462, "step": 921 }, { "epoch": 0.19, "learning_rate": 0.0009372860385399824, "loss": 1.5325, "step": 922 }, { "epoch": 0.19, "learning_rate": 0.000937127163842862, "loss": 1.5975, "step": 923 }, { "epoch": 0.19, "learning_rate": 0.000936968101660068, "loss": 1.5271, "step": 924 }, { "epoch": 0.19, "learning_rate": 0.0009368088520598223, "loss": 1.5213, "step": 925 }, { "epoch": 0.19, "learning_rate": 0.0009366494151104284, "loss": 1.4945, "step": 926 }, { "epoch": 0.19, "learning_rate": 0.0009364897908802687, "loss": 1.5293, "step": 927 }, { "epoch": 0.19, "learning_rate": 0.0009363299794378072, "loss": 1.541, "step": 928 }, { "epoch": 0.19, "learning_rate": 0.0009361699808515876, "loss": 1.5564, "step": 929 }, { "epoch": 0.19, "learning_rate": 0.0009360097951902338, "loss": 1.5465, "step": 930 }, { "epoch": 0.19, "learning_rate": 0.0009358494225224503, "loss": 1.5428, "step": 931 }, { "epoch": 0.19, "learning_rate": 0.0009356888629170215, "loss": 1.5531, "step": 932 }, { "epoch": 0.19, "learning_rate": 0.0009355281164428122, "loss": 1.5314, "step": 933 }, { "epoch": 0.19, "learning_rate": 0.0009353671831687671, "loss": 1.4989, "step": 934 }, { "epoch": 0.19, "learning_rate": 0.0009352060631639113, "loss": 1.522, "step": 935 }, { "epoch": 0.19, "learning_rate": 0.0009350447564973498, "loss": 1.5404, "step": 936 }, { "epoch": 0.19, "learning_rate": 0.0009348832632382675, "loss": 1.5624, "step": 937 }, { "epoch": 0.19, "learning_rate": 0.00093472158345593, "loss": 1.5183, "step": 938 }, { "epoch": 0.19, "learning_rate": 0.0009345597172196818, "loss": 1.5245, "step": 939 }, { "epoch": 0.19, "learning_rate": 0.0009343976645989486, "loss": 1.5531, "step": 940 }, { "epoch": 0.19, "learning_rate": 0.0009342354256632352, "loss": 1.5665, "step": 941 }, { "epoch": 0.19, "learning_rate": 0.0009340730004821265, "loss": 1.5169, "step": 942 }, { "epoch": 0.19, "learning_rate": 0.0009339103891252874, "loss": 1.5621, "step": 943 }, { "epoch": 0.19, "learning_rate": 0.0009337475916624626, "loss": 1.5286, "step": 944 }, { "epoch": 0.19, "learning_rate": 0.0009335846081634766, "loss": 1.5452, "step": 945 }, { "epoch": 0.19, "learning_rate": 0.0009334214386982337, "loss": 1.5513, "step": 946 }, { "epoch": 0.19, "learning_rate": 0.000933258083336718, "loss": 1.5649, "step": 947 }, { "epoch": 0.19, "learning_rate": 0.0009330945421489933, "loss": 1.556, "step": 948 }, { "epoch": 0.19, "learning_rate": 0.0009329308152052031, "loss": 1.5373, "step": 949 }, { "epoch": 0.19, "learning_rate": 0.0009327669025755706, "loss": 1.5732, "step": 950 }, { "epoch": 0.19, "learning_rate": 0.0009326028043303984, "loss": 1.562, "step": 951 }, { "epoch": 0.19, "learning_rate": 0.0009324385205400694, "loss": 1.5338, "step": 952 }, { "epoch": 0.19, "learning_rate": 0.0009322740512750452, "loss": 1.5243, "step": 953 }, { "epoch": 0.19, "learning_rate": 0.0009321093966058674, "loss": 1.4532, "step": 954 }, { "epoch": 0.19, "learning_rate": 0.000931944556603157, "loss": 1.5375, "step": 955 }, { "epoch": 0.19, "learning_rate": 0.000931779531337615, "loss": 1.4972, "step": 956 }, { "epoch": 0.19, "learning_rate": 0.000931614320880021, "loss": 1.5477, "step": 957 }, { "epoch": 0.19, "learning_rate": 0.0009314489253012346, "loss": 1.5308, "step": 958 }, { "epoch": 0.19, "learning_rate": 0.0009312833446721946, "loss": 1.5061, "step": 959 }, { "epoch": 0.19, "learning_rate": 0.0009311175790639194, "loss": 1.5149, "step": 960 }, { "epoch": 0.19, "learning_rate": 0.0009309516285475063, "loss": 1.526, "step": 961 }, { "epoch": 0.19, "learning_rate": 0.0009307854931941324, "loss": 1.5362, "step": 962 }, { "epoch": 0.19, "learning_rate": 0.0009306191730750538, "loss": 1.5401, "step": 963 }, { "epoch": 0.19, "learning_rate": 0.0009304526682616059, "loss": 1.5019, "step": 964 }, { "epoch": 0.2, "learning_rate": 0.0009302859788252032, "loss": 1.4978, "step": 965 }, { "epoch": 0.2, "learning_rate": 0.0009301191048373396, "loss": 1.5329, "step": 966 }, { "epoch": 0.2, "learning_rate": 0.0009299520463695881, "loss": 1.5577, "step": 967 }, { "epoch": 0.2, "learning_rate": 0.0009297848034936007, "loss": 1.5639, "step": 968 }, { "epoch": 0.2, "learning_rate": 0.0009296173762811085, "loss": 1.549, "step": 969 }, { "epoch": 0.2, "learning_rate": 0.0009294497648039217, "loss": 1.5036, "step": 970 }, { "epoch": 0.2, "learning_rate": 0.0009292819691339298, "loss": 1.4939, "step": 971 }, { "epoch": 0.2, "learning_rate": 0.0009291139893431011, "loss": 1.5092, "step": 972 }, { "epoch": 0.2, "learning_rate": 0.0009289458255034824, "loss": 1.562, "step": 973 }, { "epoch": 0.2, "learning_rate": 0.0009287774776872003, "loss": 1.5209, "step": 974 }, { "epoch": 0.2, "learning_rate": 0.0009286089459664597, "loss": 1.5563, "step": 975 }, { "epoch": 0.2, "learning_rate": 0.0009284402304135447, "loss": 1.5455, "step": 976 }, { "epoch": 0.2, "learning_rate": 0.0009282713311008179, "loss": 1.5584, "step": 977 }, { "epoch": 0.2, "learning_rate": 0.000928102248100721, "loss": 1.4955, "step": 978 }, { "epoch": 0.2, "learning_rate": 0.0009279329814857746, "loss": 1.5771, "step": 979 }, { "epoch": 0.2, "learning_rate": 0.0009277635313285777, "loss": 1.5736, "step": 980 }, { "epoch": 0.2, "learning_rate": 0.0009275938977018082, "loss": 1.4925, "step": 981 }, { "epoch": 0.2, "learning_rate": 0.0009274240806782226, "loss": 1.5316, "step": 982 }, { "epoch": 0.2, "learning_rate": 0.0009272540803306562, "loss": 1.5348, "step": 983 }, { "epoch": 0.2, "learning_rate": 0.0009270838967320227, "loss": 1.5583, "step": 984 }, { "epoch": 0.2, "learning_rate": 0.0009269135299553149, "loss": 1.5195, "step": 985 }, { "epoch": 0.2, "learning_rate": 0.0009267429800736036, "loss": 1.5298, "step": 986 }, { "epoch": 0.2, "learning_rate": 0.0009265722471600384, "loss": 1.5507, "step": 987 }, { "epoch": 0.2, "learning_rate": 0.0009264013312878473, "loss": 1.5, "step": 988 }, { "epoch": 0.2, "learning_rate": 0.0009262302325303369, "loss": 1.5177, "step": 989 }, { "epoch": 0.2, "learning_rate": 0.0009260589509608923, "loss": 1.5127, "step": 990 }, { "epoch": 0.2, "learning_rate": 0.0009258874866529767, "loss": 1.5096, "step": 991 }, { "epoch": 0.2, "learning_rate": 0.0009257158396801319, "loss": 1.4997, "step": 992 }, { "epoch": 0.2, "learning_rate": 0.000925544010115978, "loss": 1.5351, "step": 993 }, { "epoch": 0.2, "learning_rate": 0.0009253719980342135, "loss": 1.5372, "step": 994 }, { "epoch": 0.2, "learning_rate": 0.000925199803508615, "loss": 1.5054, "step": 995 }, { "epoch": 0.2, "learning_rate": 0.0009250274266130375, "loss": 1.5297, "step": 996 }, { "epoch": 0.2, "learning_rate": 0.0009248548674214141, "loss": 1.5119, "step": 997 }, { "epoch": 0.2, "learning_rate": 0.0009246821260077564, "loss": 1.5224, "step": 998 }, { "epoch": 0.2, "learning_rate": 0.0009245092024461537, "loss": 1.5088, "step": 999 }, { "epoch": 0.2, "learning_rate": 0.0009243360968107737, "loss": 1.5051, "step": 1000 }, { "epoch": 0.2, "learning_rate": 0.000924162809175862, "loss": 1.5495, "step": 1001 }, { "epoch": 0.2, "learning_rate": 0.0009239893396157426, "loss": 1.5672, "step": 1002 }, { "epoch": 0.2, "learning_rate": 0.0009238156882048173, "loss": 1.508, "step": 1003 }, { "epoch": 0.2, "learning_rate": 0.0009236418550175659, "loss": 1.5187, "step": 1004 }, { "epoch": 0.2, "learning_rate": 0.0009234678401285461, "loss": 1.5318, "step": 1005 }, { "epoch": 0.2, "learning_rate": 0.0009232936436123936, "loss": 1.5384, "step": 1006 }, { "epoch": 0.2, "learning_rate": 0.0009231192655438221, "loss": 1.5119, "step": 1007 }, { "epoch": 0.2, "learning_rate": 0.0009229447059976231, "loss": 1.5641, "step": 1008 }, { "epoch": 0.2, "learning_rate": 0.0009227699650486657, "loss": 1.5305, "step": 1009 }, { "epoch": 0.2, "learning_rate": 0.0009225950427718975, "loss": 1.5143, "step": 1010 }, { "epoch": 0.2, "learning_rate": 0.0009224199392423429, "loss": 1.5335, "step": 1011 }, { "epoch": 0.2, "learning_rate": 0.0009222446545351047, "loss": 1.5581, "step": 1012 }, { "epoch": 0.2, "learning_rate": 0.0009220691887253634, "loss": 1.5739, "step": 1013 }, { "epoch": 0.2, "learning_rate": 0.000921893541888377, "loss": 1.4851, "step": 1014 }, { "epoch": 0.21, "learning_rate": 0.0009217177140994809, "loss": 1.5038, "step": 1015 }, { "epoch": 0.21, "learning_rate": 0.0009215417054340886, "loss": 1.5111, "step": 1016 }, { "epoch": 0.21, "learning_rate": 0.000921365515967691, "loss": 1.4995, "step": 1017 }, { "epoch": 0.21, "learning_rate": 0.0009211891457758563, "loss": 1.5229, "step": 1018 }, { "epoch": 0.21, "learning_rate": 0.0009210125949342306, "loss": 1.5188, "step": 1019 }, { "epoch": 0.21, "learning_rate": 0.0009208358635185372, "loss": 1.5311, "step": 1020 }, { "epoch": 0.21, "learning_rate": 0.0009206589516045769, "loss": 1.5514, "step": 1021 }, { "epoch": 0.21, "learning_rate": 0.0009204818592682281, "loss": 1.5376, "step": 1022 }, { "epoch": 0.21, "learning_rate": 0.0009203045865854465, "loss": 1.5229, "step": 1023 }, { "epoch": 0.21, "learning_rate": 0.0009201271336322647, "loss": 1.536, "step": 1024 }, { "epoch": 0.21, "learning_rate": 0.0009199495004847936, "loss": 1.536, "step": 1025 }, { "epoch": 0.21, "learning_rate": 0.0009197716872192201, "loss": 1.5352, "step": 1026 }, { "epoch": 0.21, "learning_rate": 0.0009195936939118096, "loss": 1.5409, "step": 1027 }, { "epoch": 0.21, "learning_rate": 0.0009194155206389042, "loss": 1.5355, "step": 1028 }, { "epoch": 0.21, "learning_rate": 0.0009192371674769229, "loss": 1.5429, "step": 1029 }, { "epoch": 0.21, "learning_rate": 0.000919058634502362, "loss": 1.5352, "step": 1030 }, { "epoch": 0.21, "learning_rate": 0.0009188799217917955, "loss": 1.5217, "step": 1031 }, { "epoch": 0.21, "learning_rate": 0.0009187010294218736, "loss": 1.5531, "step": 1032 }, { "epoch": 0.21, "learning_rate": 0.0009185219574693243, "loss": 1.5191, "step": 1033 }, { "epoch": 0.21, "learning_rate": 0.0009183427060109521, "loss": 1.5396, "step": 1034 }, { "epoch": 0.21, "learning_rate": 0.0009181632751236389, "loss": 1.4891, "step": 1035 }, { "epoch": 0.21, "learning_rate": 0.0009179836648843429, "loss": 1.4974, "step": 1036 }, { "epoch": 0.21, "learning_rate": 0.0009178038753701003, "loss": 1.5471, "step": 1037 }, { "epoch": 0.21, "learning_rate": 0.0009176239066580232, "loss": 1.546, "step": 1038 }, { "epoch": 0.21, "learning_rate": 0.0009174437588253011, "loss": 1.5328, "step": 1039 }, { "epoch": 0.21, "learning_rate": 0.0009172634319492001, "loss": 1.5827, "step": 1040 }, { "epoch": 0.21, "learning_rate": 0.0009170829261070632, "loss": 1.5425, "step": 1041 }, { "epoch": 0.21, "learning_rate": 0.0009169022413763099, "loss": 1.5044, "step": 1042 }, { "epoch": 0.21, "learning_rate": 0.000916721377834437, "loss": 1.5474, "step": 1043 }, { "epoch": 0.21, "learning_rate": 0.0009165403355590172, "loss": 1.4664, "step": 1044 }, { "epoch": 0.21, "learning_rate": 0.0009163591146277006, "loss": 1.5351, "step": 1045 }, { "epoch": 0.21, "learning_rate": 0.0009161777151182136, "loss": 1.5222, "step": 1046 }, { "epoch": 0.21, "learning_rate": 0.0009159961371083591, "loss": 1.5411, "step": 1047 }, { "epoch": 0.21, "learning_rate": 0.0009158143806760169, "loss": 1.5167, "step": 1048 }, { "epoch": 0.21, "learning_rate": 0.0009156324458991427, "loss": 1.5576, "step": 1049 }, { "epoch": 0.21, "learning_rate": 0.0009154503328557693, "loss": 1.5419, "step": 1050 }, { "epoch": 0.21, "learning_rate": 0.0009152680416240059, "loss": 1.5402, "step": 1051 }, { "epoch": 0.21, "learning_rate": 0.0009150855722820377, "loss": 1.5495, "step": 1052 }, { "epoch": 0.21, "learning_rate": 0.0009149029249081266, "loss": 1.5513, "step": 1053 }, { "epoch": 0.21, "learning_rate": 0.0009147200995806112, "loss": 1.5552, "step": 1054 }, { "epoch": 0.21, "learning_rate": 0.0009145370963779056, "loss": 1.5163, "step": 1055 }, { "epoch": 0.21, "learning_rate": 0.0009143539153785009, "loss": 1.5324, "step": 1056 }, { "epoch": 0.21, "learning_rate": 0.000914170556660964, "loss": 1.5646, "step": 1057 }, { "epoch": 0.21, "learning_rate": 0.0009139870203039384, "loss": 1.5462, "step": 1058 }, { "epoch": 0.21, "learning_rate": 0.0009138033063861435, "loss": 1.5129, "step": 1059 }, { "epoch": 0.21, "learning_rate": 0.000913619414986375, "loss": 1.5088, "step": 1060 }, { "epoch": 0.21, "learning_rate": 0.0009134353461835048, "loss": 1.522, "step": 1061 }, { "epoch": 0.21, "learning_rate": 0.0009132511000564806, "loss": 1.526, "step": 1062 }, { "epoch": 0.21, "learning_rate": 0.0009130666766843265, "loss": 1.5755, "step": 1063 }, { "epoch": 0.22, "learning_rate": 0.0009128820761461423, "loss": 1.5195, "step": 1064 }, { "epoch": 0.22, "learning_rate": 0.0009126972985211042, "loss": 1.4926, "step": 1065 }, { "epoch": 0.22, "learning_rate": 0.0009125123438884637, "loss": 1.5273, "step": 1066 }, { "epoch": 0.22, "learning_rate": 0.0009123272123275489, "loss": 1.5342, "step": 1067 }, { "epoch": 0.22, "learning_rate": 0.0009121419039177634, "loss": 1.4899, "step": 1068 }, { "epoch": 0.22, "learning_rate": 0.0009119564187385869, "loss": 1.5394, "step": 1069 }, { "epoch": 0.22, "learning_rate": 0.0009117707568695748, "loss": 1.5359, "step": 1070 }, { "epoch": 0.22, "learning_rate": 0.0009115849183903579, "loss": 1.5304, "step": 1071 }, { "epoch": 0.22, "learning_rate": 0.0009113989033806433, "loss": 1.518, "step": 1072 }, { "epoch": 0.22, "learning_rate": 0.0009112127119202139, "loss": 1.4891, "step": 1073 }, { "epoch": 0.22, "learning_rate": 0.0009110263440889276, "loss": 1.5434, "step": 1074 }, { "epoch": 0.22, "learning_rate": 0.0009108397999667184, "loss": 1.5117, "step": 1075 }, { "epoch": 0.22, "learning_rate": 0.0009106530796335961, "loss": 1.5387, "step": 1076 }, { "epoch": 0.22, "learning_rate": 0.0009104661831696455, "loss": 1.4914, "step": 1077 }, { "epoch": 0.22, "learning_rate": 0.0009102791106550276, "loss": 1.5653, "step": 1078 }, { "epoch": 0.22, "learning_rate": 0.0009100918621699786, "loss": 1.4948, "step": 1079 }, { "epoch": 0.22, "learning_rate": 0.0009099044377948097, "loss": 1.5192, "step": 1080 }, { "epoch": 0.22, "learning_rate": 0.0009097168376099085, "loss": 1.5156, "step": 1081 }, { "epoch": 0.22, "learning_rate": 0.0009095290616957372, "loss": 1.4912, "step": 1082 }, { "epoch": 0.22, "learning_rate": 0.0009093411101328338, "loss": 1.5318, "step": 1083 }, { "epoch": 0.22, "learning_rate": 0.0009091529830018114, "loss": 1.5553, "step": 1084 }, { "epoch": 0.22, "learning_rate": 0.0009089646803833589, "loss": 1.5058, "step": 1085 }, { "epoch": 0.22, "learning_rate": 0.0009087762023582396, "loss": 1.5483, "step": 1086 }, { "epoch": 0.22, "learning_rate": 0.0009085875490072928, "loss": 1.5444, "step": 1087 }, { "epoch": 0.22, "learning_rate": 0.0009083987204114326, "loss": 1.5525, "step": 1088 }, { "epoch": 0.22, "learning_rate": 0.0009082097166516483, "loss": 1.5201, "step": 1089 }, { "epoch": 0.22, "learning_rate": 0.0009080205378090046, "loss": 1.4924, "step": 1090 }, { "epoch": 0.22, "learning_rate": 0.000907831183964641, "loss": 1.5182, "step": 1091 }, { "epoch": 0.22, "learning_rate": 0.0009076416551997722, "loss": 1.5628, "step": 1092 }, { "epoch": 0.22, "learning_rate": 0.0009074519515956879, "loss": 1.473, "step": 1093 }, { "epoch": 0.22, "learning_rate": 0.0009072620732337527, "loss": 1.5232, "step": 1094 }, { "epoch": 0.22, "learning_rate": 0.0009070720201954061, "loss": 1.4854, "step": 1095 }, { "epoch": 0.22, "learning_rate": 0.0009068817925621628, "loss": 1.5378, "step": 1096 }, { "epoch": 0.22, "learning_rate": 0.0009066913904156124, "loss": 1.4954, "step": 1097 }, { "epoch": 0.22, "learning_rate": 0.0009065008138374189, "loss": 1.4579, "step": 1098 }, { "epoch": 0.22, "learning_rate": 0.0009063100629093215, "loss": 1.499, "step": 1099 }, { "epoch": 0.22, "learning_rate": 0.000906119137713134, "loss": 1.4974, "step": 1100 }, { "epoch": 0.22, "learning_rate": 0.0009059280383307452, "loss": 1.5348, "step": 1101 }, { "epoch": 0.22, "learning_rate": 0.0009057367648441182, "loss": 1.4805, "step": 1102 }, { "epoch": 0.22, "learning_rate": 0.0009055453173352912, "loss": 1.4976, "step": 1103 }, { "epoch": 0.22, "learning_rate": 0.0009053536958863769, "loss": 1.5339, "step": 1104 }, { "epoch": 0.22, "learning_rate": 0.0009051619005795621, "loss": 1.5049, "step": 1105 }, { "epoch": 0.22, "learning_rate": 0.0009049699314971091, "loss": 1.5516, "step": 1106 }, { "epoch": 0.22, "learning_rate": 0.000904777788721354, "loss": 1.4986, "step": 1107 }, { "epoch": 0.22, "learning_rate": 0.0009045854723347075, "loss": 1.4972, "step": 1108 }, { "epoch": 0.22, "learning_rate": 0.0009043929824196552, "loss": 1.527, "step": 1109 }, { "epoch": 0.22, "learning_rate": 0.000904200319058757, "loss": 1.4693, "step": 1110 }, { "epoch": 0.22, "learning_rate": 0.0009040074823346465, "loss": 1.4911, "step": 1111 }, { "epoch": 0.22, "learning_rate": 0.0009038144723300326, "loss": 1.5184, "step": 1112 }, { "epoch": 0.23, "learning_rate": 0.0009036212891276978, "loss": 1.5229, "step": 1113 }, { "epoch": 0.23, "learning_rate": 0.0009034279328104992, "loss": 1.5006, "step": 1114 }, { "epoch": 0.23, "learning_rate": 0.0009032344034613684, "loss": 1.528, "step": 1115 }, { "epoch": 0.23, "learning_rate": 0.0009030407011633108, "loss": 1.5385, "step": 1116 }, { "epoch": 0.23, "learning_rate": 0.0009028468259994063, "loss": 1.4954, "step": 1117 }, { "epoch": 0.23, "learning_rate": 0.0009026527780528085, "loss": 1.5152, "step": 1118 }, { "epoch": 0.23, "learning_rate": 0.0009024585574067452, "loss": 1.5318, "step": 1119 }, { "epoch": 0.23, "learning_rate": 0.0009022641641445191, "loss": 1.5366, "step": 1120 }, { "epoch": 0.23, "learning_rate": 0.0009020695983495057, "loss": 1.5764, "step": 1121 }, { "epoch": 0.23, "learning_rate": 0.0009018748601051554, "loss": 1.5669, "step": 1122 }, { "epoch": 0.23, "learning_rate": 0.0009016799494949921, "loss": 1.5256, "step": 1123 }, { "epoch": 0.23, "learning_rate": 0.0009014848666026138, "loss": 1.539, "step": 1124 }, { "epoch": 0.23, "learning_rate": 0.0009012896115116924, "loss": 1.4922, "step": 1125 }, { "epoch": 0.23, "learning_rate": 0.0009010941843059737, "loss": 1.5467, "step": 1126 }, { "epoch": 0.23, "learning_rate": 0.0009008985850692772, "loss": 1.536, "step": 1127 }, { "epoch": 0.23, "learning_rate": 0.0009007028138854961, "loss": 1.5013, "step": 1128 }, { "epoch": 0.23, "learning_rate": 0.0009005068708385978, "loss": 1.5335, "step": 1129 }, { "epoch": 0.23, "learning_rate": 0.0009003107560126226, "loss": 1.5042, "step": 1130 }, { "epoch": 0.23, "learning_rate": 0.0009001144694916854, "loss": 1.5326, "step": 1131 }, { "epoch": 0.23, "learning_rate": 0.000899918011359974, "loss": 1.4926, "step": 1132 }, { "epoch": 0.23, "learning_rate": 0.0008997213817017506, "loss": 1.5025, "step": 1133 }, { "epoch": 0.23, "learning_rate": 0.00089952458060135, "loss": 1.5231, "step": 1134 }, { "epoch": 0.23, "learning_rate": 0.0008993276081431811, "loss": 1.4875, "step": 1135 }, { "epoch": 0.23, "learning_rate": 0.0008991304644117266, "loss": 1.5273, "step": 1136 }, { "epoch": 0.23, "learning_rate": 0.0008989331494915417, "loss": 1.5184, "step": 1137 }, { "epoch": 0.23, "learning_rate": 0.0008987356634672559, "loss": 1.546, "step": 1138 }, { "epoch": 0.23, "learning_rate": 0.0008985380064235719, "loss": 1.5249, "step": 1139 }, { "epoch": 0.23, "learning_rate": 0.0008983401784452654, "loss": 1.495, "step": 1140 }, { "epoch": 0.23, "learning_rate": 0.0008981421796171857, "loss": 1.515, "step": 1141 }, { "epoch": 0.23, "learning_rate": 0.0008979440100242554, "loss": 1.5199, "step": 1142 }, { "epoch": 0.23, "learning_rate": 0.0008977456697514702, "loss": 1.5179, "step": 1143 }, { "epoch": 0.23, "learning_rate": 0.0008975471588838992, "loss": 1.4999, "step": 1144 }, { "epoch": 0.23, "learning_rate": 0.0008973484775066843, "loss": 1.4845, "step": 1145 }, { "epoch": 0.23, "learning_rate": 0.0008971496257050411, "loss": 1.5212, "step": 1146 }, { "epoch": 0.23, "learning_rate": 0.0008969506035642577, "loss": 1.5285, "step": 1147 }, { "epoch": 0.23, "learning_rate": 0.0008967514111696957, "loss": 1.5294, "step": 1148 }, { "epoch": 0.23, "learning_rate": 0.0008965520486067895, "loss": 1.4975, "step": 1149 }, { "epoch": 0.23, "learning_rate": 0.0008963525159610463, "loss": 1.5226, "step": 1150 }, { "epoch": 0.23, "learning_rate": 0.0008961528133180471, "loss": 1.5158, "step": 1151 }, { "epoch": 0.23, "learning_rate": 0.0008959529407634446, "loss": 1.5457, "step": 1152 }, { "epoch": 0.23, "learning_rate": 0.0008957528983829652, "loss": 1.5063, "step": 1153 }, { "epoch": 0.23, "learning_rate": 0.0008955526862624079, "loss": 1.5455, "step": 1154 }, { "epoch": 0.23, "learning_rate": 0.0008953523044876446, "loss": 1.5223, "step": 1155 }, { "epoch": 0.23, "learning_rate": 0.0008951517531446198, "loss": 1.5219, "step": 1156 }, { "epoch": 0.23, "learning_rate": 0.0008949510323193506, "loss": 1.5435, "step": 1157 }, { "epoch": 0.23, "learning_rate": 0.0008947501420979275, "loss": 1.4784, "step": 1158 }, { "epoch": 0.23, "learning_rate": 0.0008945490825665128, "loss": 1.5444, "step": 1159 }, { "epoch": 0.23, "learning_rate": 0.0008943478538113419, "loss": 1.5349, "step": 1160 }, { "epoch": 0.23, "learning_rate": 0.0008941464559187224, "loss": 1.5463, "step": 1161 }, { "epoch": 0.23, "learning_rate": 0.0008939448889750352, "loss": 1.5358, "step": 1162 }, { "epoch": 0.24, "learning_rate": 0.0008937431530667328, "loss": 1.4947, "step": 1163 }, { "epoch": 0.24, "learning_rate": 0.0008935412482803408, "loss": 1.5275, "step": 1164 }, { "epoch": 0.24, "learning_rate": 0.0008933391747024569, "loss": 1.5647, "step": 1165 }, { "epoch": 0.24, "learning_rate": 0.0008931369324197511, "loss": 1.4956, "step": 1166 }, { "epoch": 0.24, "learning_rate": 0.0008929345215189663, "loss": 1.5262, "step": 1167 }, { "epoch": 0.24, "learning_rate": 0.0008927319420869174, "loss": 1.5164, "step": 1168 }, { "epoch": 0.24, "learning_rate": 0.0008925291942104914, "loss": 1.504, "step": 1169 }, { "epoch": 0.24, "learning_rate": 0.0008923262779766477, "loss": 1.5093, "step": 1170 }, { "epoch": 0.24, "learning_rate": 0.0008921231934724179, "loss": 1.4834, "step": 1171 }, { "epoch": 0.24, "learning_rate": 0.000891919940784906, "loss": 1.533, "step": 1172 }, { "epoch": 0.24, "learning_rate": 0.0008917165200012877, "loss": 1.4903, "step": 1173 }, { "epoch": 0.24, "learning_rate": 0.0008915129312088111, "loss": 1.5505, "step": 1174 }, { "epoch": 0.24, "learning_rate": 0.0008913091744947964, "loss": 1.5545, "step": 1175 }, { "epoch": 0.24, "learning_rate": 0.0008911052499466357, "loss": 1.5355, "step": 1176 }, { "epoch": 0.24, "learning_rate": 0.0008909011576517928, "loss": 1.5369, "step": 1177 }, { "epoch": 0.24, "learning_rate": 0.0008906968976978041, "loss": 1.5198, "step": 1178 }, { "epoch": 0.24, "learning_rate": 0.0008904924701722774, "loss": 1.5613, "step": 1179 }, { "epoch": 0.24, "learning_rate": 0.0008902878751628925, "loss": 1.5459, "step": 1180 }, { "epoch": 0.24, "learning_rate": 0.000890083112757401, "loss": 1.5305, "step": 1181 }, { "epoch": 0.24, "learning_rate": 0.0008898781830436267, "loss": 1.5018, "step": 1182 }, { "epoch": 0.24, "learning_rate": 0.0008896730861094642, "loss": 1.4891, "step": 1183 }, { "epoch": 0.24, "learning_rate": 0.000889467822042881, "loss": 1.4861, "step": 1184 }, { "epoch": 0.24, "learning_rate": 0.0008892623909319156, "loss": 1.5247, "step": 1185 }, { "epoch": 0.24, "learning_rate": 0.000889056792864678, "loss": 1.5209, "step": 1186 }, { "epoch": 0.24, "learning_rate": 0.0008888510279293502, "loss": 1.5175, "step": 1187 }, { "epoch": 0.24, "learning_rate": 0.0008886450962141859, "loss": 1.5281, "step": 1188 }, { "epoch": 0.24, "learning_rate": 0.0008884389978075098, "loss": 1.5492, "step": 1189 }, { "epoch": 0.24, "learning_rate": 0.0008882327327977184, "loss": 1.5534, "step": 1190 }, { "epoch": 0.24, "learning_rate": 0.0008880263012732796, "loss": 1.5692, "step": 1191 }, { "epoch": 0.24, "learning_rate": 0.000887819703322733, "loss": 1.5493, "step": 1192 }, { "epoch": 0.24, "learning_rate": 0.0008876129390346891, "loss": 1.5163, "step": 1193 }, { "epoch": 0.24, "learning_rate": 0.0008874060084978299, "loss": 1.5508, "step": 1194 }, { "epoch": 0.24, "learning_rate": 0.0008871989118009089, "loss": 1.5291, "step": 1195 }, { "epoch": 0.24, "learning_rate": 0.0008869916490327509, "loss": 1.5448, "step": 1196 }, { "epoch": 0.24, "learning_rate": 0.0008867842202822515, "loss": 1.5496, "step": 1197 }, { "epoch": 0.24, "learning_rate": 0.0008865766256383778, "loss": 1.5361, "step": 1198 }, { "epoch": 0.24, "learning_rate": 0.000886368865190168, "loss": 1.5325, "step": 1199 }, { "epoch": 0.24, "learning_rate": 0.0008861609390267318, "loss": 1.5361, "step": 1200 }, { "epoch": 0.24, "learning_rate": 0.000885952847237249, "loss": 1.5167, "step": 1201 }, { "epoch": 0.24, "learning_rate": 0.0008857445899109715, "loss": 1.4999, "step": 1202 }, { "epoch": 0.24, "learning_rate": 0.0008855361671372215, "loss": 1.5507, "step": 1203 }, { "epoch": 0.24, "learning_rate": 0.0008853275790053926, "loss": 1.5073, "step": 1204 }, { "epoch": 0.24, "learning_rate": 0.0008851188256049489, "loss": 1.466, "step": 1205 }, { "epoch": 0.24, "learning_rate": 0.0008849099070254258, "loss": 1.5174, "step": 1206 }, { "epoch": 0.24, "learning_rate": 0.0008847008233564294, "loss": 1.5649, "step": 1207 }, { "epoch": 0.24, "learning_rate": 0.0008844915746876362, "loss": 1.5204, "step": 1208 }, { "epoch": 0.24, "learning_rate": 0.0008842821611087941, "loss": 1.5305, "step": 1209 }, { "epoch": 0.24, "learning_rate": 0.0008840725827097216, "loss": 1.5229, "step": 1210 }, { "epoch": 0.24, "learning_rate": 0.0008838628395803074, "loss": 1.5148, "step": 1211 }, { "epoch": 0.25, "learning_rate": 0.0008836529318105115, "loss": 1.504, "step": 1212 }, { "epoch": 0.25, "learning_rate": 0.0008834428594903641, "loss": 1.5108, "step": 1213 }, { "epoch": 0.25, "learning_rate": 0.000883232622709966, "loss": 1.5131, "step": 1214 }, { "epoch": 0.25, "learning_rate": 0.000883022221559489, "loss": 1.5021, "step": 1215 }, { "epoch": 0.25, "learning_rate": 0.0008828116561291747, "loss": 1.5506, "step": 1216 }, { "epoch": 0.25, "learning_rate": 0.0008826009265093355, "loss": 1.4866, "step": 1217 }, { "epoch": 0.25, "learning_rate": 0.0008823900327903545, "loss": 1.5356, "step": 1218 }, { "epoch": 0.25, "learning_rate": 0.0008821789750626845, "loss": 1.5297, "step": 1219 }, { "epoch": 0.25, "learning_rate": 0.0008819677534168493, "loss": 1.5134, "step": 1220 }, { "epoch": 0.25, "learning_rate": 0.0008817563679434427, "loss": 1.4883, "step": 1221 }, { "epoch": 0.25, "learning_rate": 0.0008815448187331289, "loss": 1.5354, "step": 1222 }, { "epoch": 0.25, "learning_rate": 0.0008813331058766421, "loss": 1.537, "step": 1223 }, { "epoch": 0.25, "learning_rate": 0.0008811212294647868, "loss": 1.5176, "step": 1224 }, { "epoch": 0.25, "learning_rate": 0.0008809091895884379, "loss": 1.482, "step": 1225 }, { "epoch": 0.25, "learning_rate": 0.0008806969863385401, "loss": 1.5227, "step": 1226 }, { "epoch": 0.25, "learning_rate": 0.0008804846198061081, "loss": 1.531, "step": 1227 }, { "epoch": 0.25, "learning_rate": 0.0008802720900822269, "loss": 1.5003, "step": 1228 }, { "epoch": 0.25, "learning_rate": 0.0008800593972580515, "loss": 1.5113, "step": 1229 }, { "epoch": 0.25, "learning_rate": 0.0008798465414248067, "loss": 1.5499, "step": 1230 }, { "epoch": 0.25, "learning_rate": 0.0008796335226737872, "loss": 1.5379, "step": 1231 }, { "epoch": 0.25, "learning_rate": 0.0008794203410963577, "loss": 1.5124, "step": 1232 }, { "epoch": 0.25, "learning_rate": 0.0008792069967839525, "loss": 1.4912, "step": 1233 }, { "epoch": 0.25, "learning_rate": 0.000878993489828076, "loss": 1.5345, "step": 1234 }, { "epoch": 0.25, "learning_rate": 0.0008787798203203024, "loss": 1.5343, "step": 1235 }, { "epoch": 0.25, "learning_rate": 0.0008785659883522751, "loss": 1.543, "step": 1236 }, { "epoch": 0.25, "learning_rate": 0.0008783519940157076, "loss": 1.5349, "step": 1237 }, { "epoch": 0.25, "learning_rate": 0.0008781378374023834, "loss": 1.4955, "step": 1238 }, { "epoch": 0.25, "learning_rate": 0.0008779235186041545, "loss": 1.5416, "step": 1239 }, { "epoch": 0.25, "learning_rate": 0.0008777090377129437, "loss": 1.5329, "step": 1240 }, { "epoch": 0.25, "learning_rate": 0.0008774943948207425, "loss": 1.4976, "step": 1241 }, { "epoch": 0.25, "learning_rate": 0.0008772795900196122, "loss": 1.5396, "step": 1242 }, { "epoch": 0.25, "learning_rate": 0.0008770646234016833, "loss": 1.5041, "step": 1243 }, { "epoch": 0.25, "learning_rate": 0.0008768494950591561, "loss": 1.5204, "step": 1244 }, { "epoch": 0.25, "learning_rate": 0.0008766342050843, "loss": 1.5308, "step": 1245 }, { "epoch": 0.25, "learning_rate": 0.0008764187535694537, "loss": 1.4922, "step": 1246 }, { "epoch": 0.25, "learning_rate": 0.0008762031406070255, "loss": 1.5072, "step": 1247 }, { "epoch": 0.25, "learning_rate": 0.0008759873662894922, "loss": 1.4866, "step": 1248 }, { "epoch": 0.25, "learning_rate": 0.0008757714307094008, "loss": 1.5309, "step": 1249 }, { "epoch": 0.25, "learning_rate": 0.0008755553339593668, "loss": 1.4993, "step": 1250 }, { "epoch": 0.25, "learning_rate": 0.000875339076132075, "loss": 1.5334, "step": 1251 }, { "epoch": 0.25, "learning_rate": 0.000875122657320279, "loss": 1.5267, "step": 1252 }, { "epoch": 0.25, "learning_rate": 0.0008749060776168023, "loss": 1.536, "step": 1253 }, { "epoch": 0.25, "learning_rate": 0.0008746893371145366, "loss": 1.5173, "step": 1254 }, { "epoch": 0.25, "learning_rate": 0.0008744724359064427, "loss": 1.5258, "step": 1255 }, { "epoch": 0.25, "learning_rate": 0.0008742553740855505, "loss": 1.5115, "step": 1256 }, { "epoch": 0.25, "learning_rate": 0.0008740381517449588, "loss": 1.4964, "step": 1257 }, { "epoch": 0.25, "learning_rate": 0.0008738207689778352, "loss": 1.5229, "step": 1258 }, { "epoch": 0.25, "learning_rate": 0.0008736032258774158, "loss": 1.5129, "step": 1259 }, { "epoch": 0.25, "learning_rate": 0.000873385522537006, "loss": 1.507, "step": 1260 }, { "epoch": 0.25, "learning_rate": 0.0008731676590499796, "loss": 1.496, "step": 1261 }, { "epoch": 0.26, "learning_rate": 0.0008729496355097793, "loss": 1.5157, "step": 1262 }, { "epoch": 0.26, "learning_rate": 0.0008727314520099161, "loss": 1.4978, "step": 1263 }, { "epoch": 0.26, "learning_rate": 0.0008725131086439699, "loss": 1.4692, "step": 1264 }, { "epoch": 0.26, "learning_rate": 0.0008722946055055892, "loss": 1.4887, "step": 1265 }, { "epoch": 0.26, "learning_rate": 0.0008720759426884906, "loss": 1.5221, "step": 1266 }, { "epoch": 0.26, "learning_rate": 0.0008718571202864598, "loss": 1.5223, "step": 1267 }, { "epoch": 0.26, "learning_rate": 0.0008716381383933507, "loss": 1.4995, "step": 1268 }, { "epoch": 0.26, "learning_rate": 0.0008714189971030852, "loss": 1.4754, "step": 1269 }, { "epoch": 0.26, "learning_rate": 0.0008711996965096542, "loss": 1.5254, "step": 1270 }, { "epoch": 0.26, "learning_rate": 0.0008709802367071166, "loss": 1.5065, "step": 1271 }, { "epoch": 0.26, "learning_rate": 0.0008707606177895996, "loss": 1.5385, "step": 1272 }, { "epoch": 0.26, "learning_rate": 0.0008705408398512986, "loss": 1.4868, "step": 1273 }, { "epoch": 0.26, "learning_rate": 0.0008703209029864774, "loss": 1.5124, "step": 1274 }, { "epoch": 0.26, "learning_rate": 0.0008701008072894679, "loss": 1.5089, "step": 1275 }, { "epoch": 0.26, "learning_rate": 0.0008698805528546699, "loss": 1.5711, "step": 1276 }, { "epoch": 0.26, "learning_rate": 0.0008696601397765513, "loss": 1.5459, "step": 1277 }, { "epoch": 0.26, "learning_rate": 0.0008694395681496486, "loss": 1.5022, "step": 1278 }, { "epoch": 0.26, "learning_rate": 0.0008692188380685658, "loss": 1.503, "step": 1279 }, { "epoch": 0.26, "learning_rate": 0.0008689979496279746, "loss": 1.4963, "step": 1280 }, { "epoch": 0.26, "learning_rate": 0.0008687769029226155, "loss": 1.5313, "step": 1281 }, { "epoch": 0.26, "learning_rate": 0.000868555698047296, "loss": 1.5202, "step": 1282 }, { "epoch": 0.26, "learning_rate": 0.0008683343350968918, "loss": 1.5024, "step": 1283 }, { "epoch": 0.26, "learning_rate": 0.0008681128141663465, "loss": 1.5352, "step": 1284 }, { "epoch": 0.26, "learning_rate": 0.0008678911353506715, "loss": 1.54, "step": 1285 }, { "epoch": 0.26, "learning_rate": 0.0008676692987449455, "loss": 1.5283, "step": 1286 }, { "epoch": 0.26, "learning_rate": 0.0008674473044443154, "loss": 1.5335, "step": 1287 }, { "epoch": 0.26, "learning_rate": 0.0008672251525439952, "loss": 1.4637, "step": 1288 }, { "epoch": 0.26, "learning_rate": 0.0008670028431392671, "loss": 1.4664, "step": 1289 }, { "epoch": 0.26, "learning_rate": 0.0008667803763254804, "loss": 1.5025, "step": 1290 }, { "epoch": 0.26, "learning_rate": 0.0008665577521980519, "loss": 1.5001, "step": 1291 }, { "epoch": 0.26, "learning_rate": 0.0008663349708524662, "loss": 1.5387, "step": 1292 }, { "epoch": 0.26, "learning_rate": 0.0008661120323842751, "loss": 1.5087, "step": 1293 }, { "epoch": 0.26, "learning_rate": 0.0008658889368890979, "loss": 1.5225, "step": 1294 }, { "epoch": 0.26, "learning_rate": 0.000865665684462621, "loss": 1.5144, "step": 1295 }, { "epoch": 0.26, "learning_rate": 0.0008654422752005985, "loss": 1.5001, "step": 1296 }, { "epoch": 0.26, "learning_rate": 0.0008652187091988516, "loss": 1.5855, "step": 1297 }, { "epoch": 0.26, "learning_rate": 0.0008649949865532686, "loss": 1.519, "step": 1298 }, { "epoch": 0.26, "learning_rate": 0.000864771107359805, "loss": 1.498, "step": 1299 }, { "epoch": 0.26, "learning_rate": 0.0008645470717144837, "loss": 1.4779, "step": 1300 }, { "epoch": 0.26, "learning_rate": 0.0008643228797133944, "loss": 1.4834, "step": 1301 }, { "epoch": 0.26, "learning_rate": 0.000864098531452694, "loss": 1.5587, "step": 1302 }, { "epoch": 0.26, "learning_rate": 0.0008638740270286065, "loss": 1.4837, "step": 1303 }, { "epoch": 0.26, "learning_rate": 0.0008636493665374228, "loss": 1.5317, "step": 1304 }, { "epoch": 0.26, "learning_rate": 0.0008634245500755005, "loss": 1.5093, "step": 1305 }, { "epoch": 0.26, "learning_rate": 0.0008631995777392645, "loss": 1.489, "step": 1306 }, { "epoch": 0.26, "learning_rate": 0.0008629744496252064, "loss": 1.5174, "step": 1307 }, { "epoch": 0.26, "learning_rate": 0.0008627491658298848, "loss": 1.5172, "step": 1308 }, { "epoch": 0.26, "learning_rate": 0.0008625237264499241, "loss": 1.4986, "step": 1309 }, { "epoch": 0.26, "learning_rate": 0.000862298131582017, "loss": 1.4696, "step": 1310 }, { "epoch": 0.27, "learning_rate": 0.000862072381322922, "loss": 1.5479, "step": 1311 }, { "epoch": 0.27, "learning_rate": 0.000861846475769464, "loss": 1.4935, "step": 1312 }, { "epoch": 0.27, "learning_rate": 0.0008616204150185348, "loss": 1.4968, "step": 1313 }, { "epoch": 0.27, "learning_rate": 0.000861394199167093, "loss": 1.506, "step": 1314 }, { "epoch": 0.27, "learning_rate": 0.0008611678283121636, "loss": 1.4529, "step": 1315 }, { "epoch": 0.27, "learning_rate": 0.000860941302550838, "loss": 1.5121, "step": 1316 }, { "epoch": 0.27, "learning_rate": 0.0008607146219802738, "loss": 1.5005, "step": 1317 }, { "epoch": 0.27, "learning_rate": 0.0008604877866976954, "loss": 1.5514, "step": 1318 }, { "epoch": 0.27, "learning_rate": 0.0008602607968003935, "loss": 1.5014, "step": 1319 }, { "epoch": 0.27, "learning_rate": 0.0008600336523857249, "loss": 1.5086, "step": 1320 }, { "epoch": 0.27, "learning_rate": 0.0008598063535511129, "loss": 1.5111, "step": 1321 }, { "epoch": 0.27, "learning_rate": 0.0008595789003940467, "loss": 1.4832, "step": 1322 }, { "epoch": 0.27, "learning_rate": 0.0008593512930120821, "loss": 1.5295, "step": 1323 }, { "epoch": 0.27, "learning_rate": 0.0008591235315028409, "loss": 1.4947, "step": 1324 }, { "epoch": 0.27, "learning_rate": 0.0008588956159640108, "loss": 1.5361, "step": 1325 }, { "epoch": 0.27, "learning_rate": 0.0008586675464933459, "loss": 1.5155, "step": 1326 }, { "epoch": 0.27, "learning_rate": 0.000858439323188666, "loss": 1.5497, "step": 1327 }, { "epoch": 0.27, "learning_rate": 0.0008582109461478572, "loss": 1.5046, "step": 1328 }, { "epoch": 0.27, "learning_rate": 0.0008579824154688711, "loss": 1.5156, "step": 1329 }, { "epoch": 0.27, "learning_rate": 0.0008577537312497258, "loss": 1.4995, "step": 1330 }, { "epoch": 0.27, "learning_rate": 0.0008575248935885048, "loss": 1.4933, "step": 1331 }, { "epoch": 0.27, "learning_rate": 0.0008572959025833573, "loss": 1.4649, "step": 1332 }, { "epoch": 0.27, "learning_rate": 0.0008570667583324988, "loss": 1.468, "step": 1333 }, { "epoch": 0.27, "learning_rate": 0.0008568374609342101, "loss": 1.5311, "step": 1334 }, { "epoch": 0.27, "learning_rate": 0.0008566080104868379, "loss": 1.5195, "step": 1335 }, { "epoch": 0.27, "learning_rate": 0.0008563784070887943, "loss": 1.5136, "step": 1336 }, { "epoch": 0.27, "learning_rate": 0.0008561486508385574, "loss": 1.5411, "step": 1337 }, { "epoch": 0.27, "learning_rate": 0.0008559187418346703, "loss": 1.5358, "step": 1338 }, { "epoch": 0.27, "learning_rate": 0.0008556886801757422, "loss": 1.5096, "step": 1339 }, { "epoch": 0.27, "learning_rate": 0.0008554584659604474, "loss": 1.5319, "step": 1340 }, { "epoch": 0.27, "learning_rate": 0.0008552280992875259, "loss": 1.4654, "step": 1341 }, { "epoch": 0.27, "learning_rate": 0.0008549975802557828, "loss": 1.5293, "step": 1342 }, { "epoch": 0.27, "learning_rate": 0.0008547669089640885, "loss": 1.4819, "step": 1343 }, { "epoch": 0.27, "learning_rate": 0.000854536085511379, "loss": 1.5234, "step": 1344 }, { "epoch": 0.27, "learning_rate": 0.0008543051099966557, "loss": 1.5607, "step": 1345 }, { "epoch": 0.27, "learning_rate": 0.0008540739825189848, "loss": 1.5287, "step": 1346 }, { "epoch": 0.27, "learning_rate": 0.0008538427031774978, "loss": 1.5465, "step": 1347 }, { "epoch": 0.27, "learning_rate": 0.0008536112720713913, "loss": 1.4925, "step": 1348 }, { "epoch": 0.27, "learning_rate": 0.0008533796892999273, "loss": 1.5225, "step": 1349 }, { "epoch": 0.27, "learning_rate": 0.0008531479549624324, "loss": 1.51, "step": 1350 }, { "epoch": 0.27, "learning_rate": 0.0008529160691582987, "loss": 1.501, "step": 1351 }, { "epoch": 0.27, "learning_rate": 0.0008526840319869827, "loss": 1.5051, "step": 1352 }, { "epoch": 0.27, "learning_rate": 0.0008524518435480061, "loss": 1.5067, "step": 1353 }, { "epoch": 0.27, "learning_rate": 0.0008522195039409557, "loss": 1.4834, "step": 1354 }, { "epoch": 0.27, "learning_rate": 0.000851987013265483, "loss": 1.5387, "step": 1355 }, { "epoch": 0.27, "learning_rate": 0.000851754371621304, "loss": 1.5265, "step": 1356 }, { "epoch": 0.27, "learning_rate": 0.0008515215791081998, "loss": 1.5132, "step": 1357 }, { "epoch": 0.27, "learning_rate": 0.0008512886358260161, "loss": 1.5221, "step": 1358 }, { "epoch": 0.27, "learning_rate": 0.0008510555418746631, "loss": 1.5309, "step": 1359 }, { "epoch": 0.27, "learning_rate": 0.0008508222973541157, "loss": 1.4964, "step": 1360 }, { "epoch": 0.28, "learning_rate": 0.000850588902364414, "loss": 1.4933, "step": 1361 }, { "epoch": 0.28, "learning_rate": 0.0008503553570056615, "loss": 1.4931, "step": 1362 }, { "epoch": 0.28, "learning_rate": 0.000850121661378027, "loss": 1.5339, "step": 1363 }, { "epoch": 0.28, "learning_rate": 0.0008498878155817437, "loss": 1.5083, "step": 1364 }, { "epoch": 0.28, "learning_rate": 0.0008496538197171087, "loss": 1.4963, "step": 1365 }, { "epoch": 0.28, "learning_rate": 0.000849419673884484, "loss": 1.4908, "step": 1366 }, { "epoch": 0.28, "learning_rate": 0.0008491853781842958, "loss": 1.4619, "step": 1367 }, { "epoch": 0.28, "learning_rate": 0.0008489509327170344, "loss": 1.487, "step": 1368 }, { "epoch": 0.28, "learning_rate": 0.0008487163375832545, "loss": 1.5207, "step": 1369 }, { "epoch": 0.28, "learning_rate": 0.0008484815928835748, "loss": 1.5172, "step": 1370 }, { "epoch": 0.28, "learning_rate": 0.0008482466987186785, "loss": 1.537, "step": 1371 }, { "epoch": 0.28, "learning_rate": 0.0008480116551893125, "loss": 1.5004, "step": 1372 }, { "epoch": 0.28, "learning_rate": 0.0008477764623962881, "loss": 1.4711, "step": 1373 }, { "epoch": 0.28, "learning_rate": 0.0008475411204404803, "loss": 1.5092, "step": 1374 }, { "epoch": 0.28, "learning_rate": 0.0008473056294228285, "loss": 1.509, "step": 1375 }, { "epoch": 0.28, "learning_rate": 0.0008470699894443357, "loss": 1.5351, "step": 1376 }, { "epoch": 0.28, "learning_rate": 0.0008468342006060687, "loss": 1.4922, "step": 1377 }, { "epoch": 0.28, "learning_rate": 0.0008465982630091586, "loss": 1.4967, "step": 1378 }, { "epoch": 0.28, "learning_rate": 0.0008463621767547997, "loss": 1.4791, "step": 1379 }, { "epoch": 0.28, "learning_rate": 0.0008461259419442507, "loss": 1.5126, "step": 1380 }, { "epoch": 0.28, "learning_rate": 0.0008458895586788334, "loss": 1.4815, "step": 1381 }, { "epoch": 0.28, "learning_rate": 0.0008456530270599339, "loss": 1.5334, "step": 1382 }, { "epoch": 0.28, "learning_rate": 0.0008454163471890013, "loss": 1.5005, "step": 1383 }, { "epoch": 0.28, "learning_rate": 0.0008451795191675487, "loss": 1.5202, "step": 1384 }, { "epoch": 0.28, "learning_rate": 0.0008449425430971529, "loss": 1.5378, "step": 1385 }, { "epoch": 0.28, "learning_rate": 0.0008447054190794534, "loss": 1.4925, "step": 1386 }, { "epoch": 0.28, "learning_rate": 0.0008444681472161542, "loss": 1.5343, "step": 1387 }, { "epoch": 0.28, "learning_rate": 0.0008442307276090219, "loss": 1.496, "step": 1388 }, { "epoch": 0.28, "learning_rate": 0.0008439931603598868, "loss": 1.5178, "step": 1389 }, { "epoch": 0.28, "learning_rate": 0.0008437554455706425, "loss": 1.517, "step": 1390 }, { "epoch": 0.28, "learning_rate": 0.000843517583343246, "loss": 1.4971, "step": 1391 }, { "epoch": 0.28, "learning_rate": 0.0008432795737797172, "loss": 1.508, "step": 1392 }, { "epoch": 0.28, "learning_rate": 0.0008430414169821396, "loss": 1.4963, "step": 1393 }, { "epoch": 0.28, "learning_rate": 0.0008428031130526594, "loss": 1.4978, "step": 1394 }, { "epoch": 0.28, "learning_rate": 0.0008425646620934864, "loss": 1.5354, "step": 1395 }, { "epoch": 0.28, "learning_rate": 0.0008423260642068932, "loss": 1.5375, "step": 1396 }, { "epoch": 0.28, "learning_rate": 0.0008420873194952153, "loss": 1.5326, "step": 1397 }, { "epoch": 0.28, "learning_rate": 0.0008418484280608513, "loss": 1.472, "step": 1398 }, { "epoch": 0.28, "learning_rate": 0.0008416093900062629, "loss": 1.4948, "step": 1399 }, { "epoch": 0.28, "learning_rate": 0.0008413702054339742, "loss": 1.4949, "step": 1400 }, { "epoch": 0.28, "learning_rate": 0.000841130874446573, "loss": 1.4821, "step": 1401 }, { "epoch": 0.28, "learning_rate": 0.0008408913971467089, "loss": 1.4922, "step": 1402 }, { "epoch": 0.28, "learning_rate": 0.0008406517736370949, "loss": 1.5556, "step": 1403 }, { "epoch": 0.28, "learning_rate": 0.0008404120040205066, "loss": 1.5269, "step": 1404 }, { "epoch": 0.28, "learning_rate": 0.000840172088399782, "loss": 1.4665, "step": 1405 }, { "epoch": 0.28, "learning_rate": 0.000839932026877822, "loss": 1.4987, "step": 1406 }, { "epoch": 0.28, "learning_rate": 0.00083969181955759, "loss": 1.4958, "step": 1407 }, { "epoch": 0.28, "learning_rate": 0.000839451466542112, "loss": 1.5257, "step": 1408 }, { "epoch": 0.28, "learning_rate": 0.0008392109679344763, "loss": 1.4944, "step": 1409 }, { "epoch": 0.29, "learning_rate": 0.0008389703238378339, "loss": 1.4839, "step": 1410 }, { "epoch": 0.29, "learning_rate": 0.0008387295343553979, "loss": 1.474, "step": 1411 }, { "epoch": 0.29, "learning_rate": 0.000838488599590444, "loss": 1.4919, "step": 1412 }, { "epoch": 0.29, "learning_rate": 0.0008382475196463102, "loss": 1.55, "step": 1413 }, { "epoch": 0.29, "learning_rate": 0.0008380062946263964, "loss": 1.5054, "step": 1414 }, { "epoch": 0.29, "learning_rate": 0.0008377649246341654, "loss": 1.4616, "step": 1415 }, { "epoch": 0.29, "learning_rate": 0.0008375234097731418, "loss": 1.4969, "step": 1416 }, { "epoch": 0.29, "learning_rate": 0.000837281750146912, "loss": 1.5027, "step": 1417 }, { "epoch": 0.29, "learning_rate": 0.0008370399458591251, "loss": 1.5209, "step": 1418 }, { "epoch": 0.29, "learning_rate": 0.0008367979970134917, "loss": 1.5168, "step": 1419 }, { "epoch": 0.29, "learning_rate": 0.0008365559037137851, "loss": 1.4843, "step": 1420 }, { "epoch": 0.29, "learning_rate": 0.0008363136660638397, "loss": 1.4916, "step": 1421 }, { "epoch": 0.29, "learning_rate": 0.0008360712841675526, "loss": 1.5108, "step": 1422 }, { "epoch": 0.29, "learning_rate": 0.0008358287581288823, "loss": 1.4948, "step": 1423 }, { "epoch": 0.29, "learning_rate": 0.0008355860880518489, "loss": 1.4986, "step": 1424 }, { "epoch": 0.29, "learning_rate": 0.0008353432740405353, "loss": 1.5156, "step": 1425 }, { "epoch": 0.29, "learning_rate": 0.0008351003161990847, "loss": 1.4962, "step": 1426 }, { "epoch": 0.29, "learning_rate": 0.0008348572146317033, "loss": 1.5429, "step": 1427 }, { "epoch": 0.29, "learning_rate": 0.000834613969442658, "loss": 1.4964, "step": 1428 }, { "epoch": 0.29, "learning_rate": 0.0008343705807362778, "loss": 1.4906, "step": 1429 }, { "epoch": 0.29, "learning_rate": 0.0008341270486169534, "loss": 1.5303, "step": 1430 }, { "epoch": 0.29, "learning_rate": 0.0008338833731891364, "loss": 1.4837, "step": 1431 }, { "epoch": 0.29, "learning_rate": 0.0008336395545573403, "loss": 1.5395, "step": 1432 }, { "epoch": 0.29, "learning_rate": 0.0008333955928261401, "loss": 1.4742, "step": 1433 }, { "epoch": 0.29, "learning_rate": 0.0008331514881001718, "loss": 1.5006, "step": 1434 }, { "epoch": 0.29, "learning_rate": 0.0008329072404841332, "loss": 1.5224, "step": 1435 }, { "epoch": 0.29, "learning_rate": 0.0008326628500827827, "loss": 1.513, "step": 1436 }, { "epoch": 0.29, "learning_rate": 0.0008324183170009407, "loss": 1.4728, "step": 1437 }, { "epoch": 0.29, "learning_rate": 0.0008321736413434884, "loss": 1.4935, "step": 1438 }, { "epoch": 0.29, "learning_rate": 0.0008319288232153684, "loss": 1.4888, "step": 1439 }, { "epoch": 0.29, "learning_rate": 0.0008316838627215837, "loss": 1.5183, "step": 1440 }, { "epoch": 0.29, "learning_rate": 0.0008314387599671994, "loss": 1.5253, "step": 1441 }, { "epoch": 0.29, "learning_rate": 0.0008311935150573409, "loss": 1.4955, "step": 1442 }, { "epoch": 0.29, "learning_rate": 0.0008309481280971947, "loss": 1.5268, "step": 1443 }, { "epoch": 0.29, "learning_rate": 0.0008307025991920085, "loss": 1.5043, "step": 1444 }, { "epoch": 0.29, "learning_rate": 0.0008304569284470904, "loss": 1.4785, "step": 1445 }, { "epoch": 0.29, "learning_rate": 0.0008302111159678099, "loss": 1.5116, "step": 1446 }, { "epoch": 0.29, "learning_rate": 0.0008299651618595967, "loss": 1.4906, "step": 1447 }, { "epoch": 0.29, "learning_rate": 0.0008297190662279419, "loss": 1.4774, "step": 1448 }, { "epoch": 0.29, "learning_rate": 0.0008294728291783967, "loss": 1.5044, "step": 1449 }, { "epoch": 0.29, "learning_rate": 0.0008292264508165733, "loss": 1.5501, "step": 1450 }, { "epoch": 0.29, "learning_rate": 0.0008289799312481442, "loss": 1.5291, "step": 1451 }, { "epoch": 0.29, "learning_rate": 0.0008287332705788431, "loss": 1.4934, "step": 1452 }, { "epoch": 0.29, "learning_rate": 0.0008284864689144634, "loss": 1.5073, "step": 1453 }, { "epoch": 0.29, "learning_rate": 0.0008282395263608596, "loss": 1.5367, "step": 1454 }, { "epoch": 0.29, "learning_rate": 0.0008279924430239462, "loss": 1.5328, "step": 1455 }, { "epoch": 0.29, "learning_rate": 0.0008277452190096985, "loss": 1.4924, "step": 1456 }, { "epoch": 0.29, "learning_rate": 0.0008274978544241517, "loss": 1.4566, "step": 1457 }, { "epoch": 0.29, "learning_rate": 0.0008272503493734018, "loss": 1.4803, "step": 1458 }, { "epoch": 0.29, "learning_rate": 0.0008270027039636044, "loss": 1.5072, "step": 1459 }, { "epoch": 0.3, "learning_rate": 0.0008267549183009759, "loss": 1.4776, "step": 1460 }, { "epoch": 0.3, "learning_rate": 0.0008265069924917925, "loss": 1.5367, "step": 1461 }, { "epoch": 0.3, "learning_rate": 0.0008262589266423908, "loss": 1.5141, "step": 1462 }, { "epoch": 0.3, "learning_rate": 0.000826010720859167, "loss": 1.4596, "step": 1463 }, { "epoch": 0.3, "learning_rate": 0.0008257623752485779, "loss": 1.5285, "step": 1464 }, { "epoch": 0.3, "learning_rate": 0.0008255138899171397, "loss": 1.5066, "step": 1465 }, { "epoch": 0.3, "learning_rate": 0.000825265264971429, "loss": 1.5328, "step": 1466 }, { "epoch": 0.3, "learning_rate": 0.0008250165005180819, "loss": 1.5011, "step": 1467 }, { "epoch": 0.3, "learning_rate": 0.0008247675966637947, "loss": 1.5323, "step": 1468 }, { "epoch": 0.3, "learning_rate": 0.0008245185535153232, "loss": 1.5173, "step": 1469 }, { "epoch": 0.3, "learning_rate": 0.0008242693711794831, "loss": 1.5011, "step": 1470 }, { "epoch": 0.3, "learning_rate": 0.0008240200497631497, "loss": 1.5305, "step": 1471 }, { "epoch": 0.3, "learning_rate": 0.0008237705893732581, "loss": 1.5069, "step": 1472 }, { "epoch": 0.3, "learning_rate": 0.0008235209901168026, "loss": 1.477, "step": 1473 }, { "epoch": 0.3, "learning_rate": 0.0008232712521008379, "loss": 1.523, "step": 1474 }, { "epoch": 0.3, "learning_rate": 0.0008230213754324773, "loss": 1.5092, "step": 1475 }, { "epoch": 0.3, "learning_rate": 0.000822771360218894, "loss": 1.5432, "step": 1476 }, { "epoch": 0.3, "learning_rate": 0.0008225212065673205, "loss": 1.5212, "step": 1477 }, { "epoch": 0.3, "learning_rate": 0.000822270914585049, "loss": 1.5133, "step": 1478 }, { "epoch": 0.3, "learning_rate": 0.0008220204843794306, "loss": 1.4819, "step": 1479 }, { "epoch": 0.3, "learning_rate": 0.000821769916057876, "loss": 1.4365, "step": 1480 }, { "epoch": 0.3, "learning_rate": 0.0008215192097278548, "loss": 1.4892, "step": 1481 }, { "epoch": 0.3, "learning_rate": 0.000821268365496896, "loss": 1.4891, "step": 1482 }, { "epoch": 0.3, "learning_rate": 0.000821017383472588, "loss": 1.5007, "step": 1483 }, { "epoch": 0.3, "learning_rate": 0.0008207662637625779, "loss": 1.5186, "step": 1484 }, { "epoch": 0.3, "learning_rate": 0.0008205150064745719, "loss": 1.5141, "step": 1485 }, { "epoch": 0.3, "learning_rate": 0.0008202636117163356, "loss": 1.471, "step": 1486 }, { "epoch": 0.3, "learning_rate": 0.0008200120795956928, "loss": 1.5047, "step": 1487 }, { "epoch": 0.3, "learning_rate": 0.000819760410220527, "loss": 1.4888, "step": 1488 }, { "epoch": 0.3, "learning_rate": 0.0008195086036987805, "loss": 1.524, "step": 1489 }, { "epoch": 0.3, "learning_rate": 0.0008192566601384534, "loss": 1.5148, "step": 1490 }, { "epoch": 0.3, "learning_rate": 0.000819004579647606, "loss": 1.5097, "step": 1491 }, { "epoch": 0.3, "learning_rate": 0.0008187523623343566, "loss": 1.4529, "step": 1492 }, { "epoch": 0.3, "learning_rate": 0.0008185000083068821, "loss": 1.5111, "step": 1493 }, { "epoch": 0.3, "learning_rate": 0.000818247517673418, "loss": 1.4389, "step": 1494 }, { "epoch": 0.3, "learning_rate": 0.0008179948905422593, "loss": 1.4923, "step": 1495 }, { "epoch": 0.3, "learning_rate": 0.0008177421270217582, "loss": 1.4887, "step": 1496 }, { "epoch": 0.3, "learning_rate": 0.0008174892272203261, "loss": 1.4971, "step": 1497 }, { "epoch": 0.3, "learning_rate": 0.000817236191246433, "loss": 1.5223, "step": 1498 }, { "epoch": 0.3, "learning_rate": 0.0008169830192086071, "loss": 1.4876, "step": 1499 }, { "epoch": 0.3, "learning_rate": 0.0008167297112154346, "loss": 1.5135, "step": 1500 }, { "epoch": 0.3, "learning_rate": 0.0008164762673755609, "loss": 1.4993, "step": 1501 }, { "epoch": 0.3, "learning_rate": 0.0008162226877976886, "loss": 1.4967, "step": 1502 }, { "epoch": 0.3, "learning_rate": 0.0008159689725905795, "loss": 1.4846, "step": 1503 }, { "epoch": 0.3, "learning_rate": 0.0008157151218630527, "loss": 1.527, "step": 1504 }, { "epoch": 0.3, "learning_rate": 0.000815461135723986, "loss": 1.4661, "step": 1505 }, { "epoch": 0.3, "learning_rate": 0.0008152070142823153, "loss": 1.5337, "step": 1506 }, { "epoch": 0.3, "learning_rate": 0.0008149527576470342, "loss": 1.5238, "step": 1507 }, { "epoch": 0.3, "learning_rate": 0.0008146983659271942, "loss": 1.4479, "step": 1508 }, { "epoch": 0.31, "learning_rate": 0.0008144438392319055, "loss": 1.5335, "step": 1509 }, { "epoch": 0.31, "learning_rate": 0.0008141891776703354, "loss": 1.4987, "step": 1510 }, { "epoch": 0.31, "learning_rate": 0.0008139343813517092, "loss": 1.4778, "step": 1511 }, { "epoch": 0.31, "learning_rate": 0.0008136794503853102, "loss": 1.4737, "step": 1512 }, { "epoch": 0.31, "learning_rate": 0.0008134243848804794, "loss": 1.5144, "step": 1513 }, { "epoch": 0.31, "learning_rate": 0.0008131691849466153, "loss": 1.5394, "step": 1514 }, { "epoch": 0.31, "learning_rate": 0.0008129138506931745, "loss": 1.5289, "step": 1515 }, { "epoch": 0.31, "learning_rate": 0.0008126583822296707, "loss": 1.4855, "step": 1516 }, { "epoch": 0.31, "learning_rate": 0.0008124027796656757, "loss": 1.5759, "step": 1517 }, { "epoch": 0.31, "learning_rate": 0.000812147043110818, "loss": 1.5203, "step": 1518 }, { "epoch": 0.31, "learning_rate": 0.0008118911726747847, "loss": 1.4889, "step": 1519 }, { "epoch": 0.31, "learning_rate": 0.000811635168467319, "loss": 1.537, "step": 1520 }, { "epoch": 0.31, "learning_rate": 0.0008113790305982227, "loss": 1.4959, "step": 1521 }, { "epoch": 0.31, "learning_rate": 0.0008111227591773544, "loss": 1.5228, "step": 1522 }, { "epoch": 0.31, "learning_rate": 0.0008108663543146298, "loss": 1.4637, "step": 1523 }, { "epoch": 0.31, "learning_rate": 0.0008106098161200219, "loss": 1.4778, "step": 1524 }, { "epoch": 0.31, "learning_rate": 0.0008103531447035613, "loss": 1.4674, "step": 1525 }, { "epoch": 0.31, "learning_rate": 0.0008100963401753354, "loss": 1.5111, "step": 1526 }, { "epoch": 0.31, "learning_rate": 0.0008098394026454885, "loss": 1.489, "step": 1527 }, { "epoch": 0.31, "learning_rate": 0.0008095823322242224, "loss": 1.5055, "step": 1528 }, { "epoch": 0.31, "learning_rate": 0.0008093251290217958, "loss": 1.5001, "step": 1529 }, { "epoch": 0.31, "learning_rate": 0.0008090677931485239, "loss": 1.5466, "step": 1530 }, { "epoch": 0.31, "learning_rate": 0.0008088103247147793, "loss": 1.4982, "step": 1531 }, { "epoch": 0.31, "learning_rate": 0.0008085527238309913, "loss": 1.4761, "step": 1532 }, { "epoch": 0.31, "learning_rate": 0.000808294990607646, "loss": 1.5016, "step": 1533 }, { "epoch": 0.31, "learning_rate": 0.0008080371251552863, "loss": 1.5421, "step": 1534 }, { "epoch": 0.31, "learning_rate": 0.0008077791275845118, "loss": 1.52, "step": 1535 }, { "epoch": 0.31, "learning_rate": 0.0008075209980059786, "loss": 1.5172, "step": 1536 }, { "epoch": 0.31, "learning_rate": 0.0008072627365303995, "loss": 1.5015, "step": 1537 }, { "epoch": 0.31, "learning_rate": 0.0008070043432685441, "loss": 1.5191, "step": 1538 }, { "epoch": 0.31, "learning_rate": 0.0008067458183312384, "loss": 1.4611, "step": 1539 }, { "epoch": 0.31, "learning_rate": 0.0008064871618293645, "loss": 1.4975, "step": 1540 }, { "epoch": 0.31, "learning_rate": 0.0008062283738738619, "loss": 1.5112, "step": 1541 }, { "epoch": 0.31, "learning_rate": 0.0008059694545757251, "loss": 1.5197, "step": 1542 }, { "epoch": 0.31, "learning_rate": 0.0008057104040460061, "loss": 1.4843, "step": 1543 }, { "epoch": 0.31, "learning_rate": 0.0008054512223958126, "loss": 1.4639, "step": 1544 }, { "epoch": 0.31, "learning_rate": 0.000805191909736309, "loss": 1.527, "step": 1545 }, { "epoch": 0.31, "learning_rate": 0.0008049324661787149, "loss": 1.5261, "step": 1546 }, { "epoch": 0.31, "learning_rate": 0.0008046728918343076, "loss": 1.5325, "step": 1547 }, { "epoch": 0.31, "learning_rate": 0.0008044131868144191, "loss": 1.497, "step": 1548 }, { "epoch": 0.31, "learning_rate": 0.000804153351230438, "loss": 1.5157, "step": 1549 }, { "epoch": 0.31, "learning_rate": 0.0008038933851938091, "loss": 1.4751, "step": 1550 }, { "epoch": 0.31, "learning_rate": 0.0008036332888160327, "loss": 1.4609, "step": 1551 }, { "epoch": 0.31, "learning_rate": 0.0008033730622086652, "loss": 1.4858, "step": 1552 }, { "epoch": 0.31, "learning_rate": 0.000803112705483319, "loss": 1.4739, "step": 1553 }, { "epoch": 0.31, "learning_rate": 0.0008028522187516622, "loss": 1.5114, "step": 1554 }, { "epoch": 0.31, "learning_rate": 0.0008025916021254187, "loss": 1.4305, "step": 1555 }, { "epoch": 0.31, "learning_rate": 0.000802330855716368, "loss": 1.5026, "step": 1556 }, { "epoch": 0.31, "learning_rate": 0.0008020699796363452, "loss": 1.5379, "step": 1557 }, { "epoch": 0.31, "learning_rate": 0.0008018089739972412, "loss": 1.445, "step": 1558 }, { "epoch": 0.32, "learning_rate": 0.0008015478389110027, "loss": 1.487, "step": 1559 }, { "epoch": 0.32, "learning_rate": 0.0008012865744896312, "loss": 1.5434, "step": 1560 }, { "epoch": 0.32, "learning_rate": 0.0008010251808451844, "loss": 1.4516, "step": 1561 }, { "epoch": 0.32, "learning_rate": 0.0008007636580897753, "loss": 1.5433, "step": 1562 }, { "epoch": 0.32, "learning_rate": 0.0008005020063355714, "loss": 1.4959, "step": 1563 }, { "epoch": 0.32, "learning_rate": 0.000800240225694797, "loss": 1.4783, "step": 1564 }, { "epoch": 0.32, "learning_rate": 0.0007999783162797305, "loss": 1.5285, "step": 1565 }, { "epoch": 0.32, "learning_rate": 0.0007997162782027061, "loss": 1.4707, "step": 1566 }, { "epoch": 0.32, "learning_rate": 0.0007994541115761129, "loss": 1.5161, "step": 1567 }, { "epoch": 0.32, "learning_rate": 0.0007991918165123954, "loss": 1.5051, "step": 1568 }, { "epoch": 0.32, "learning_rate": 0.000798929393124053, "loss": 1.5001, "step": 1569 }, { "epoch": 0.32, "learning_rate": 0.0007986668415236401, "loss": 1.4729, "step": 1570 }, { "epoch": 0.32, "learning_rate": 0.0007984041618237664, "loss": 1.5259, "step": 1571 }, { "epoch": 0.32, "learning_rate": 0.0007981413541370961, "loss": 1.4965, "step": 1572 }, { "epoch": 0.32, "learning_rate": 0.0007978784185763486, "loss": 1.4696, "step": 1573 }, { "epoch": 0.32, "learning_rate": 0.0007976153552542982, "loss": 1.4868, "step": 1574 }, { "epoch": 0.32, "learning_rate": 0.0007973521642837736, "loss": 1.4855, "step": 1575 }, { "epoch": 0.32, "learning_rate": 0.0007970888457776588, "loss": 1.4907, "step": 1576 }, { "epoch": 0.32, "learning_rate": 0.000796825399848892, "loss": 1.4535, "step": 1577 }, { "epoch": 0.32, "learning_rate": 0.0007965618266104664, "loss": 1.548, "step": 1578 }, { "epoch": 0.32, "learning_rate": 0.0007962981261754295, "loss": 1.5268, "step": 1579 }, { "epoch": 0.32, "learning_rate": 0.0007960342986568836, "loss": 1.4937, "step": 1580 }, { "epoch": 0.32, "learning_rate": 0.0007957703441679856, "loss": 1.4521, "step": 1581 }, { "epoch": 0.32, "learning_rate": 0.0007955062628219463, "loss": 1.4917, "step": 1582 }, { "epoch": 0.32, "learning_rate": 0.0007952420547320316, "loss": 1.4644, "step": 1583 }, { "epoch": 0.32, "learning_rate": 0.0007949777200115615, "loss": 1.4901, "step": 1584 }, { "epoch": 0.32, "learning_rate": 0.0007947132587739101, "loss": 1.5209, "step": 1585 }, { "epoch": 0.32, "learning_rate": 0.000794448671132506, "loss": 1.4633, "step": 1586 }, { "epoch": 0.32, "learning_rate": 0.000794183957200832, "loss": 1.4997, "step": 1587 }, { "epoch": 0.32, "learning_rate": 0.0007939191170924249, "loss": 1.4875, "step": 1588 }, { "epoch": 0.32, "learning_rate": 0.0007936541509208757, "loss": 1.4693, "step": 1589 }, { "epoch": 0.32, "learning_rate": 0.0007933890587998297, "loss": 1.4752, "step": 1590 }, { "epoch": 0.32, "learning_rate": 0.0007931238408429859, "loss": 1.4481, "step": 1591 }, { "epoch": 0.32, "learning_rate": 0.0007928584971640975, "loss": 1.5159, "step": 1592 }, { "epoch": 0.32, "learning_rate": 0.0007925930278769713, "loss": 1.5013, "step": 1593 }, { "epoch": 0.32, "learning_rate": 0.0007923274330954683, "loss": 1.4636, "step": 1594 }, { "epoch": 0.32, "learning_rate": 0.0007920617129335033, "loss": 1.5256, "step": 1595 }, { "epoch": 0.32, "learning_rate": 0.0007917958675050448, "loss": 1.4818, "step": 1596 }, { "epoch": 0.32, "learning_rate": 0.000791529896924115, "loss": 1.5693, "step": 1597 }, { "epoch": 0.32, "learning_rate": 0.0007912638013047895, "loss": 1.5324, "step": 1598 }, { "epoch": 0.32, "learning_rate": 0.0007909975807611982, "loss": 1.5336, "step": 1599 }, { "epoch": 0.32, "learning_rate": 0.000790731235407524, "loss": 1.514, "step": 1600 }, { "epoch": 0.32, "learning_rate": 0.0007904647653580036, "loss": 1.4885, "step": 1601 }, { "epoch": 0.32, "learning_rate": 0.0007901981707269273, "loss": 1.4889, "step": 1602 }, { "epoch": 0.32, "learning_rate": 0.0007899314516286385, "loss": 1.5105, "step": 1603 }, { "epoch": 0.32, "learning_rate": 0.0007896646081775339, "loss": 1.535, "step": 1604 }, { "epoch": 0.32, "learning_rate": 0.0007893976404880641, "loss": 1.4738, "step": 1605 }, { "epoch": 0.32, "learning_rate": 0.0007891305486747327, "loss": 1.4881, "step": 1606 }, { "epoch": 0.32, "learning_rate": 0.0007888633328520963, "loss": 1.512, "step": 1607 }, { "epoch": 0.33, "learning_rate": 0.000788595993134765, "loss": 1.4818, "step": 1608 }, { "epoch": 0.33, "learning_rate": 0.0007883285296374018, "loss": 1.4841, "step": 1609 }, { "epoch": 0.33, "learning_rate": 0.000788060942474723, "loss": 1.4916, "step": 1610 }, { "epoch": 0.33, "learning_rate": 0.000787793231761498, "loss": 1.5255, "step": 1611 }, { "epoch": 0.33, "learning_rate": 0.0007875253976125488, "loss": 1.52, "step": 1612 }, { "epoch": 0.33, "learning_rate": 0.0007872574401427506, "loss": 1.4688, "step": 1613 }, { "epoch": 0.33, "learning_rate": 0.0007869893594670316, "loss": 1.5304, "step": 1614 }, { "epoch": 0.33, "learning_rate": 0.0007867211557003727, "loss": 1.5066, "step": 1615 }, { "epoch": 0.33, "learning_rate": 0.0007864528289578077, "loss": 1.49, "step": 1616 }, { "epoch": 0.33, "learning_rate": 0.000786184379354423, "loss": 1.4746, "step": 1617 }, { "epoch": 0.33, "learning_rate": 0.0007859158070053577, "loss": 1.4568, "step": 1618 }, { "epoch": 0.33, "learning_rate": 0.0007856471120258037, "loss": 1.4828, "step": 1619 }, { "epoch": 0.33, "learning_rate": 0.0007853782945310053, "loss": 1.4878, "step": 1620 }, { "epoch": 0.33, "learning_rate": 0.0007851093546362596, "loss": 1.5154, "step": 1621 }, { "epoch": 0.33, "learning_rate": 0.0007848402924569158, "loss": 1.4918, "step": 1622 }, { "epoch": 0.33, "learning_rate": 0.0007845711081083761, "loss": 1.5194, "step": 1623 }, { "epoch": 0.33, "learning_rate": 0.0007843018017060946, "loss": 1.4911, "step": 1624 }, { "epoch": 0.33, "learning_rate": 0.0007840323733655779, "loss": 1.4721, "step": 1625 }, { "epoch": 0.33, "learning_rate": 0.0007837628232023852, "loss": 1.5026, "step": 1626 }, { "epoch": 0.33, "learning_rate": 0.0007834931513321275, "loss": 1.5541, "step": 1627 }, { "epoch": 0.33, "learning_rate": 0.0007832233578704682, "loss": 1.5034, "step": 1628 }, { "epoch": 0.33, "learning_rate": 0.0007829534429331229, "loss": 1.4796, "step": 1629 }, { "epoch": 0.33, "learning_rate": 0.0007826834066358592, "loss": 1.5192, "step": 1630 }, { "epoch": 0.33, "learning_rate": 0.0007824132490944967, "loss": 1.4391, "step": 1631 }, { "epoch": 0.33, "learning_rate": 0.0007821429704249074, "loss": 1.4891, "step": 1632 }, { "epoch": 0.33, "learning_rate": 0.0007818725707430148, "loss": 1.4921, "step": 1633 }, { "epoch": 0.33, "learning_rate": 0.0007816020501647944, "loss": 1.4764, "step": 1634 }, { "epoch": 0.33, "learning_rate": 0.0007813314088062738, "loss": 1.4909, "step": 1635 }, { "epoch": 0.33, "learning_rate": 0.0007810606467835318, "loss": 1.4907, "step": 1636 }, { "epoch": 0.33, "learning_rate": 0.0007807897642126998, "loss": 1.5153, "step": 1637 }, { "epoch": 0.33, "learning_rate": 0.0007805187612099602, "loss": 1.5098, "step": 1638 }, { "epoch": 0.33, "learning_rate": 0.0007802476378915475, "loss": 1.5163, "step": 1639 }, { "epoch": 0.33, "learning_rate": 0.0007799763943737475, "loss": 1.4579, "step": 1640 }, { "epoch": 0.33, "learning_rate": 0.0007797050307728979, "loss": 1.4945, "step": 1641 }, { "epoch": 0.33, "learning_rate": 0.0007794335472053874, "loss": 1.4783, "step": 1642 }, { "epoch": 0.33, "learning_rate": 0.0007791619437876566, "loss": 1.4946, "step": 1643 }, { "epoch": 0.33, "learning_rate": 0.0007788902206361973, "loss": 1.5123, "step": 1644 }, { "epoch": 0.33, "learning_rate": 0.0007786183778675526, "loss": 1.527, "step": 1645 }, { "epoch": 0.33, "learning_rate": 0.0007783464155983173, "loss": 1.4693, "step": 1646 }, { "epoch": 0.33, "learning_rate": 0.0007780743339451371, "loss": 1.4465, "step": 1647 }, { "epoch": 0.33, "learning_rate": 0.0007778021330247085, "loss": 1.4875, "step": 1648 }, { "epoch": 0.33, "learning_rate": 0.0007775298129537801, "loss": 1.4802, "step": 1649 }, { "epoch": 0.33, "learning_rate": 0.000777257373849151, "loss": 1.4962, "step": 1650 }, { "epoch": 0.33, "learning_rate": 0.0007769848158276715, "loss": 1.5125, "step": 1651 }, { "epoch": 0.33, "learning_rate": 0.0007767121390062426, "loss": 1.5026, "step": 1652 }, { "epoch": 0.33, "learning_rate": 0.0007764393435018169, "loss": 1.5332, "step": 1653 }, { "epoch": 0.33, "learning_rate": 0.0007761664294313975, "loss": 1.4958, "step": 1654 }, { "epoch": 0.33, "learning_rate": 0.0007758933969120381, "loss": 1.4971, "step": 1655 }, { "epoch": 0.33, "learning_rate": 0.0007756202460608438, "loss": 1.5314, "step": 1656 }, { "epoch": 0.33, "learning_rate": 0.0007753469769949701, "loss": 1.4888, "step": 1657 }, { "epoch": 0.34, "learning_rate": 0.000775073589831623, "loss": 1.5237, "step": 1658 }, { "epoch": 0.34, "learning_rate": 0.0007748000846880596, "loss": 1.506, "step": 1659 }, { "epoch": 0.34, "learning_rate": 0.0007745264616815875, "loss": 1.4858, "step": 1660 }, { "epoch": 0.34, "learning_rate": 0.0007742527209295644, "loss": 1.5137, "step": 1661 }, { "epoch": 0.34, "learning_rate": 0.0007739788625493993, "loss": 1.4907, "step": 1662 }, { "epoch": 0.34, "learning_rate": 0.000773704886658551, "loss": 1.5147, "step": 1663 }, { "epoch": 0.34, "learning_rate": 0.0007734307933745287, "loss": 1.4825, "step": 1664 }, { "epoch": 0.34, "learning_rate": 0.0007731565828148926, "loss": 1.4943, "step": 1665 }, { "epoch": 0.34, "learning_rate": 0.0007728822550972523, "loss": 1.4955, "step": 1666 }, { "epoch": 0.34, "learning_rate": 0.0007726078103392684, "loss": 1.4706, "step": 1667 }, { "epoch": 0.34, "learning_rate": 0.0007723332486586514, "loss": 1.4733, "step": 1668 }, { "epoch": 0.34, "learning_rate": 0.000772058570173162, "loss": 1.4564, "step": 1669 }, { "epoch": 0.34, "learning_rate": 0.0007717837750006106, "loss": 1.5797, "step": 1670 }, { "epoch": 0.34, "learning_rate": 0.0007715088632588582, "loss": 1.486, "step": 1671 }, { "epoch": 0.34, "learning_rate": 0.000771233835065816, "loss": 1.5394, "step": 1672 }, { "epoch": 0.34, "learning_rate": 0.0007709586905394441, "loss": 1.4978, "step": 1673 }, { "epoch": 0.34, "learning_rate": 0.0007706834297977537, "loss": 1.4939, "step": 1674 }, { "epoch": 0.34, "learning_rate": 0.0007704080529588049, "loss": 1.4714, "step": 1675 }, { "epoch": 0.34, "learning_rate": 0.000770132560140708, "loss": 1.4692, "step": 1676 }, { "epoch": 0.34, "learning_rate": 0.0007698569514616231, "loss": 1.5031, "step": 1677 }, { "epoch": 0.34, "learning_rate": 0.0007695812270397601, "loss": 1.5492, "step": 1678 }, { "epoch": 0.34, "learning_rate": 0.0007693053869933782, "loss": 1.5111, "step": 1679 }, { "epoch": 0.34, "learning_rate": 0.0007690294314407862, "loss": 1.4806, "step": 1680 }, { "epoch": 0.34, "learning_rate": 0.000768753360500343, "loss": 1.4976, "step": 1681 }, { "epoch": 0.34, "learning_rate": 0.0007684771742904561, "loss": 1.4942, "step": 1682 }, { "epoch": 0.34, "learning_rate": 0.0007682008729295832, "loss": 1.4468, "step": 1683 }, { "epoch": 0.34, "learning_rate": 0.0007679244565362314, "loss": 1.4535, "step": 1684 }, { "epoch": 0.34, "learning_rate": 0.000767647925228956, "loss": 1.4805, "step": 1685 }, { "epoch": 0.34, "learning_rate": 0.0007673712791263634, "loss": 1.5266, "step": 1686 }, { "epoch": 0.34, "learning_rate": 0.0007670945183471076, "loss": 1.5073, "step": 1687 }, { "epoch": 0.34, "learning_rate": 0.0007668176430098929, "loss": 1.504, "step": 1688 }, { "epoch": 0.34, "learning_rate": 0.0007665406532334719, "loss": 1.5146, "step": 1689 }, { "epoch": 0.34, "learning_rate": 0.000766263549136647, "loss": 1.4973, "step": 1690 }, { "epoch": 0.34, "learning_rate": 0.0007659863308382691, "loss": 1.5233, "step": 1691 }, { "epoch": 0.34, "learning_rate": 0.0007657089984572383, "loss": 1.5074, "step": 1692 }, { "epoch": 0.34, "learning_rate": 0.0007654315521125037, "loss": 1.4725, "step": 1693 }, { "epoch": 0.34, "learning_rate": 0.0007651539919230633, "loss": 1.5147, "step": 1694 }, { "epoch": 0.34, "learning_rate": 0.0007648763180079637, "loss": 1.5285, "step": 1695 }, { "epoch": 0.34, "learning_rate": 0.0007645985304863003, "loss": 1.4979, "step": 1696 }, { "epoch": 0.34, "learning_rate": 0.0007643206294772176, "loss": 1.479, "step": 1697 }, { "epoch": 0.34, "learning_rate": 0.0007640426150999082, "loss": 1.4853, "step": 1698 }, { "epoch": 0.34, "learning_rate": 0.000763764487473614, "loss": 1.4979, "step": 1699 }, { "epoch": 0.34, "learning_rate": 0.0007634862467176248, "loss": 1.5001, "step": 1700 }, { "epoch": 0.34, "learning_rate": 0.0007632078929512793, "loss": 1.4943, "step": 1701 }, { "epoch": 0.34, "learning_rate": 0.0007629294262939647, "loss": 1.5224, "step": 1702 }, { "epoch": 0.34, "learning_rate": 0.0007626508468651164, "loss": 1.4845, "step": 1703 }, { "epoch": 0.34, "learning_rate": 0.0007623721547842183, "loss": 1.4951, "step": 1704 }, { "epoch": 0.34, "learning_rate": 0.0007620933501708028, "loss": 1.5068, "step": 1705 }, { "epoch": 0.34, "learning_rate": 0.00076181443314445, "loss": 1.5018, "step": 1706 }, { "epoch": 0.35, "learning_rate": 0.0007615354038247889, "loss": 1.4869, "step": 1707 }, { "epoch": 0.35, "learning_rate": 0.0007612562623314962, "loss": 1.519, "step": 1708 }, { "epoch": 0.35, "learning_rate": 0.0007609770087842969, "loss": 1.5298, "step": 1709 }, { "epoch": 0.35, "learning_rate": 0.0007606976433029639, "loss": 1.4993, "step": 1710 }, { "epoch": 0.35, "learning_rate": 0.0007604181660073184, "loss": 1.4927, "step": 1711 }, { "epoch": 0.35, "learning_rate": 0.0007601385770172293, "loss": 1.5028, "step": 1712 }, { "epoch": 0.35, "learning_rate": 0.0007598588764526136, "loss": 1.4896, "step": 1713 }, { "epoch": 0.35, "learning_rate": 0.0007595790644334358, "loss": 1.4555, "step": 1714 }, { "epoch": 0.35, "learning_rate": 0.0007592991410797086, "loss": 1.4764, "step": 1715 }, { "epoch": 0.35, "learning_rate": 0.0007590191065114924, "loss": 1.5062, "step": 1716 }, { "epoch": 0.35, "learning_rate": 0.000758738960848895, "loss": 1.5279, "step": 1717 }, { "epoch": 0.35, "learning_rate": 0.0007584587042120723, "loss": 1.4457, "step": 1718 }, { "epoch": 0.35, "learning_rate": 0.0007581783367212274, "loss": 1.4952, "step": 1719 }, { "epoch": 0.35, "learning_rate": 0.0007578978584966109, "loss": 1.5168, "step": 1720 }, { "epoch": 0.35, "learning_rate": 0.0007576172696585216, "loss": 1.5132, "step": 1721 }, { "epoch": 0.35, "learning_rate": 0.0007573365703273045, "loss": 1.4947, "step": 1722 }, { "epoch": 0.35, "learning_rate": 0.0007570557606233533, "loss": 1.5291, "step": 1723 }, { "epoch": 0.35, "learning_rate": 0.0007567748406671084, "loss": 1.4822, "step": 1724 }, { "epoch": 0.35, "learning_rate": 0.0007564938105790573, "loss": 1.5022, "step": 1725 }, { "epoch": 0.35, "learning_rate": 0.0007562126704797352, "loss": 1.5218, "step": 1726 }, { "epoch": 0.35, "learning_rate": 0.000755931420489724, "loss": 1.4724, "step": 1727 }, { "epoch": 0.35, "learning_rate": 0.0007556500607296534, "loss": 1.5057, "step": 1728 }, { "epoch": 0.35, "learning_rate": 0.0007553685913201994, "loss": 1.5272, "step": 1729 }, { "epoch": 0.35, "learning_rate": 0.0007550870123820857, "loss": 1.4853, "step": 1730 }, { "epoch": 0.35, "learning_rate": 0.0007548053240360826, "loss": 1.5039, "step": 1731 }, { "epoch": 0.35, "learning_rate": 0.0007545235264030072, "loss": 1.4535, "step": 1732 }, { "epoch": 0.35, "learning_rate": 0.000754241619603724, "loss": 1.4972, "step": 1733 }, { "epoch": 0.35, "learning_rate": 0.0007539596037591438, "loss": 1.5054, "step": 1734 }, { "epoch": 0.35, "learning_rate": 0.0007536774789902246, "loss": 1.5341, "step": 1735 }, { "epoch": 0.35, "learning_rate": 0.0007533952454179707, "loss": 1.452, "step": 1736 }, { "epoch": 0.35, "learning_rate": 0.0007531129031634332, "loss": 1.497, "step": 1737 }, { "epoch": 0.35, "learning_rate": 0.0007528304523477099, "loss": 1.4912, "step": 1738 }, { "epoch": 0.35, "learning_rate": 0.0007525478930919453, "loss": 1.4897, "step": 1739 }, { "epoch": 0.35, "learning_rate": 0.0007522652255173303, "loss": 1.4811, "step": 1740 }, { "epoch": 0.35, "learning_rate": 0.0007519824497451019, "loss": 1.4956, "step": 1741 }, { "epoch": 0.35, "learning_rate": 0.0007516995658965442, "loss": 1.5261, "step": 1742 }, { "epoch": 0.35, "learning_rate": 0.000751416574092987, "loss": 1.479, "step": 1743 }, { "epoch": 0.35, "learning_rate": 0.0007511334744558067, "loss": 1.4753, "step": 1744 }, { "epoch": 0.35, "learning_rate": 0.0007508502671064259, "loss": 1.4946, "step": 1745 }, { "epoch": 0.35, "learning_rate": 0.0007505669521663136, "loss": 1.5335, "step": 1746 }, { "epoch": 0.35, "learning_rate": 0.0007502835297569845, "loss": 1.5294, "step": 1747 }, { "epoch": 0.35, "learning_rate": 0.00075, "loss": 1.4947, "step": 1748 }, { "epoch": 0.35, "learning_rate": 0.000749716363016967, "loss": 1.4865, "step": 1749 }, { "epoch": 0.35, "learning_rate": 0.0007494326189295384, "loss": 1.5085, "step": 1750 }, { "epoch": 0.35, "learning_rate": 0.0007491487678594137, "loss": 1.4393, "step": 1751 }, { "epoch": 0.35, "learning_rate": 0.0007488648099283373, "loss": 1.474, "step": 1752 }, { "epoch": 0.35, "learning_rate": 0.0007485807452581002, "loss": 1.4782, "step": 1753 }, { "epoch": 0.35, "learning_rate": 0.0007482965739705391, "loss": 1.463, "step": 1754 }, { "epoch": 0.35, "learning_rate": 0.0007480122961875359, "loss": 1.508, "step": 1755 }, { "epoch": 0.35, "learning_rate": 0.0007477279120310189, "loss": 1.5167, "step": 1756 }, { "epoch": 0.36, "learning_rate": 0.0007474434216229613, "loss": 1.4875, "step": 1757 }, { "epoch": 0.36, "learning_rate": 0.0007471588250853824, "loss": 1.5112, "step": 1758 }, { "epoch": 0.36, "learning_rate": 0.0007468741225403469, "loss": 1.494, "step": 1759 }, { "epoch": 0.36, "learning_rate": 0.0007465893141099649, "loss": 1.4985, "step": 1760 }, { "epoch": 0.36, "learning_rate": 0.0007463043999163918, "loss": 1.4843, "step": 1761 }, { "epoch": 0.36, "learning_rate": 0.0007460193800818286, "loss": 1.4695, "step": 1762 }, { "epoch": 0.36, "learning_rate": 0.0007457342547285217, "loss": 1.5065, "step": 1763 }, { "epoch": 0.36, "learning_rate": 0.0007454490239787621, "loss": 1.4793, "step": 1764 }, { "epoch": 0.36, "learning_rate": 0.0007451636879548867, "loss": 1.5214, "step": 1765 }, { "epoch": 0.36, "learning_rate": 0.0007448782467792775, "loss": 1.4611, "step": 1766 }, { "epoch": 0.36, "learning_rate": 0.0007445927005743611, "loss": 1.4756, "step": 1767 }, { "epoch": 0.36, "learning_rate": 0.0007443070494626096, "loss": 1.501, "step": 1768 }, { "epoch": 0.36, "learning_rate": 0.0007440212935665401, "loss": 1.5409, "step": 1769 }, { "epoch": 0.36, "learning_rate": 0.0007437354330087143, "loss": 1.4805, "step": 1770 }, { "epoch": 0.36, "learning_rate": 0.0007434494679117391, "loss": 1.511, "step": 1771 }, { "epoch": 0.36, "learning_rate": 0.000743163398398266, "loss": 1.5042, "step": 1772 }, { "epoch": 0.36, "learning_rate": 0.0007428772245909916, "loss": 1.5058, "step": 1773 }, { "epoch": 0.36, "learning_rate": 0.0007425909466126568, "loss": 1.5175, "step": 1774 }, { "epoch": 0.36, "learning_rate": 0.0007423045645860478, "loss": 1.4792, "step": 1775 }, { "epoch": 0.36, "learning_rate": 0.0007420180786339947, "loss": 1.4671, "step": 1776 }, { "epoch": 0.36, "learning_rate": 0.0007417314888793727, "loss": 1.4505, "step": 1777 }, { "epoch": 0.36, "learning_rate": 0.0007414447954451013, "loss": 1.4858, "step": 1778 }, { "epoch": 0.36, "learning_rate": 0.0007411579984541446, "loss": 1.5096, "step": 1779 }, { "epoch": 0.36, "learning_rate": 0.0007408710980295108, "loss": 1.5132, "step": 1780 }, { "epoch": 0.36, "learning_rate": 0.0007405840942942529, "loss": 1.4623, "step": 1781 }, { "epoch": 0.36, "learning_rate": 0.000740296987371468, "loss": 1.4544, "step": 1782 }, { "epoch": 0.36, "learning_rate": 0.0007400097773842975, "loss": 1.5189, "step": 1783 }, { "epoch": 0.36, "learning_rate": 0.0007397224644559266, "loss": 1.5372, "step": 1784 }, { "epoch": 0.36, "learning_rate": 0.0007394350487095857, "loss": 1.4804, "step": 1785 }, { "epoch": 0.36, "learning_rate": 0.0007391475302685479, "loss": 1.5121, "step": 1786 }, { "epoch": 0.36, "learning_rate": 0.0007388599092561315, "loss": 1.4988, "step": 1787 }, { "epoch": 0.36, "learning_rate": 0.0007385721857956982, "loss": 1.4449, "step": 1788 }, { "epoch": 0.36, "learning_rate": 0.000738284360010654, "loss": 1.5033, "step": 1789 }, { "epoch": 0.36, "learning_rate": 0.0007379964320244482, "loss": 1.4744, "step": 1790 }, { "epoch": 0.36, "learning_rate": 0.0007377084019605747, "loss": 1.5309, "step": 1791 }, { "epoch": 0.36, "learning_rate": 0.0007374202699425707, "loss": 1.4867, "step": 1792 }, { "epoch": 0.36, "learning_rate": 0.0007371320360940171, "loss": 1.4815, "step": 1793 }, { "epoch": 0.36, "learning_rate": 0.0007368437005385389, "loss": 1.4928, "step": 1794 }, { "epoch": 0.36, "learning_rate": 0.0007365552633998041, "loss": 1.4467, "step": 1795 }, { "epoch": 0.36, "learning_rate": 0.0007362667248015245, "loss": 1.4689, "step": 1796 }, { "epoch": 0.36, "learning_rate": 0.0007359780848674561, "loss": 1.4865, "step": 1797 }, { "epoch": 0.36, "learning_rate": 0.0007356893437213971, "loss": 1.5001, "step": 1798 }, { "epoch": 0.36, "learning_rate": 0.0007354005014871903, "loss": 1.5012, "step": 1799 }, { "epoch": 0.36, "learning_rate": 0.0007351115582887211, "loss": 1.5134, "step": 1800 }, { "epoch": 0.36, "learning_rate": 0.0007348225142499186, "loss": 1.4938, "step": 1801 }, { "epoch": 0.36, "learning_rate": 0.0007345333694947547, "loss": 1.5065, "step": 1802 }, { "epoch": 0.36, "learning_rate": 0.0007342441241472451, "loss": 1.4863, "step": 1803 }, { "epoch": 0.36, "learning_rate": 0.0007339547783314482, "loss": 1.5337, "step": 1804 }, { "epoch": 0.36, "learning_rate": 0.0007336653321714657, "loss": 1.5012, "step": 1805 }, { "epoch": 0.37, "learning_rate": 0.0007333757857914423, "loss": 1.4671, "step": 1806 }, { "epoch": 0.37, "learning_rate": 0.0007330861393155656, "loss": 1.4909, "step": 1807 }, { "epoch": 0.37, "learning_rate": 0.0007327963928680661, "loss": 1.4903, "step": 1808 }, { "epoch": 0.37, "learning_rate": 0.0007325065465732175, "loss": 1.5238, "step": 1809 }, { "epoch": 0.37, "learning_rate": 0.0007322166005553358, "loss": 1.5246, "step": 1810 }, { "epoch": 0.37, "learning_rate": 0.0007319265549387802, "loss": 1.4574, "step": 1811 }, { "epoch": 0.37, "learning_rate": 0.0007316364098479527, "loss": 1.5062, "step": 1812 }, { "epoch": 0.37, "learning_rate": 0.0007313461654072974, "loss": 1.4609, "step": 1813 }, { "epoch": 0.37, "learning_rate": 0.0007310558217413015, "loss": 1.5024, "step": 1814 }, { "epoch": 0.37, "learning_rate": 0.0007307653789744947, "loss": 1.4557, "step": 1815 }, { "epoch": 0.37, "learning_rate": 0.0007304748372314489, "loss": 1.4968, "step": 1816 }, { "epoch": 0.37, "learning_rate": 0.0007301841966367788, "loss": 1.4954, "step": 1817 }, { "epoch": 0.37, "learning_rate": 0.0007298934573151415, "loss": 1.5211, "step": 1818 }, { "epoch": 0.37, "learning_rate": 0.0007296026193912362, "loss": 1.5192, "step": 1819 }, { "epoch": 0.37, "learning_rate": 0.0007293116829898043, "loss": 1.4854, "step": 1820 }, { "epoch": 0.37, "learning_rate": 0.00072902064823563, "loss": 1.4763, "step": 1821 }, { "epoch": 0.37, "learning_rate": 0.0007287295152535392, "loss": 1.4848, "step": 1822 }, { "epoch": 0.37, "learning_rate": 0.0007284382841684, "loss": 1.485, "step": 1823 }, { "epoch": 0.37, "learning_rate": 0.0007281469551051226, "loss": 1.504, "step": 1824 }, { "epoch": 0.37, "learning_rate": 0.0007278555281886594, "loss": 1.4898, "step": 1825 }, { "epoch": 0.37, "learning_rate": 0.0007275640035440044, "loss": 1.4921, "step": 1826 }, { "epoch": 0.37, "learning_rate": 0.0007272723812961941, "loss": 1.4843, "step": 1827 }, { "epoch": 0.37, "learning_rate": 0.000726980661570306, "loss": 1.4734, "step": 1828 }, { "epoch": 0.37, "learning_rate": 0.0007266888444914605, "loss": 1.4811, "step": 1829 }, { "epoch": 0.37, "learning_rate": 0.0007263969301848187, "loss": 1.4823, "step": 1830 }, { "epoch": 0.37, "learning_rate": 0.000726104918775584, "loss": 1.5057, "step": 1831 }, { "epoch": 0.37, "learning_rate": 0.0007258128103890015, "loss": 1.491, "step": 1832 }, { "epoch": 0.37, "learning_rate": 0.0007255206051503575, "loss": 1.5058, "step": 1833 }, { "epoch": 0.37, "learning_rate": 0.0007252283031849801, "loss": 1.472, "step": 1834 }, { "epoch": 0.37, "learning_rate": 0.0007249359046182388, "loss": 1.5122, "step": 1835 }, { "epoch": 0.37, "learning_rate": 0.0007246434095755449, "loss": 1.456, "step": 1836 }, { "epoch": 0.37, "learning_rate": 0.0007243508181823502, "loss": 1.4842, "step": 1837 }, { "epoch": 0.37, "learning_rate": 0.0007240581305641489, "loss": 1.5164, "step": 1838 }, { "epoch": 0.37, "learning_rate": 0.0007237653468464755, "loss": 1.4905, "step": 1839 }, { "epoch": 0.37, "learning_rate": 0.0007234724671549065, "loss": 1.4885, "step": 1840 }, { "epoch": 0.37, "learning_rate": 0.000723179491615059, "loss": 1.4998, "step": 1841 }, { "epoch": 0.37, "learning_rate": 0.0007228864203525918, "loss": 1.4867, "step": 1842 }, { "epoch": 0.37, "learning_rate": 0.0007225932534932039, "loss": 1.4945, "step": 1843 }, { "epoch": 0.37, "learning_rate": 0.000722299991162636, "loss": 1.4973, "step": 1844 }, { "epoch": 0.37, "learning_rate": 0.0007220066334866697, "loss": 1.5078, "step": 1845 }, { "epoch": 0.37, "learning_rate": 0.0007217131805911271, "loss": 1.5158, "step": 1846 }, { "epoch": 0.37, "learning_rate": 0.0007214196326018715, "loss": 1.4907, "step": 1847 }, { "epoch": 0.37, "learning_rate": 0.0007211259896448069, "loss": 1.4645, "step": 1848 }, { "epoch": 0.37, "learning_rate": 0.0007208322518458778, "loss": 1.5036, "step": 1849 }, { "epoch": 0.37, "learning_rate": 0.0007205384193310699, "loss": 1.4887, "step": 1850 }, { "epoch": 0.37, "learning_rate": 0.0007202444922264089, "loss": 1.4779, "step": 1851 }, { "epoch": 0.37, "learning_rate": 0.0007199504706579617, "loss": 1.4999, "step": 1852 }, { "epoch": 0.37, "learning_rate": 0.0007196563547518349, "loss": 1.5145, "step": 1853 }, { "epoch": 0.37, "learning_rate": 0.0007193621446341765, "loss": 1.4803, "step": 1854 }, { "epoch": 0.38, "learning_rate": 0.0007190678404311743, "loss": 1.459, "step": 1855 }, { "epoch": 0.38, "learning_rate": 0.0007187734422690564, "loss": 1.5015, "step": 1856 }, { "epoch": 0.38, "learning_rate": 0.0007184789502740917, "loss": 1.5137, "step": 1857 }, { "epoch": 0.38, "learning_rate": 0.000718184364572589, "loss": 1.501, "step": 1858 }, { "epoch": 0.38, "learning_rate": 0.0007178896852908971, "loss": 1.5064, "step": 1859 }, { "epoch": 0.38, "learning_rate": 0.0007175949125554055, "loss": 1.4806, "step": 1860 }, { "epoch": 0.38, "learning_rate": 0.0007173000464925432, "loss": 1.4984, "step": 1861 }, { "epoch": 0.38, "learning_rate": 0.0007170050872287797, "loss": 1.4719, "step": 1862 }, { "epoch": 0.38, "learning_rate": 0.0007167100348906241, "loss": 1.4898, "step": 1863 }, { "epoch": 0.38, "learning_rate": 0.0007164148896046256, "loss": 1.49, "step": 1864 }, { "epoch": 0.38, "learning_rate": 0.0007161196514973735, "loss": 1.4631, "step": 1865 }, { "epoch": 0.38, "learning_rate": 0.0007158243206954964, "loss": 1.4922, "step": 1866 }, { "epoch": 0.38, "learning_rate": 0.0007155288973256631, "loss": 1.4962, "step": 1867 }, { "epoch": 0.38, "learning_rate": 0.0007152333815145817, "loss": 1.4535, "step": 1868 }, { "epoch": 0.38, "learning_rate": 0.0007149377733890001, "loss": 1.4924, "step": 1869 }, { "epoch": 0.38, "learning_rate": 0.0007146420730757064, "loss": 1.4749, "step": 1870 }, { "epoch": 0.38, "learning_rate": 0.000714346280701527, "loss": 1.492, "step": 1871 }, { "epoch": 0.38, "learning_rate": 0.0007140503963933291, "loss": 1.4865, "step": 1872 }, { "epoch": 0.38, "learning_rate": 0.0007137544202780185, "loss": 1.4999, "step": 1873 }, { "epoch": 0.38, "learning_rate": 0.0007134583524825405, "loss": 1.4972, "step": 1874 }, { "epoch": 0.38, "learning_rate": 0.0007131621931338798, "loss": 1.4985, "step": 1875 }, { "epoch": 0.38, "learning_rate": 0.0007128659423590605, "loss": 1.4739, "step": 1876 }, { "epoch": 0.38, "learning_rate": 0.0007125696002851458, "loss": 1.4675, "step": 1877 }, { "epoch": 0.38, "learning_rate": 0.000712273167039238, "loss": 1.5255, "step": 1878 }, { "epoch": 0.38, "learning_rate": 0.0007119766427484786, "loss": 1.4556, "step": 1879 }, { "epoch": 0.38, "learning_rate": 0.0007116800275400482, "loss": 1.4701, "step": 1880 }, { "epoch": 0.38, "learning_rate": 0.000711383321541166, "loss": 1.4511, "step": 1881 }, { "epoch": 0.38, "learning_rate": 0.0007110865248790909, "loss": 1.5186, "step": 1882 }, { "epoch": 0.38, "learning_rate": 0.0007107896376811198, "loss": 1.4714, "step": 1883 }, { "epoch": 0.38, "learning_rate": 0.0007104926600745891, "loss": 1.5267, "step": 1884 }, { "epoch": 0.38, "learning_rate": 0.0007101955921868738, "loss": 1.4583, "step": 1885 }, { "epoch": 0.38, "learning_rate": 0.0007098984341453873, "loss": 1.4688, "step": 1886 }, { "epoch": 0.38, "learning_rate": 0.0007096011860775822, "loss": 1.4733, "step": 1887 }, { "epoch": 0.38, "learning_rate": 0.0007093038481109494, "loss": 1.5034, "step": 1888 }, { "epoch": 0.38, "learning_rate": 0.0007090064203730182, "loss": 1.5194, "step": 1889 }, { "epoch": 0.38, "learning_rate": 0.0007087089029913567, "loss": 1.5075, "step": 1890 }, { "epoch": 0.38, "learning_rate": 0.0007084112960935716, "loss": 1.472, "step": 1891 }, { "epoch": 0.38, "learning_rate": 0.0007081135998073072, "loss": 1.4459, "step": 1892 }, { "epoch": 0.38, "learning_rate": 0.0007078158142602473, "loss": 1.45, "step": 1893 }, { "epoch": 0.38, "learning_rate": 0.000707517939580113, "loss": 1.4749, "step": 1894 }, { "epoch": 0.38, "learning_rate": 0.000707219975894664, "loss": 1.4574, "step": 1895 }, { "epoch": 0.38, "learning_rate": 0.0007069219233316983, "loss": 1.4761, "step": 1896 }, { "epoch": 0.38, "learning_rate": 0.000706623782019052, "loss": 1.5092, "step": 1897 }, { "epoch": 0.38, "learning_rate": 0.0007063255520845989, "loss": 1.4688, "step": 1898 }, { "epoch": 0.38, "learning_rate": 0.0007060272336562512, "loss": 1.4887, "step": 1899 }, { "epoch": 0.38, "learning_rate": 0.0007057288268619591, "loss": 1.4932, "step": 1900 }, { "epoch": 0.38, "learning_rate": 0.0007054303318297101, "loss": 1.4537, "step": 1901 }, { "epoch": 0.38, "learning_rate": 0.0007051317486875303, "loss": 1.5131, "step": 1902 }, { "epoch": 0.38, "learning_rate": 0.0007048330775634833, "loss": 1.4824, "step": 1903 }, { "epoch": 0.38, "learning_rate": 0.00070453431858567, "loss": 1.4748, "step": 1904 }, { "epoch": 0.39, "learning_rate": 0.0007042354718822297, "loss": 1.4757, "step": 1905 }, { "epoch": 0.39, "learning_rate": 0.0007039365375813391, "loss": 1.5088, "step": 1906 }, { "epoch": 0.39, "learning_rate": 0.0007036375158112121, "loss": 1.4586, "step": 1907 }, { "epoch": 0.39, "learning_rate": 0.0007033384067001006, "loss": 1.4826, "step": 1908 }, { "epoch": 0.39, "learning_rate": 0.0007030392103762937, "loss": 1.4672, "step": 1909 }, { "epoch": 0.39, "learning_rate": 0.0007027399269681178, "loss": 1.4924, "step": 1910 }, { "epoch": 0.39, "learning_rate": 0.000702440556603937, "loss": 1.4828, "step": 1911 }, { "epoch": 0.39, "learning_rate": 0.0007021410994121524, "loss": 1.4981, "step": 1912 }, { "epoch": 0.39, "learning_rate": 0.0007018415555212026, "loss": 1.4877, "step": 1913 }, { "epoch": 0.39, "learning_rate": 0.0007015419250595632, "loss": 1.4883, "step": 1914 }, { "epoch": 0.39, "learning_rate": 0.0007012422081557467, "loss": 1.4388, "step": 1915 }, { "epoch": 0.39, "learning_rate": 0.0007009424049383032, "loss": 1.497, "step": 1916 }, { "epoch": 0.39, "learning_rate": 0.0007006425155358195, "loss": 1.4697, "step": 1917 }, { "epoch": 0.39, "learning_rate": 0.0007003425400769192, "loss": 1.4596, "step": 1918 }, { "epoch": 0.39, "learning_rate": 0.0007000424786902635, "loss": 1.4839, "step": 1919 }, { "epoch": 0.39, "learning_rate": 0.0006997423315045496, "loss": 1.5108, "step": 1920 }, { "epoch": 0.39, "learning_rate": 0.000699442098648512, "loss": 1.4823, "step": 1921 }, { "epoch": 0.39, "learning_rate": 0.0006991417802509219, "loss": 1.5049, "step": 1922 }, { "epoch": 0.39, "learning_rate": 0.000698841376440587, "loss": 1.4927, "step": 1923 }, { "epoch": 0.39, "learning_rate": 0.0006985408873463517, "loss": 1.4569, "step": 1924 }, { "epoch": 0.39, "learning_rate": 0.0006982403130970972, "loss": 1.5099, "step": 1925 }, { "epoch": 0.39, "learning_rate": 0.0006979396538217411, "loss": 1.5068, "step": 1926 }, { "epoch": 0.39, "learning_rate": 0.0006976389096492372, "loss": 1.5218, "step": 1927 }, { "epoch": 0.39, "learning_rate": 0.0006973380807085763, "loss": 1.4869, "step": 1928 }, { "epoch": 0.39, "learning_rate": 0.0006970371671287847, "loss": 1.4391, "step": 1929 }, { "epoch": 0.39, "learning_rate": 0.0006967361690389258, "loss": 1.4871, "step": 1930 }, { "epoch": 0.39, "learning_rate": 0.0006964350865680991, "loss": 1.4851, "step": 1931 }, { "epoch": 0.39, "learning_rate": 0.0006961339198454398, "loss": 1.4907, "step": 1932 }, { "epoch": 0.39, "learning_rate": 0.0006958326690001196, "loss": 1.4993, "step": 1933 }, { "epoch": 0.39, "learning_rate": 0.0006955313341613464, "loss": 1.4987, "step": 1934 }, { "epoch": 0.39, "learning_rate": 0.0006952299154583639, "loss": 1.5048, "step": 1935 }, { "epoch": 0.39, "learning_rate": 0.0006949284130204519, "loss": 1.4905, "step": 1936 }, { "epoch": 0.39, "learning_rate": 0.0006946268269769261, "loss": 1.5244, "step": 1937 }, { "epoch": 0.39, "learning_rate": 0.0006943251574571379, "loss": 1.5175, "step": 1938 }, { "epoch": 0.39, "learning_rate": 0.0006940234045904746, "loss": 1.4779, "step": 1939 }, { "epoch": 0.39, "learning_rate": 0.0006937215685063594, "loss": 1.5011, "step": 1940 }, { "epoch": 0.39, "learning_rate": 0.0006934196493342511, "loss": 1.4901, "step": 1941 }, { "epoch": 0.39, "learning_rate": 0.0006931176472036438, "loss": 1.4835, "step": 1942 }, { "epoch": 0.39, "learning_rate": 0.000692815562244068, "loss": 1.5027, "step": 1943 }, { "epoch": 0.39, "learning_rate": 0.0006925133945850885, "loss": 1.5125, "step": 1944 }, { "epoch": 0.39, "learning_rate": 0.0006922111443563069, "loss": 1.457, "step": 1945 }, { "epoch": 0.39, "learning_rate": 0.0006919088116873594, "loss": 1.4979, "step": 1946 }, { "epoch": 0.39, "learning_rate": 0.0006916063967079176, "loss": 1.4892, "step": 1947 }, { "epoch": 0.39, "learning_rate": 0.0006913038995476887, "loss": 1.4797, "step": 1948 }, { "epoch": 0.39, "learning_rate": 0.0006910013203364151, "loss": 1.5079, "step": 1949 }, { "epoch": 0.39, "learning_rate": 0.000690698659203874, "loss": 1.5606, "step": 1950 }, { "epoch": 0.39, "learning_rate": 0.000690395916279878, "loss": 1.4821, "step": 1951 }, { "epoch": 0.39, "learning_rate": 0.0006900930916942753, "loss": 1.4811, "step": 1952 }, { "epoch": 0.39, "learning_rate": 0.0006897901855769483, "loss": 1.481, "step": 1953 }, { "epoch": 0.4, "learning_rate": 0.0006894871980578145, "loss": 1.4721, "step": 1954 }, { "epoch": 0.4, "learning_rate": 0.000689184129266827, "loss": 1.5061, "step": 1955 }, { "epoch": 0.4, "learning_rate": 0.0006888809793339729, "loss": 1.4773, "step": 1956 }, { "epoch": 0.4, "learning_rate": 0.0006885777483892746, "loss": 1.5305, "step": 1957 }, { "epoch": 0.4, "learning_rate": 0.0006882744365627892, "loss": 1.5083, "step": 1958 }, { "epoch": 0.4, "learning_rate": 0.0006879710439846083, "loss": 1.5199, "step": 1959 }, { "epoch": 0.4, "learning_rate": 0.0006876675707848582, "loss": 1.4847, "step": 1960 }, { "epoch": 0.4, "learning_rate": 0.0006873640170937001, "loss": 1.4868, "step": 1961 }, { "epoch": 0.4, "learning_rate": 0.0006870603830413291, "loss": 1.4813, "step": 1962 }, { "epoch": 0.4, "learning_rate": 0.0006867566687579754, "loss": 1.473, "step": 1963 }, { "epoch": 0.4, "learning_rate": 0.0006864528743739033, "loss": 1.4908, "step": 1964 }, { "epoch": 0.4, "learning_rate": 0.0006861490000194113, "loss": 1.5116, "step": 1965 }, { "epoch": 0.4, "learning_rate": 0.0006858450458248325, "loss": 1.4904, "step": 1966 }, { "epoch": 0.4, "learning_rate": 0.0006855410119205341, "loss": 1.5301, "step": 1967 }, { "epoch": 0.4, "learning_rate": 0.0006852368984369175, "loss": 1.4656, "step": 1968 }, { "epoch": 0.4, "learning_rate": 0.0006849327055044182, "loss": 1.4927, "step": 1969 }, { "epoch": 0.4, "learning_rate": 0.0006846284332535059, "loss": 1.4737, "step": 1970 }, { "epoch": 0.4, "learning_rate": 0.0006843240818146845, "loss": 1.487, "step": 1971 }, { "epoch": 0.4, "learning_rate": 0.0006840196513184912, "loss": 1.5217, "step": 1972 }, { "epoch": 0.4, "learning_rate": 0.0006837151418954976, "loss": 1.513, "step": 1973 }, { "epoch": 0.4, "learning_rate": 0.0006834105536763094, "loss": 1.4833, "step": 1974 }, { "epoch": 0.4, "learning_rate": 0.0006831058867915654, "loss": 1.4536, "step": 1975 }, { "epoch": 0.4, "learning_rate": 0.0006828011413719385, "loss": 1.4625, "step": 1976 }, { "epoch": 0.4, "learning_rate": 0.0006824963175481356, "loss": 1.4988, "step": 1977 }, { "epoch": 0.4, "learning_rate": 0.0006821914154508966, "loss": 1.5221, "step": 1978 }, { "epoch": 0.4, "learning_rate": 0.0006818864352109953, "loss": 1.4837, "step": 1979 }, { "epoch": 0.4, "learning_rate": 0.0006815813769592394, "loss": 1.4469, "step": 1980 }, { "epoch": 0.4, "learning_rate": 0.0006812762408264692, "loss": 1.4515, "step": 1981 }, { "epoch": 0.4, "learning_rate": 0.000680971026943559, "loss": 1.5124, "step": 1982 }, { "epoch": 0.4, "learning_rate": 0.0006806657354414165, "loss": 1.5391, "step": 1983 }, { "epoch": 0.4, "learning_rate": 0.0006803603664509822, "loss": 1.4982, "step": 1984 }, { "epoch": 0.4, "learning_rate": 0.0006800549201032303, "loss": 1.5174, "step": 1985 }, { "epoch": 0.4, "learning_rate": 0.0006797493965291681, "loss": 1.5013, "step": 1986 }, { "epoch": 0.4, "learning_rate": 0.0006794437958598359, "loss": 1.4927, "step": 1987 }, { "epoch": 0.4, "learning_rate": 0.000679138118226307, "loss": 1.4737, "step": 1988 }, { "epoch": 0.4, "learning_rate": 0.000678832363759688, "loss": 1.4573, "step": 1989 }, { "epoch": 0.4, "learning_rate": 0.0006785265325911181, "loss": 1.4752, "step": 1990 }, { "epoch": 0.4, "learning_rate": 0.0006782206248517695, "loss": 1.4804, "step": 1991 }, { "epoch": 0.4, "learning_rate": 0.0006779146406728477, "loss": 1.505, "step": 1992 }, { "epoch": 0.4, "learning_rate": 0.0006776085801855902, "loss": 1.5015, "step": 1993 }, { "epoch": 0.4, "learning_rate": 0.0006773024435212678, "loss": 1.4686, "step": 1994 }, { "epoch": 0.4, "learning_rate": 0.0006769962308111839, "loss": 1.4893, "step": 1995 }, { "epoch": 0.4, "learning_rate": 0.0006766899421866741, "loss": 1.532, "step": 1996 }, { "epoch": 0.4, "learning_rate": 0.0006763835777791071, "loss": 1.4744, "step": 1997 }, { "epoch": 0.4, "learning_rate": 0.000676077137719884, "loss": 1.4602, "step": 1998 }, { "epoch": 0.4, "learning_rate": 0.0006757706221404378, "loss": 1.4579, "step": 1999 }, { "epoch": 0.4, "learning_rate": 0.0006754640311722348, "loss": 1.5508, "step": 2000 }, { "epoch": 0.4, "learning_rate": 0.0006751573649467728, "loss": 1.4605, "step": 2001 }, { "epoch": 0.4, "learning_rate": 0.0006748506235955825, "loss": 1.4977, "step": 2002 }, { "epoch": 0.4, "learning_rate": 0.0006745438072502263, "loss": 1.4606, "step": 2003 }, { "epoch": 0.41, "learning_rate": 0.0006742369160422993, "loss": 1.4888, "step": 2004 }, { "epoch": 0.41, "learning_rate": 0.0006739299501034282, "loss": 1.4966, "step": 2005 }, { "epoch": 0.41, "learning_rate": 0.000673622909565272, "loss": 1.5207, "step": 2006 }, { "epoch": 0.41, "learning_rate": 0.0006733157945595218, "loss": 1.5105, "step": 2007 }, { "epoch": 0.41, "learning_rate": 0.0006730086052179003, "loss": 1.4963, "step": 2008 }, { "epoch": 0.41, "learning_rate": 0.0006727013416721625, "loss": 1.4951, "step": 2009 }, { "epoch": 0.41, "learning_rate": 0.000672394004054095, "loss": 1.5022, "step": 2010 }, { "epoch": 0.41, "learning_rate": 0.0006720865924955161, "loss": 1.4779, "step": 2011 }, { "epoch": 0.41, "learning_rate": 0.0006717791071282759, "loss": 1.5163, "step": 2012 }, { "epoch": 0.41, "learning_rate": 0.0006714715480842563, "loss": 1.4443, "step": 2013 }, { "epoch": 0.41, "learning_rate": 0.0006711639154953706, "loss": 1.4674, "step": 2014 }, { "epoch": 0.41, "learning_rate": 0.0006708562094935635, "loss": 1.4836, "step": 2015 }, { "epoch": 0.41, "learning_rate": 0.0006705484302108118, "loss": 1.4671, "step": 2016 }, { "epoch": 0.41, "learning_rate": 0.000670240577779123, "loss": 1.4738, "step": 2017 }, { "epoch": 0.41, "learning_rate": 0.0006699326523305363, "loss": 1.4834, "step": 2018 }, { "epoch": 0.41, "learning_rate": 0.0006696246539971226, "loss": 1.4547, "step": 2019 }, { "epoch": 0.41, "learning_rate": 0.0006693165829109832, "loss": 1.5, "step": 2020 }, { "epoch": 0.41, "learning_rate": 0.0006690084392042513, "loss": 1.4933, "step": 2021 }, { "epoch": 0.41, "learning_rate": 0.0006687002230090912, "loss": 1.4712, "step": 2022 }, { "epoch": 0.41, "learning_rate": 0.0006683919344576977, "loss": 1.4961, "step": 2023 }, { "epoch": 0.41, "learning_rate": 0.0006680835736822974, "loss": 1.5292, "step": 2024 }, { "epoch": 0.41, "learning_rate": 0.0006677751408151474, "loss": 1.4902, "step": 2025 }, { "epoch": 0.41, "learning_rate": 0.0006674666359885358, "loss": 1.5067, "step": 2026 }, { "epoch": 0.41, "learning_rate": 0.0006671580593347817, "loss": 1.5111, "step": 2027 }, { "epoch": 0.41, "learning_rate": 0.000666849410986235, "loss": 1.5342, "step": 2028 }, { "epoch": 0.41, "learning_rate": 0.0006665406910752761, "loss": 1.4698, "step": 2029 }, { "epoch": 0.41, "learning_rate": 0.0006662318997343162, "loss": 1.4364, "step": 2030 }, { "epoch": 0.41, "learning_rate": 0.0006659230370957974, "loss": 1.4693, "step": 2031 }, { "epoch": 0.41, "learning_rate": 0.0006656141032921922, "loss": 1.4901, "step": 2032 }, { "epoch": 0.41, "learning_rate": 0.0006653050984560035, "loss": 1.4896, "step": 2033 }, { "epoch": 0.41, "learning_rate": 0.0006649960227197648, "loss": 1.4891, "step": 2034 }, { "epoch": 0.41, "learning_rate": 0.0006646868762160399, "loss": 1.4982, "step": 2035 }, { "epoch": 0.41, "learning_rate": 0.0006643776590774231, "loss": 1.4794, "step": 2036 }, { "epoch": 0.41, "learning_rate": 0.0006640683714365388, "loss": 1.4905, "step": 2037 }, { "epoch": 0.41, "learning_rate": 0.0006637590134260422, "loss": 1.488, "step": 2038 }, { "epoch": 0.41, "learning_rate": 0.0006634495851786178, "loss": 1.4862, "step": 2039 }, { "epoch": 0.41, "learning_rate": 0.0006631400868269807, "loss": 1.4917, "step": 2040 }, { "epoch": 0.41, "learning_rate": 0.0006628305185038763, "loss": 1.4777, "step": 2041 }, { "epoch": 0.41, "learning_rate": 0.0006625208803420796, "loss": 1.4873, "step": 2042 }, { "epoch": 0.41, "learning_rate": 0.0006622111724743957, "loss": 1.482, "step": 2043 }, { "epoch": 0.41, "learning_rate": 0.0006619013950336597, "loss": 1.496, "step": 2044 }, { "epoch": 0.41, "learning_rate": 0.0006615915481527361, "loss": 1.4806, "step": 2045 }, { "epoch": 0.41, "learning_rate": 0.00066128163196452, "loss": 1.4581, "step": 2046 }, { "epoch": 0.41, "learning_rate": 0.0006609716466019355, "loss": 1.4598, "step": 2047 }, { "epoch": 0.41, "learning_rate": 0.0006606615921979366, "loss": 1.4948, "step": 2048 }, { "epoch": 0.41, "learning_rate": 0.0006603514688855071, "loss": 1.5093, "step": 2049 }, { "epoch": 0.41, "learning_rate": 0.00066004127679766, "loss": 1.4534, "step": 2050 }, { "epoch": 0.41, "learning_rate": 0.0006597310160674382, "loss": 1.482, "step": 2051 }, { "epoch": 0.41, "learning_rate": 0.0006594206868279134, "loss": 1.5063, "step": 2052 }, { "epoch": 0.42, "learning_rate": 0.0006591102892121877, "loss": 1.5027, "step": 2053 }, { "epoch": 0.42, "learning_rate": 0.0006587998233533916, "loss": 1.5147, "step": 2054 }, { "epoch": 0.42, "learning_rate": 0.0006584892893846852, "loss": 1.5312, "step": 2055 }, { "epoch": 0.42, "learning_rate": 0.000658178687439258, "loss": 1.4755, "step": 2056 }, { "epoch": 0.42, "learning_rate": 0.0006578680176503283, "loss": 1.4959, "step": 2057 }, { "epoch": 0.42, "learning_rate": 0.0006575572801511437, "loss": 1.4532, "step": 2058 }, { "epoch": 0.42, "learning_rate": 0.0006572464750749809, "loss": 1.4577, "step": 2059 }, { "epoch": 0.42, "learning_rate": 0.0006569356025551454, "loss": 1.4957, "step": 2060 }, { "epoch": 0.42, "learning_rate": 0.0006566246627249719, "loss": 1.4525, "step": 2061 }, { "epoch": 0.42, "learning_rate": 0.0006563136557178236, "loss": 1.4904, "step": 2062 }, { "epoch": 0.42, "learning_rate": 0.0006560025816670927, "loss": 1.471, "step": 2063 }, { "epoch": 0.42, "learning_rate": 0.0006556914407062004, "loss": 1.5157, "step": 2064 }, { "epoch": 0.42, "learning_rate": 0.000655380232968596, "loss": 1.5033, "step": 2065 }, { "epoch": 0.42, "learning_rate": 0.0006550689585877583, "loss": 1.5178, "step": 2066 }, { "epoch": 0.42, "learning_rate": 0.0006547576176971937, "loss": 1.4585, "step": 2067 }, { "epoch": 0.42, "learning_rate": 0.000654446210430438, "loss": 1.4702, "step": 2068 }, { "epoch": 0.42, "learning_rate": 0.0006541347369210548, "loss": 1.5129, "step": 2069 }, { "epoch": 0.42, "learning_rate": 0.0006538231973026364, "loss": 1.5134, "step": 2070 }, { "epoch": 0.42, "learning_rate": 0.0006535115917088037, "loss": 1.504, "step": 2071 }, { "epoch": 0.42, "learning_rate": 0.0006531999202732055, "loss": 1.5098, "step": 2072 }, { "epoch": 0.42, "learning_rate": 0.0006528881831295188, "loss": 1.4669, "step": 2073 }, { "epoch": 0.42, "learning_rate": 0.0006525763804114494, "loss": 1.4592, "step": 2074 }, { "epoch": 0.42, "learning_rate": 0.0006522645122527304, "loss": 1.4732, "step": 2075 }, { "epoch": 0.42, "learning_rate": 0.0006519525787871234, "loss": 1.4947, "step": 2076 }, { "epoch": 0.42, "learning_rate": 0.0006516405801484183, "loss": 1.4654, "step": 2077 }, { "epoch": 0.42, "learning_rate": 0.0006513285164704323, "loss": 1.4725, "step": 2078 }, { "epoch": 0.42, "learning_rate": 0.0006510163878870108, "loss": 1.4666, "step": 2079 }, { "epoch": 0.42, "learning_rate": 0.0006507041945320273, "loss": 1.481, "step": 2080 }, { "epoch": 0.42, "learning_rate": 0.0006503919365393827, "loss": 1.4883, "step": 2081 }, { "epoch": 0.42, "learning_rate": 0.0006500796140430057, "loss": 1.4548, "step": 2082 }, { "epoch": 0.42, "learning_rate": 0.000649767227176853, "loss": 1.4612, "step": 2083 }, { "epoch": 0.42, "learning_rate": 0.0006494547760749084, "loss": 1.4941, "step": 2084 }, { "epoch": 0.42, "learning_rate": 0.0006491422608711834, "loss": 1.4697, "step": 2085 }, { "epoch": 0.42, "learning_rate": 0.0006488296816997174, "loss": 1.5044, "step": 2086 }, { "epoch": 0.42, "learning_rate": 0.0006485170386945766, "loss": 1.4741, "step": 2087 }, { "epoch": 0.42, "learning_rate": 0.000648204331989855, "loss": 1.467, "step": 2088 }, { "epoch": 0.42, "learning_rate": 0.0006478915617196739, "loss": 1.5041, "step": 2089 }, { "epoch": 0.42, "learning_rate": 0.0006475787280181818, "loss": 1.461, "step": 2090 }, { "epoch": 0.42, "learning_rate": 0.000647265831019554, "loss": 1.4918, "step": 2091 }, { "epoch": 0.42, "learning_rate": 0.000646952870857994, "loss": 1.4483, "step": 2092 }, { "epoch": 0.42, "learning_rate": 0.0006466398476677313, "loss": 1.482, "step": 2093 }, { "epoch": 0.42, "learning_rate": 0.0006463267615830228, "loss": 1.4774, "step": 2094 }, { "epoch": 0.42, "learning_rate": 0.0006460136127381526, "loss": 1.4856, "step": 2095 }, { "epoch": 0.42, "learning_rate": 0.0006457004012674315, "loss": 1.4631, "step": 2096 }, { "epoch": 0.42, "learning_rate": 0.0006453871273051974, "loss": 1.4534, "step": 2097 }, { "epoch": 0.42, "learning_rate": 0.0006450737909858146, "loss": 1.4635, "step": 2098 }, { "epoch": 0.42, "learning_rate": 0.0006447603924436743, "loss": 1.4382, "step": 2099 }, { "epoch": 0.42, "learning_rate": 0.0006444469318131947, "loss": 1.4683, "step": 2100 }, { "epoch": 0.42, "learning_rate": 0.0006441334092288201, "loss": 1.4681, "step": 2101 }, { "epoch": 0.42, "learning_rate": 0.0006438198248250219, "loss": 1.4885, "step": 2102 }, { "epoch": 0.43, "learning_rate": 0.0006435061787362975, "loss": 1.4968, "step": 2103 }, { "epoch": 0.43, "learning_rate": 0.0006431924710971714, "loss": 1.4601, "step": 2104 }, { "epoch": 0.43, "learning_rate": 0.0006428787020421937, "loss": 1.4928, "step": 2105 }, { "epoch": 0.43, "learning_rate": 0.0006425648717059417, "loss": 1.466, "step": 2106 }, { "epoch": 0.43, "learning_rate": 0.0006422509802230181, "loss": 1.5203, "step": 2107 }, { "epoch": 0.43, "learning_rate": 0.0006419370277280527, "loss": 1.4984, "step": 2108 }, { "epoch": 0.43, "learning_rate": 0.0006416230143557007, "loss": 1.4831, "step": 2109 }, { "epoch": 0.43, "learning_rate": 0.0006413089402406437, "loss": 1.4742, "step": 2110 }, { "epoch": 0.43, "learning_rate": 0.0006409948055175898, "loss": 1.4675, "step": 2111 }, { "epoch": 0.43, "learning_rate": 0.0006406806103212725, "loss": 1.4905, "step": 2112 }, { "epoch": 0.43, "learning_rate": 0.0006403663547864513, "loss": 1.4725, "step": 2113 }, { "epoch": 0.43, "learning_rate": 0.0006400520390479119, "loss": 1.4847, "step": 2114 }, { "epoch": 0.43, "learning_rate": 0.0006397376632404657, "loss": 1.4772, "step": 2115 }, { "epoch": 0.43, "learning_rate": 0.0006394232274989494, "loss": 1.5061, "step": 2116 }, { "epoch": 0.43, "learning_rate": 0.0006391087319582263, "loss": 1.4963, "step": 2117 }, { "epoch": 0.43, "learning_rate": 0.0006387941767531846, "loss": 1.4374, "step": 2118 }, { "epoch": 0.43, "learning_rate": 0.0006384795620187385, "loss": 1.4562, "step": 2119 }, { "epoch": 0.43, "learning_rate": 0.0006381648878898274, "loss": 1.4966, "step": 2120 }, { "epoch": 0.43, "learning_rate": 0.0006378501545014164, "loss": 1.4846, "step": 2121 }, { "epoch": 0.43, "learning_rate": 0.000637535361988496, "loss": 1.4936, "step": 2122 }, { "epoch": 0.43, "learning_rate": 0.000637220510486082, "loss": 1.4406, "step": 2123 }, { "epoch": 0.43, "learning_rate": 0.0006369056001292155, "loss": 1.4596, "step": 2124 }, { "epoch": 0.43, "learning_rate": 0.000636590631052963, "loss": 1.4961, "step": 2125 }, { "epoch": 0.43, "learning_rate": 0.000636275603392416, "loss": 1.4521, "step": 2126 }, { "epoch": 0.43, "learning_rate": 0.000635960517282691, "loss": 1.4726, "step": 2127 }, { "epoch": 0.43, "learning_rate": 0.00063564537285893, "loss": 1.5007, "step": 2128 }, { "epoch": 0.43, "learning_rate": 0.0006353301702562999, "loss": 1.4725, "step": 2129 }, { "epoch": 0.43, "learning_rate": 0.0006350149096099921, "loss": 1.5076, "step": 2130 }, { "epoch": 0.43, "learning_rate": 0.0006346995910552233, "loss": 1.4812, "step": 2131 }, { "epoch": 0.43, "learning_rate": 0.000634384214727235, "loss": 1.458, "step": 2132 }, { "epoch": 0.43, "learning_rate": 0.0006340687807612933, "loss": 1.4894, "step": 2133 }, { "epoch": 0.43, "learning_rate": 0.0006337532892926892, "loss": 1.4486, "step": 2134 }, { "epoch": 0.43, "learning_rate": 0.0006334377404567386, "loss": 1.5386, "step": 2135 }, { "epoch": 0.43, "learning_rate": 0.0006331221343887814, "loss": 1.4982, "step": 2136 }, { "epoch": 0.43, "learning_rate": 0.0006328064712241824, "loss": 1.4786, "step": 2137 }, { "epoch": 0.43, "learning_rate": 0.000632490751098331, "loss": 1.523, "step": 2138 }, { "epoch": 0.43, "learning_rate": 0.0006321749741466406, "loss": 1.4761, "step": 2139 }, { "epoch": 0.43, "learning_rate": 0.0006318591405045495, "loss": 1.4787, "step": 2140 }, { "epoch": 0.43, "learning_rate": 0.0006315432503075201, "loss": 1.4561, "step": 2141 }, { "epoch": 0.43, "learning_rate": 0.0006312273036910389, "loss": 1.4622, "step": 2142 }, { "epoch": 0.43, "learning_rate": 0.0006309113007906169, "loss": 1.5139, "step": 2143 }, { "epoch": 0.43, "learning_rate": 0.0006305952417417889, "loss": 1.5118, "step": 2144 }, { "epoch": 0.43, "learning_rate": 0.000630279126680114, "loss": 1.5093, "step": 2145 }, { "epoch": 0.43, "learning_rate": 0.0006299629557411752, "loss": 1.4873, "step": 2146 }, { "epoch": 0.43, "learning_rate": 0.0006296467290605797, "loss": 1.4973, "step": 2147 }, { "epoch": 0.43, "learning_rate": 0.0006293304467739584, "loss": 1.4432, "step": 2148 }, { "epoch": 0.43, "learning_rate": 0.000629014109016966, "loss": 1.4995, "step": 2149 }, { "epoch": 0.43, "learning_rate": 0.0006286977159252812, "loss": 1.4661, "step": 2150 }, { "epoch": 0.43, "learning_rate": 0.0006283812676346063, "loss": 1.4973, "step": 2151 }, { "epoch": 0.44, "learning_rate": 0.0006280647642806673, "loss": 1.4576, "step": 2152 }, { "epoch": 0.44, "learning_rate": 0.0006277482059992138, "loss": 1.4454, "step": 2153 }, { "epoch": 0.44, "learning_rate": 0.000627431592926019, "loss": 1.5093, "step": 2154 }, { "epoch": 0.44, "learning_rate": 0.0006271149251968793, "loss": 1.5046, "step": 2155 }, { "epoch": 0.44, "learning_rate": 0.0006267982029476152, "loss": 1.4705, "step": 2156 }, { "epoch": 0.44, "learning_rate": 0.0006264814263140701, "loss": 1.4873, "step": 2157 }, { "epoch": 0.44, "learning_rate": 0.0006261645954321109, "loss": 1.4665, "step": 2158 }, { "epoch": 0.44, "learning_rate": 0.0006258477104376276, "loss": 1.4685, "step": 2159 }, { "epoch": 0.44, "learning_rate": 0.0006255307714665334, "loss": 1.4477, "step": 2160 }, { "epoch": 0.44, "learning_rate": 0.0006252137786547647, "loss": 1.4889, "step": 2161 }, { "epoch": 0.44, "learning_rate": 0.0006248967321382814, "loss": 1.5146, "step": 2162 }, { "epoch": 0.44, "learning_rate": 0.0006245796320530659, "loss": 1.4806, "step": 2163 }, { "epoch": 0.44, "learning_rate": 0.0006242624785351235, "loss": 1.4712, "step": 2164 }, { "epoch": 0.44, "learning_rate": 0.0006239452717204831, "loss": 1.516, "step": 2165 }, { "epoch": 0.44, "learning_rate": 0.0006236280117451958, "loss": 1.5098, "step": 2166 }, { "epoch": 0.44, "learning_rate": 0.0006233106987453358, "loss": 1.5054, "step": 2167 }, { "epoch": 0.44, "learning_rate": 0.0006229933328569998, "loss": 1.4812, "step": 2168 }, { "epoch": 0.44, "learning_rate": 0.0006226759142163077, "loss": 1.4645, "step": 2169 }, { "epoch": 0.44, "learning_rate": 0.0006223584429594014, "loss": 1.4944, "step": 2170 }, { "epoch": 0.44, "learning_rate": 0.0006220409192224458, "loss": 1.493, "step": 2171 }, { "epoch": 0.44, "learning_rate": 0.0006217233431416279, "loss": 1.4598, "step": 2172 }, { "epoch": 0.44, "learning_rate": 0.0006214057148531578, "loss": 1.4952, "step": 2173 }, { "epoch": 0.44, "learning_rate": 0.0006210880344932674, "loss": 1.4892, "step": 2174 }, { "epoch": 0.44, "learning_rate": 0.0006207703021982113, "loss": 1.4792, "step": 2175 }, { "epoch": 0.44, "learning_rate": 0.0006204525181042659, "loss": 1.466, "step": 2176 }, { "epoch": 0.44, "learning_rate": 0.0006201346823477303, "loss": 1.5182, "step": 2177 }, { "epoch": 0.44, "learning_rate": 0.0006198167950649258, "loss": 1.4658, "step": 2178 }, { "epoch": 0.44, "learning_rate": 0.0006194988563921952, "loss": 1.4998, "step": 2179 }, { "epoch": 0.44, "learning_rate": 0.000619180866465904, "loss": 1.537, "step": 2180 }, { "epoch": 0.44, "learning_rate": 0.0006188628254224394, "loss": 1.5147, "step": 2181 }, { "epoch": 0.44, "learning_rate": 0.0006185447333982103, "loss": 1.4781, "step": 2182 }, { "epoch": 0.44, "learning_rate": 0.0006182265905296479, "loss": 1.4629, "step": 2183 }, { "epoch": 0.44, "learning_rate": 0.0006179083969532051, "loss": 1.4629, "step": 2184 }, { "epoch": 0.44, "learning_rate": 0.0006175901528053564, "loss": 1.4557, "step": 2185 }, { "epoch": 0.44, "learning_rate": 0.0006172718582225978, "loss": 1.5023, "step": 2186 }, { "epoch": 0.44, "learning_rate": 0.0006169535133414473, "loss": 1.4893, "step": 2187 }, { "epoch": 0.44, "learning_rate": 0.0006166351182984446, "loss": 1.4367, "step": 2188 }, { "epoch": 0.44, "learning_rate": 0.0006163166732301506, "loss": 1.5071, "step": 2189 }, { "epoch": 0.44, "learning_rate": 0.0006159981782731474, "loss": 1.4573, "step": 2190 }, { "epoch": 0.44, "learning_rate": 0.0006156796335640391, "loss": 1.5025, "step": 2191 }, { "epoch": 0.44, "learning_rate": 0.0006153610392394508, "loss": 1.4738, "step": 2192 }, { "epoch": 0.44, "learning_rate": 0.0006150423954360292, "loss": 1.4851, "step": 2193 }, { "epoch": 0.44, "learning_rate": 0.0006147237022904415, "loss": 1.5088, "step": 2194 }, { "epoch": 0.44, "learning_rate": 0.0006144049599393766, "loss": 1.4568, "step": 2195 }, { "epoch": 0.44, "learning_rate": 0.0006140861685195449, "loss": 1.491, "step": 2196 }, { "epoch": 0.44, "learning_rate": 0.0006137673281676768, "loss": 1.4776, "step": 2197 }, { "epoch": 0.44, "learning_rate": 0.0006134484390205246, "loss": 1.4586, "step": 2198 }, { "epoch": 0.44, "learning_rate": 0.0006131295012148612, "loss": 1.4581, "step": 2199 }, { "epoch": 0.44, "learning_rate": 0.0006128105148874801, "loss": 1.5126, "step": 2200 }, { "epoch": 0.44, "learning_rate": 0.0006124914801751961, "loss": 1.5297, "step": 2201 }, { "epoch": 0.45, "learning_rate": 0.0006121723972148444, "loss": 1.4962, "step": 2202 }, { "epoch": 0.45, "learning_rate": 0.0006118532661432812, "loss": 1.4809, "step": 2203 }, { "epoch": 0.45, "learning_rate": 0.0006115340870973829, "loss": 1.4883, "step": 2204 }, { "epoch": 0.45, "learning_rate": 0.000611214860214047, "loss": 1.4802, "step": 2205 }, { "epoch": 0.45, "learning_rate": 0.0006108955856301911, "loss": 1.5032, "step": 2206 }, { "epoch": 0.45, "learning_rate": 0.0006105762634827535, "loss": 1.5322, "step": 2207 }, { "epoch": 0.45, "learning_rate": 0.0006102568939086926, "loss": 1.4784, "step": 2208 }, { "epoch": 0.45, "learning_rate": 0.0006099374770449876, "loss": 1.505, "step": 2209 }, { "epoch": 0.45, "learning_rate": 0.0006096180130286376, "loss": 1.5235, "step": 2210 }, { "epoch": 0.45, "learning_rate": 0.000609298501996662, "loss": 1.4753, "step": 2211 }, { "epoch": 0.45, "learning_rate": 0.0006089789440861006, "loss": 1.4751, "step": 2212 }, { "epoch": 0.45, "learning_rate": 0.000608659339434013, "loss": 1.4906, "step": 2213 }, { "epoch": 0.45, "learning_rate": 0.0006083396881774789, "loss": 1.4797, "step": 2214 }, { "epoch": 0.45, "learning_rate": 0.0006080199904535984, "loss": 1.4604, "step": 2215 }, { "epoch": 0.45, "learning_rate": 0.0006077002463994907, "loss": 1.4976, "step": 2216 }, { "epoch": 0.45, "learning_rate": 0.0006073804561522958, "loss": 1.482, "step": 2217 }, { "epoch": 0.45, "learning_rate": 0.0006070606198491727, "loss": 1.4752, "step": 2218 }, { "epoch": 0.45, "learning_rate": 0.0006067407376273009, "loss": 1.4541, "step": 2219 }, { "epoch": 0.45, "learning_rate": 0.0006064208096238791, "loss": 1.4693, "step": 2220 }, { "epoch": 0.45, "learning_rate": 0.0006061008359761256, "loss": 1.4517, "step": 2221 }, { "epoch": 0.45, "learning_rate": 0.0006057808168212787, "loss": 1.502, "step": 2222 }, { "epoch": 0.45, "learning_rate": 0.0006054607522965958, "loss": 1.4661, "step": 2223 }, { "epoch": 0.45, "learning_rate": 0.000605140642539354, "loss": 1.4449, "step": 2224 }, { "epoch": 0.45, "learning_rate": 0.0006048204876868495, "loss": 1.4816, "step": 2225 }, { "epoch": 0.45, "learning_rate": 0.0006045002878763983, "loss": 1.5061, "step": 2226 }, { "epoch": 0.45, "learning_rate": 0.0006041800432453353, "loss": 1.4824, "step": 2227 }, { "epoch": 0.45, "learning_rate": 0.0006038597539310148, "loss": 1.4917, "step": 2228 }, { "epoch": 0.45, "learning_rate": 0.0006035394200708104, "loss": 1.4881, "step": 2229 }, { "epoch": 0.45, "learning_rate": 0.0006032190418021145, "loss": 1.4418, "step": 2230 }, { "epoch": 0.45, "learning_rate": 0.0006028986192623386, "loss": 1.4587, "step": 2231 }, { "epoch": 0.45, "learning_rate": 0.0006025781525889133, "loss": 1.482, "step": 2232 }, { "epoch": 0.45, "learning_rate": 0.0006022576419192882, "loss": 1.4499, "step": 2233 }, { "epoch": 0.45, "learning_rate": 0.0006019370873909315, "loss": 1.4934, "step": 2234 }, { "epoch": 0.45, "learning_rate": 0.0006016164891413305, "loss": 1.4862, "step": 2235 }, { "epoch": 0.45, "learning_rate": 0.0006012958473079914, "loss": 1.4586, "step": 2236 }, { "epoch": 0.45, "learning_rate": 0.0006009751620284383, "loss": 1.4931, "step": 2237 }, { "epoch": 0.45, "learning_rate": 0.0006006544334402148, "loss": 1.4458, "step": 2238 }, { "epoch": 0.45, "learning_rate": 0.0006003336616808827, "loss": 1.4916, "step": 2239 }, { "epoch": 0.45, "learning_rate": 0.0006000128468880223, "loss": 1.5023, "step": 2240 }, { "epoch": 0.45, "learning_rate": 0.0005996919891992323, "loss": 1.4431, "step": 2241 }, { "epoch": 0.45, "learning_rate": 0.0005993710887521302, "loss": 1.4909, "step": 2242 }, { "epoch": 0.45, "learning_rate": 0.0005990501456843513, "loss": 1.5081, "step": 2243 }, { "epoch": 0.45, "learning_rate": 0.0005987291601335494, "loss": 1.5, "step": 2244 }, { "epoch": 0.45, "learning_rate": 0.0005984081322373968, "loss": 1.5022, "step": 2245 }, { "epoch": 0.45, "learning_rate": 0.0005980870621335835, "loss": 1.4659, "step": 2246 }, { "epoch": 0.45, "learning_rate": 0.0005977659499598178, "loss": 1.4731, "step": 2247 }, { "epoch": 0.45, "learning_rate": 0.0005974447958538262, "loss": 1.5209, "step": 2248 }, { "epoch": 0.45, "learning_rate": 0.0005971235999533531, "loss": 1.4772, "step": 2249 }, { "epoch": 0.45, "learning_rate": 0.0005968023623961605, "loss": 1.4759, "step": 2250 }, { "epoch": 0.46, "learning_rate": 0.0005964810833200287, "loss": 1.4641, "step": 2251 }, { "epoch": 0.46, "learning_rate": 0.0005961597628627557, "loss": 1.4788, "step": 2252 }, { "epoch": 0.46, "learning_rate": 0.000595838401162157, "loss": 1.4798, "step": 2253 }, { "epoch": 0.46, "learning_rate": 0.0005955169983560663, "loss": 1.4716, "step": 2254 }, { "epoch": 0.46, "learning_rate": 0.0005951955545823342, "loss": 1.5126, "step": 2255 }, { "epoch": 0.46, "learning_rate": 0.0005948740699788295, "loss": 1.508, "step": 2256 }, { "epoch": 0.46, "learning_rate": 0.0005945525446834386, "loss": 1.4785, "step": 2257 }, { "epoch": 0.46, "learning_rate": 0.0005942309788340644, "loss": 1.4711, "step": 2258 }, { "epoch": 0.46, "learning_rate": 0.0005939093725686282, "loss": 1.4653, "step": 2259 }, { "epoch": 0.46, "learning_rate": 0.0005935877260250684, "loss": 1.4995, "step": 2260 }, { "epoch": 0.46, "learning_rate": 0.0005932660393413403, "loss": 1.4584, "step": 2261 }, { "epoch": 0.46, "learning_rate": 0.0005929443126554168, "loss": 1.499, "step": 2262 }, { "epoch": 0.46, "learning_rate": 0.0005926225461052877, "loss": 1.4612, "step": 2263 }, { "epoch": 0.46, "learning_rate": 0.0005923007398289603, "loss": 1.4702, "step": 2264 }, { "epoch": 0.46, "learning_rate": 0.0005919788939644583, "loss": 1.4907, "step": 2265 }, { "epoch": 0.46, "learning_rate": 0.000591657008649823, "loss": 1.4908, "step": 2266 }, { "epoch": 0.46, "learning_rate": 0.0005913350840231124, "loss": 1.4772, "step": 2267 }, { "epoch": 0.46, "learning_rate": 0.0005910131202224011, "loss": 1.4913, "step": 2268 }, { "epoch": 0.46, "learning_rate": 0.0005906911173857809, "loss": 1.4795, "step": 2269 }, { "epoch": 0.46, "learning_rate": 0.00059036907565136, "loss": 1.4646, "step": 2270 }, { "epoch": 0.46, "learning_rate": 0.0005900469951572636, "loss": 1.5158, "step": 2271 }, { "epoch": 0.46, "learning_rate": 0.0005897248760416334, "loss": 1.4538, "step": 2272 }, { "epoch": 0.46, "learning_rate": 0.0005894027184426274, "loss": 1.4706, "step": 2273 }, { "epoch": 0.46, "learning_rate": 0.0005890805224984205, "loss": 1.4907, "step": 2274 }, { "epoch": 0.46, "learning_rate": 0.0005887582883472041, "loss": 1.492, "step": 2275 }, { "epoch": 0.46, "learning_rate": 0.0005884360161271854, "loss": 1.4883, "step": 2276 }, { "epoch": 0.46, "learning_rate": 0.0005881137059765885, "loss": 1.4851, "step": 2277 }, { "epoch": 0.46, "learning_rate": 0.0005877913580336536, "loss": 1.4964, "step": 2278 }, { "epoch": 0.46, "learning_rate": 0.000587468972436637, "loss": 1.4884, "step": 2279 }, { "epoch": 0.46, "learning_rate": 0.0005871465493238112, "loss": 1.4639, "step": 2280 }, { "epoch": 0.46, "learning_rate": 0.0005868240888334653, "loss": 1.414, "step": 2281 }, { "epoch": 0.46, "learning_rate": 0.0005865015911039033, "loss": 1.467, "step": 2282 }, { "epoch": 0.46, "learning_rate": 0.0005861790562734459, "loss": 1.473, "step": 2283 }, { "epoch": 0.46, "learning_rate": 0.0005858564844804302, "loss": 1.4665, "step": 2284 }, { "epoch": 0.46, "learning_rate": 0.000585533875863208, "loss": 1.4412, "step": 2285 }, { "epoch": 0.46, "learning_rate": 0.0005852112305601477, "loss": 1.4683, "step": 2286 }, { "epoch": 0.46, "learning_rate": 0.0005848885487096334, "loss": 1.4898, "step": 2287 }, { "epoch": 0.46, "learning_rate": 0.0005845658304500644, "loss": 1.4672, "step": 2288 }, { "epoch": 0.46, "learning_rate": 0.000584243075919856, "loss": 1.5013, "step": 2289 }, { "epoch": 0.46, "learning_rate": 0.0005839202852574392, "loss": 1.461, "step": 2290 }, { "epoch": 0.46, "learning_rate": 0.00058359745860126, "loss": 1.4751, "step": 2291 }, { "epoch": 0.46, "learning_rate": 0.0005832745960897802, "loss": 1.4827, "step": 2292 }, { "epoch": 0.46, "learning_rate": 0.0005829516978614769, "loss": 1.4388, "step": 2293 }, { "epoch": 0.46, "learning_rate": 0.0005826287640548425, "loss": 1.4603, "step": 2294 }, { "epoch": 0.46, "learning_rate": 0.0005823057948083847, "loss": 1.4631, "step": 2295 }, { "epoch": 0.46, "learning_rate": 0.0005819827902606261, "loss": 1.4738, "step": 2296 }, { "epoch": 0.46, "learning_rate": 0.0005816597505501052, "loss": 1.452, "step": 2297 }, { "epoch": 0.46, "learning_rate": 0.0005813366758153746, "loss": 1.4729, "step": 2298 }, { "epoch": 0.46, "learning_rate": 0.0005810135661950028, "loss": 1.4497, "step": 2299 }, { "epoch": 0.46, "learning_rate": 0.0005806904218275727, "loss": 1.4466, "step": 2300 }, { "epoch": 0.47, "learning_rate": 0.000580367242851682, "loss": 1.507, "step": 2301 }, { "epoch": 0.47, "learning_rate": 0.000580044029405944, "loss": 1.4821, "step": 2302 }, { "epoch": 0.47, "learning_rate": 0.0005797207816289861, "loss": 1.4658, "step": 2303 }, { "epoch": 0.47, "learning_rate": 0.0005793974996594506, "loss": 1.4578, "step": 2304 }, { "epoch": 0.47, "learning_rate": 0.0005790741836359944, "loss": 1.4916, "step": 2305 }, { "epoch": 0.47, "learning_rate": 0.0005787508336972893, "loss": 1.4797, "step": 2306 }, { "epoch": 0.47, "learning_rate": 0.0005784274499820213, "loss": 1.4515, "step": 2307 }, { "epoch": 0.47, "learning_rate": 0.0005781040326288911, "loss": 1.4918, "step": 2308 }, { "epoch": 0.47, "learning_rate": 0.0005777805817766137, "loss": 1.5152, "step": 2309 }, { "epoch": 0.47, "learning_rate": 0.0005774570975639186, "loss": 1.4825, "step": 2310 }, { "epoch": 0.47, "learning_rate": 0.0005771335801295495, "loss": 1.4703, "step": 2311 }, { "epoch": 0.47, "learning_rate": 0.0005768100296122644, "loss": 1.4708, "step": 2312 }, { "epoch": 0.47, "learning_rate": 0.0005764864461508354, "loss": 1.4849, "step": 2313 }, { "epoch": 0.47, "learning_rate": 0.000576162829884049, "loss": 1.4758, "step": 2314 }, { "epoch": 0.47, "learning_rate": 0.0005758391809507055, "loss": 1.4801, "step": 2315 }, { "epoch": 0.47, "learning_rate": 0.000575515499489619, "loss": 1.4771, "step": 2316 }, { "epoch": 0.47, "learning_rate": 0.0005751917856396181, "loss": 1.4812, "step": 2317 }, { "epoch": 0.47, "learning_rate": 0.0005748680395395451, "loss": 1.4749, "step": 2318 }, { "epoch": 0.47, "learning_rate": 0.0005745442613282559, "loss": 1.4489, "step": 2319 }, { "epoch": 0.47, "learning_rate": 0.0005742204511446203, "loss": 1.4428, "step": 2320 }, { "epoch": 0.47, "learning_rate": 0.000573896609127522, "loss": 1.4609, "step": 2321 }, { "epoch": 0.47, "learning_rate": 0.000573572735415858, "loss": 1.4881, "step": 2322 }, { "epoch": 0.47, "learning_rate": 0.0005732488301485395, "loss": 1.4685, "step": 2323 }, { "epoch": 0.47, "learning_rate": 0.0005729248934644903, "loss": 1.444, "step": 2324 }, { "epoch": 0.47, "learning_rate": 0.0005726009255026484, "loss": 1.4503, "step": 2325 }, { "epoch": 0.47, "learning_rate": 0.0005722769264019652, "loss": 1.472, "step": 2326 }, { "epoch": 0.47, "learning_rate": 0.0005719528963014052, "loss": 1.4877, "step": 2327 }, { "epoch": 0.47, "learning_rate": 0.0005716288353399461, "loss": 1.4765, "step": 2328 }, { "epoch": 0.47, "learning_rate": 0.0005713047436565792, "loss": 1.5134, "step": 2329 }, { "epoch": 0.47, "learning_rate": 0.0005709806213903086, "loss": 1.4694, "step": 2330 }, { "epoch": 0.47, "learning_rate": 0.0005706564686801519, "loss": 1.4911, "step": 2331 }, { "epoch": 0.47, "learning_rate": 0.0005703322856651393, "loss": 1.4608, "step": 2332 }, { "epoch": 0.47, "learning_rate": 0.0005700080724843147, "loss": 1.4565, "step": 2333 }, { "epoch": 0.47, "learning_rate": 0.000569683829276734, "loss": 1.4675, "step": 2334 }, { "epoch": 0.47, "learning_rate": 0.0005693595561814665, "loss": 1.4686, "step": 2335 }, { "epoch": 0.47, "learning_rate": 0.0005690352533375948, "loss": 1.4446, "step": 2336 }, { "epoch": 0.47, "learning_rate": 0.0005687109208842131, "loss": 1.4986, "step": 2337 }, { "epoch": 0.47, "learning_rate": 0.0005683865589604294, "loss": 1.4799, "step": 2338 }, { "epoch": 0.47, "learning_rate": 0.0005680621677053637, "loss": 1.5109, "step": 2339 }, { "epoch": 0.47, "learning_rate": 0.0005677377472581488, "loss": 1.4754, "step": 2340 }, { "epoch": 0.47, "learning_rate": 0.0005674132977579302, "loss": 1.4828, "step": 2341 }, { "epoch": 0.47, "learning_rate": 0.0005670888193438653, "loss": 1.478, "step": 2342 }, { "epoch": 0.47, "learning_rate": 0.0005667643121551244, "loss": 1.4238, "step": 2343 }, { "epoch": 0.47, "learning_rate": 0.0005664397763308902, "loss": 1.5151, "step": 2344 }, { "epoch": 0.47, "learning_rate": 0.0005661152120103573, "loss": 1.4779, "step": 2345 }, { "epoch": 0.47, "learning_rate": 0.0005657906193327325, "loss": 1.4812, "step": 2346 }, { "epoch": 0.47, "learning_rate": 0.0005654659984372351, "loss": 1.4193, "step": 2347 }, { "epoch": 0.47, "learning_rate": 0.0005651413494630966, "loss": 1.4835, "step": 2348 }, { "epoch": 0.47, "learning_rate": 0.00056481667254956, "loss": 1.4801, "step": 2349 }, { "epoch": 0.48, "learning_rate": 0.0005644919678358806, "loss": 1.4704, "step": 2350 }, { "epoch": 0.48, "learning_rate": 0.000564167235461326, "loss": 1.4643, "step": 2351 }, { "epoch": 0.48, "learning_rate": 0.0005638424755651748, "loss": 1.4782, "step": 2352 }, { "epoch": 0.48, "learning_rate": 0.000563517688286718, "loss": 1.4826, "step": 2353 }, { "epoch": 0.48, "learning_rate": 0.0005631928737652584, "loss": 1.4616, "step": 2354 }, { "epoch": 0.48, "learning_rate": 0.0005628680321401101, "loss": 1.4819, "step": 2355 }, { "epoch": 0.48, "learning_rate": 0.0005625431635505993, "loss": 1.4482, "step": 2356 }, { "epoch": 0.48, "learning_rate": 0.0005622182681360633, "loss": 1.4435, "step": 2357 }, { "epoch": 0.48, "learning_rate": 0.0005618933460358513, "loss": 1.4667, "step": 2358 }, { "epoch": 0.48, "learning_rate": 0.0005615683973893234, "loss": 1.4311, "step": 2359 }, { "epoch": 0.48, "learning_rate": 0.0005612434223358518, "loss": 1.4698, "step": 2360 }, { "epoch": 0.48, "learning_rate": 0.0005609184210148195, "loss": 1.4366, "step": 2361 }, { "epoch": 0.48, "learning_rate": 0.0005605933935656208, "loss": 1.4962, "step": 2362 }, { "epoch": 0.48, "learning_rate": 0.0005602683401276614, "loss": 1.4849, "step": 2363 }, { "epoch": 0.48, "learning_rate": 0.0005599432608403586, "loss": 1.4696, "step": 2364 }, { "epoch": 0.48, "learning_rate": 0.0005596181558431394, "loss": 1.4673, "step": 2365 }, { "epoch": 0.48, "learning_rate": 0.0005592930252754432, "loss": 1.4628, "step": 2366 }, { "epoch": 0.48, "learning_rate": 0.0005589678692767199, "loss": 1.4496, "step": 2367 }, { "epoch": 0.48, "learning_rate": 0.0005586426879864301, "loss": 1.5075, "step": 2368 }, { "epoch": 0.48, "learning_rate": 0.0005583174815440454, "loss": 1.462, "step": 2369 }, { "epoch": 0.48, "learning_rate": 0.0005579922500890483, "loss": 1.4617, "step": 2370 }, { "epoch": 0.48, "learning_rate": 0.000557666993760932, "loss": 1.4323, "step": 2371 }, { "epoch": 0.48, "learning_rate": 0.0005573417126992003, "loss": 1.4968, "step": 2372 }, { "epoch": 0.48, "learning_rate": 0.0005570164070433675, "loss": 1.4511, "step": 2373 }, { "epoch": 0.48, "learning_rate": 0.0005566910769329584, "loss": 1.4786, "step": 2374 }, { "epoch": 0.48, "learning_rate": 0.0005563657225075088, "loss": 1.4995, "step": 2375 }, { "epoch": 0.48, "learning_rate": 0.0005560403439065643, "loss": 1.484, "step": 2376 }, { "epoch": 0.48, "learning_rate": 0.0005557149412696814, "loss": 1.4749, "step": 2377 }, { "epoch": 0.48, "learning_rate": 0.0005553895147364264, "loss": 1.439, "step": 2378 }, { "epoch": 0.48, "learning_rate": 0.0005550640644463765, "loss": 1.4483, "step": 2379 }, { "epoch": 0.48, "learning_rate": 0.0005547385905391181, "loss": 1.4641, "step": 2380 }, { "epoch": 0.48, "learning_rate": 0.0005544130931542488, "loss": 1.4645, "step": 2381 }, { "epoch": 0.48, "learning_rate": 0.0005540875724313756, "loss": 1.4622, "step": 2382 }, { "epoch": 0.48, "learning_rate": 0.0005537620285101158, "loss": 1.4736, "step": 2383 }, { "epoch": 0.48, "learning_rate": 0.0005534364615300964, "loss": 1.527, "step": 2384 }, { "epoch": 0.48, "learning_rate": 0.0005531108716309548, "loss": 1.4752, "step": 2385 }, { "epoch": 0.48, "learning_rate": 0.0005527852589523372, "loss": 1.4828, "step": 2386 }, { "epoch": 0.48, "learning_rate": 0.0005524596236339009, "loss": 1.4671, "step": 2387 }, { "epoch": 0.48, "learning_rate": 0.000552133965815312, "loss": 1.4569, "step": 2388 }, { "epoch": 0.48, "learning_rate": 0.0005518082856362464, "loss": 1.4841, "step": 2389 }, { "epoch": 0.48, "learning_rate": 0.0005514825832363899, "loss": 1.4556, "step": 2390 }, { "epoch": 0.48, "learning_rate": 0.0005511568587554375, "loss": 1.4513, "step": 2391 }, { "epoch": 0.48, "learning_rate": 0.0005508311123330939, "loss": 1.4598, "step": 2392 }, { "epoch": 0.48, "learning_rate": 0.0005505053441090729, "loss": 1.4941, "step": 2393 }, { "epoch": 0.48, "learning_rate": 0.0005501795542230982, "loss": 1.4966, "step": 2394 }, { "epoch": 0.48, "learning_rate": 0.000549853742814902, "loss": 1.4872, "step": 2395 }, { "epoch": 0.48, "learning_rate": 0.0005495279100242266, "loss": 1.4361, "step": 2396 }, { "epoch": 0.48, "learning_rate": 0.0005492020559908229, "loss": 1.4818, "step": 2397 }, { "epoch": 0.48, "learning_rate": 0.0005488761808544509, "loss": 1.4588, "step": 2398 }, { "epoch": 0.48, "learning_rate": 0.00054855028475488, "loss": 1.4529, "step": 2399 }, { "epoch": 0.49, "learning_rate": 0.0005482243678318885, "loss": 1.4212, "step": 2400 }, { "epoch": 0.49, "learning_rate": 0.0005478984302252632, "loss": 1.5158, "step": 2401 }, { "epoch": 0.49, "learning_rate": 0.0005475724720748002, "loss": 1.4631, "step": 2402 }, { "epoch": 0.49, "learning_rate": 0.0005472464935203045, "loss": 1.451, "step": 2403 }, { "epoch": 0.49, "learning_rate": 0.0005469204947015897, "loss": 1.4778, "step": 2404 }, { "epoch": 0.49, "learning_rate": 0.0005465944757584777, "loss": 1.4522, "step": 2405 }, { "epoch": 0.49, "learning_rate": 0.0005462684368308, "loss": 1.4756, "step": 2406 }, { "epoch": 0.49, "learning_rate": 0.0005459423780583955, "loss": 1.5033, "step": 2407 }, { "epoch": 0.49, "learning_rate": 0.0005456162995811123, "loss": 1.4226, "step": 2408 }, { "epoch": 0.49, "learning_rate": 0.0005452902015388069, "loss": 1.4733, "step": 2409 }, { "epoch": 0.49, "learning_rate": 0.0005449640840713443, "loss": 1.4768, "step": 2410 }, { "epoch": 0.49, "learning_rate": 0.0005446379473185972, "loss": 1.4935, "step": 2411 }, { "epoch": 0.49, "learning_rate": 0.0005443117914204475, "loss": 1.4805, "step": 2412 }, { "epoch": 0.49, "learning_rate": 0.0005439856165167844, "loss": 1.4409, "step": 2413 }, { "epoch": 0.49, "learning_rate": 0.0005436594227475061, "loss": 1.5164, "step": 2414 }, { "epoch": 0.49, "learning_rate": 0.0005433332102525182, "loss": 1.4887, "step": 2415 }, { "epoch": 0.49, "learning_rate": 0.0005430069791717346, "loss": 1.4557, "step": 2416 }, { "epoch": 0.49, "learning_rate": 0.0005426807296450776, "loss": 1.4333, "step": 2417 }, { "epoch": 0.49, "learning_rate": 0.0005423544618124763, "loss": 1.4762, "step": 2418 }, { "epoch": 0.49, "learning_rate": 0.0005420281758138687, "loss": 1.4845, "step": 2419 }, { "epoch": 0.49, "learning_rate": 0.0005417018717892004, "loss": 1.4615, "step": 2420 }, { "epoch": 0.49, "learning_rate": 0.0005413755498784243, "loss": 1.4506, "step": 2421 }, { "epoch": 0.49, "learning_rate": 0.0005410492102215015, "loss": 1.4753, "step": 2422 }, { "epoch": 0.49, "learning_rate": 0.0005407228529584, "loss": 1.4603, "step": 2423 }, { "epoch": 0.49, "learning_rate": 0.0005403964782290963, "loss": 1.5056, "step": 2424 }, { "epoch": 0.49, "learning_rate": 0.0005400700861735735, "loss": 1.4237, "step": 2425 }, { "epoch": 0.49, "learning_rate": 0.0005397436769318229, "loss": 1.4349, "step": 2426 }, { "epoch": 0.49, "learning_rate": 0.0005394172506438423, "loss": 1.4695, "step": 2427 }, { "epoch": 0.49, "learning_rate": 0.0005390908074496377, "loss": 1.4691, "step": 2428 }, { "epoch": 0.49, "learning_rate": 0.000538764347489222, "loss": 1.4679, "step": 2429 }, { "epoch": 0.49, "learning_rate": 0.0005384378709026149, "loss": 1.4747, "step": 2430 }, { "epoch": 0.49, "learning_rate": 0.0005381113778298439, "loss": 1.5051, "step": 2431 }, { "epoch": 0.49, "learning_rate": 0.000537784868410943, "loss": 1.4788, "step": 2432 }, { "epoch": 0.49, "learning_rate": 0.0005374583427859535, "loss": 1.4794, "step": 2433 }, { "epoch": 0.49, "learning_rate": 0.0005371318010949237, "loss": 1.5035, "step": 2434 }, { "epoch": 0.49, "learning_rate": 0.0005368052434779088, "loss": 1.4964, "step": 2435 }, { "epoch": 0.49, "learning_rate": 0.0005364786700749705, "loss": 1.4597, "step": 2436 }, { "epoch": 0.49, "learning_rate": 0.0005361520810261779, "loss": 1.4464, "step": 2437 }, { "epoch": 0.49, "learning_rate": 0.0005358254764716059, "loss": 1.4867, "step": 2438 }, { "epoch": 0.49, "learning_rate": 0.0005354988565513367, "loss": 1.5204, "step": 2439 }, { "epoch": 0.49, "learning_rate": 0.0005351722214054592, "loss": 1.4233, "step": 2440 }, { "epoch": 0.49, "learning_rate": 0.0005348455711740684, "loss": 1.49, "step": 2441 }, { "epoch": 0.49, "learning_rate": 0.0005345189059972659, "loss": 1.452, "step": 2442 }, { "epoch": 0.49, "learning_rate": 0.0005341922260151599, "loss": 1.4779, "step": 2443 }, { "epoch": 0.49, "learning_rate": 0.0005338655313678649, "loss": 1.4778, "step": 2444 }, { "epoch": 0.49, "learning_rate": 0.0005335388221955012, "loss": 1.4743, "step": 2445 }, { "epoch": 0.49, "learning_rate": 0.0005332120986381962, "loss": 1.4969, "step": 2446 }, { "epoch": 0.49, "learning_rate": 0.0005328853608360828, "loss": 1.443, "step": 2447 }, { "epoch": 0.49, "learning_rate": 0.0005325586089293, "loss": 1.4811, "step": 2448 }, { "epoch": 0.5, "learning_rate": 0.0005322318430579934, "loss": 1.4557, "step": 2449 }, { "epoch": 0.5, "learning_rate": 0.0005319050633623141, "loss": 1.452, "step": 2450 }, { "epoch": 0.5, "learning_rate": 0.0005315782699824194, "loss": 1.4442, "step": 2451 }, { "epoch": 0.5, "learning_rate": 0.0005312514630584724, "loss": 1.5058, "step": 2452 }, { "epoch": 0.5, "learning_rate": 0.0005309246427306417, "loss": 1.5127, "step": 2453 }, { "epoch": 0.5, "learning_rate": 0.0005305978091391019, "loss": 1.4687, "step": 2454 }, { "epoch": 0.5, "learning_rate": 0.0005302709624240336, "loss": 1.4746, "step": 2455 }, { "epoch": 0.5, "learning_rate": 0.0005299441027256226, "loss": 1.4854, "step": 2456 }, { "epoch": 0.5, "learning_rate": 0.0005296172301840602, "loss": 1.485, "step": 2457 }, { "epoch": 0.5, "learning_rate": 0.0005292903449395437, "loss": 1.4605, "step": 2458 }, { "epoch": 0.5, "learning_rate": 0.0005289634471322752, "loss": 1.4587, "step": 2459 }, { "epoch": 0.5, "learning_rate": 0.0005286365369024629, "loss": 1.4702, "step": 2460 }, { "epoch": 0.5, "learning_rate": 0.0005283096143903199, "loss": 1.4413, "step": 2461 }, { "epoch": 0.5, "learning_rate": 0.0005279826797360644, "loss": 1.4886, "step": 2462 }, { "epoch": 0.5, "learning_rate": 0.0005276557330799204, "loss": 1.4494, "step": 2463 }, { "epoch": 0.5, "learning_rate": 0.0005273287745621162, "loss": 1.5099, "step": 2464 }, { "epoch": 0.5, "learning_rate": 0.0005270018043228861, "loss": 1.4713, "step": 2465 }, { "epoch": 0.5, "learning_rate": 0.0005266748225024689, "loss": 1.4667, "step": 2466 }, { "epoch": 0.5, "learning_rate": 0.0005263478292411085, "loss": 1.4451, "step": 2467 }, { "epoch": 0.5, "learning_rate": 0.0005260208246790537, "loss": 1.5005, "step": 2468 }, { "epoch": 0.5, "learning_rate": 0.0005256938089565579, "loss": 1.4925, "step": 2469 }, { "epoch": 0.5, "learning_rate": 0.0005253667822138799, "loss": 1.4965, "step": 2470 }, { "epoch": 0.5, "learning_rate": 0.0005250397445912824, "loss": 1.4843, "step": 2471 }, { "epoch": 0.5, "learning_rate": 0.0005247126962290335, "loss": 1.4751, "step": 2472 }, { "epoch": 0.5, "learning_rate": 0.0005243856372674057, "loss": 1.5007, "step": 2473 }, { "epoch": 0.5, "learning_rate": 0.0005240585678466755, "loss": 1.492, "step": 2474 }, { "epoch": 0.5, "learning_rate": 0.0005237314881071248, "loss": 1.4818, "step": 2475 }, { "epoch": 0.5, "learning_rate": 0.0005234043981890394, "loss": 1.5116, "step": 2476 }, { "epoch": 0.5, "learning_rate": 0.0005230772982327093, "loss": 1.4386, "step": 2477 }, { "epoch": 0.5, "learning_rate": 0.0005227501883784294, "loss": 1.4553, "step": 2478 }, { "epoch": 0.5, "learning_rate": 0.0005224230687664981, "loss": 1.4506, "step": 2479 }, { "epoch": 0.5, "learning_rate": 0.0005220959395372185, "loss": 1.4589, "step": 2480 }, { "epoch": 0.5, "learning_rate": 0.0005217688008308979, "loss": 1.476, "step": 2481 }, { "epoch": 0.5, "learning_rate": 0.0005214416527878475, "loss": 1.461, "step": 2482 }, { "epoch": 0.5, "learning_rate": 0.0005211144955483821, "loss": 1.5251, "step": 2483 }, { "epoch": 0.5, "learning_rate": 0.0005207873292528211, "loss": 1.5105, "step": 2484 }, { "epoch": 0.5, "learning_rate": 0.0005204601540414872, "loss": 1.467, "step": 2485 }, { "epoch": 0.5, "learning_rate": 0.0005201329700547076, "loss": 1.4481, "step": 2486 }, { "epoch": 0.5, "learning_rate": 0.0005198057774328127, "loss": 1.4628, "step": 2487 }, { "epoch": 0.5, "learning_rate": 0.0005194785763161367, "loss": 1.4858, "step": 2488 }, { "epoch": 0.5, "learning_rate": 0.0005191513668450177, "loss": 1.4943, "step": 2489 }, { "epoch": 0.5, "learning_rate": 0.0005188241491597971, "loss": 1.4713, "step": 2490 }, { "epoch": 0.5, "learning_rate": 0.00051849692340082, "loss": 1.4867, "step": 2491 }, { "epoch": 0.5, "learning_rate": 0.0005181696897084351, "loss": 1.5134, "step": 2492 }, { "epoch": 0.5, "learning_rate": 0.0005178424482229938, "loss": 1.4525, "step": 2493 }, { "epoch": 0.5, "learning_rate": 0.0005175151990848515, "loss": 1.4404, "step": 2494 }, { "epoch": 0.5, "learning_rate": 0.0005171879424343671, "loss": 1.4776, "step": 2495 }, { "epoch": 0.5, "learning_rate": 0.000516860678411902, "loss": 1.4772, "step": 2496 }, { "epoch": 0.5, "learning_rate": 0.0005165334071578212, "loss": 1.4767, "step": 2497 }, { "epoch": 0.51, "learning_rate": 0.0005162061288124929, "loss": 1.4569, "step": 2498 }, { "epoch": 0.51, "learning_rate": 0.0005158788435162878, "loss": 1.4425, "step": 2499 }, { "epoch": 0.51, "learning_rate": 0.0005155515514095802, "loss": 1.4719, "step": 2500 }, { "epoch": 0.51, "learning_rate": 0.0005152242526327471, "loss": 1.4808, "step": 2501 }, { "epoch": 0.51, "learning_rate": 0.000514896947326168, "loss": 1.4595, "step": 2502 }, { "epoch": 0.51, "learning_rate": 0.0005145696356302257, "loss": 1.4963, "step": 2503 }, { "epoch": 0.51, "learning_rate": 0.0005142423176853058, "loss": 1.4459, "step": 2504 }, { "epoch": 0.51, "learning_rate": 0.000513914993631796, "loss": 1.4748, "step": 2505 }, { "epoch": 0.51, "learning_rate": 0.0005135876636100869, "loss": 1.497, "step": 2506 }, { "epoch": 0.51, "learning_rate": 0.0005132603277605722, "loss": 1.4909, "step": 2507 }, { "epoch": 0.51, "learning_rate": 0.000512932986223647, "loss": 1.454, "step": 2508 }, { "epoch": 0.51, "learning_rate": 0.0005126056391397099, "loss": 1.4566, "step": 2509 }, { "epoch": 0.51, "learning_rate": 0.0005122782866491613, "loss": 1.4888, "step": 2510 }, { "epoch": 0.51, "learning_rate": 0.0005119509288924041, "loss": 1.4769, "step": 2511 }, { "epoch": 0.51, "learning_rate": 0.0005116235660098433, "loss": 1.4448, "step": 2512 }, { "epoch": 0.51, "learning_rate": 0.0005112961981418864, "loss": 1.4931, "step": 2513 }, { "epoch": 0.51, "learning_rate": 0.0005109688254289427, "loss": 1.5049, "step": 2514 }, { "epoch": 0.51, "learning_rate": 0.0005106414480114238, "loss": 1.4509, "step": 2515 }, { "epoch": 0.51, "learning_rate": 0.0005103140660297432, "loss": 1.4548, "step": 2516 }, { "epoch": 0.51, "learning_rate": 0.0005099866796243163, "loss": 1.4572, "step": 2517 }, { "epoch": 0.51, "learning_rate": 0.0005096592889355608, "loss": 1.4758, "step": 2518 }, { "epoch": 0.51, "learning_rate": 0.0005093318941038957, "loss": 1.4585, "step": 2519 }, { "epoch": 0.51, "learning_rate": 0.0005090044952697421, "loss": 1.4739, "step": 2520 }, { "epoch": 0.51, "learning_rate": 0.0005086770925735225, "loss": 1.4769, "step": 2521 }, { "epoch": 0.51, "learning_rate": 0.0005083496861556619, "loss": 1.4633, "step": 2522 }, { "epoch": 0.51, "learning_rate": 0.0005080222761565856, "loss": 1.4518, "step": 2523 }, { "epoch": 0.51, "learning_rate": 0.0005076948627167215, "loss": 1.477, "step": 2524 }, { "epoch": 0.51, "learning_rate": 0.0005073674459764985, "loss": 1.4812, "step": 2525 }, { "epoch": 0.51, "learning_rate": 0.0005070400260763469, "loss": 1.4489, "step": 2526 }, { "epoch": 0.51, "learning_rate": 0.0005067126031566987, "loss": 1.498, "step": 2527 }, { "epoch": 0.51, "learning_rate": 0.000506385177357987, "loss": 1.4996, "step": 2528 }, { "epoch": 0.51, "learning_rate": 0.0005060577488206457, "loss": 1.5003, "step": 2529 }, { "epoch": 0.51, "learning_rate": 0.0005057303176851107, "loss": 1.4947, "step": 2530 }, { "epoch": 0.51, "learning_rate": 0.0005054028840918184, "loss": 1.4468, "step": 2531 }, { "epoch": 0.51, "learning_rate": 0.0005050754481812064, "loss": 1.43, "step": 2532 }, { "epoch": 0.51, "learning_rate": 0.0005047480100937133, "loss": 1.462, "step": 2533 }, { "epoch": 0.51, "learning_rate": 0.000504420569969779, "loss": 1.4341, "step": 2534 }, { "epoch": 0.51, "learning_rate": 0.0005040931279498435, "loss": 1.4488, "step": 2535 }, { "epoch": 0.51, "learning_rate": 0.0005037656841743482, "loss": 1.4819, "step": 2536 }, { "epoch": 0.51, "learning_rate": 0.0005034382387837354, "loss": 1.5048, "step": 2537 }, { "epoch": 0.51, "learning_rate": 0.0005031107919184475, "loss": 1.4659, "step": 2538 }, { "epoch": 0.51, "learning_rate": 0.0005027833437189278, "loss": 1.4751, "step": 2539 }, { "epoch": 0.51, "learning_rate": 0.0005024558943256202, "loss": 1.4883, "step": 2540 }, { "epoch": 0.51, "learning_rate": 0.0005021284438789694, "loss": 1.427, "step": 2541 }, { "epoch": 0.51, "learning_rate": 0.00050180099251942, "loss": 1.4299, "step": 2542 }, { "epoch": 0.51, "learning_rate": 0.0005014735403874174, "loss": 1.4899, "step": 2543 }, { "epoch": 0.51, "learning_rate": 0.0005011460876234072, "loss": 1.5004, "step": 2544 }, { "epoch": 0.51, "learning_rate": 0.0005008186343678352, "loss": 1.4819, "step": 2545 }, { "epoch": 0.51, "learning_rate": 0.0005004911807611475, "loss": 1.5011, "step": 2546 }, { "epoch": 0.51, "learning_rate": 0.0005001637269437903, "loss": 1.471, "step": 2547 }, { "epoch": 0.52, "learning_rate": 0.0004998362730562099, "loss": 1.4206, "step": 2548 }, { "epoch": 0.52, "learning_rate": 0.0004995088192388527, "loss": 1.4835, "step": 2549 }, { "epoch": 0.52, "learning_rate": 0.000499181365632165, "loss": 1.4421, "step": 2550 }, { "epoch": 0.52, "learning_rate": 0.0004988539123765928, "loss": 1.4679, "step": 2551 }, { "epoch": 0.52, "learning_rate": 0.0004985264596125826, "loss": 1.446, "step": 2552 }, { "epoch": 0.52, "learning_rate": 0.00049819900748058, "loss": 1.4369, "step": 2553 }, { "epoch": 0.52, "learning_rate": 0.0004978715561210307, "loss": 1.4614, "step": 2554 }, { "epoch": 0.52, "learning_rate": 0.0004975441056743799, "loss": 1.4282, "step": 2555 }, { "epoch": 0.52, "learning_rate": 0.0004972166562810723, "loss": 1.4676, "step": 2556 }, { "epoch": 0.52, "learning_rate": 0.0004968892080815527, "loss": 1.4628, "step": 2557 }, { "epoch": 0.52, "learning_rate": 0.0004965617612162647, "loss": 1.4489, "step": 2558 }, { "epoch": 0.52, "learning_rate": 0.0004962343158256517, "loss": 1.4651, "step": 2559 }, { "epoch": 0.52, "learning_rate": 0.0004959068720501568, "loss": 1.4784, "step": 2560 }, { "epoch": 0.52, "learning_rate": 0.0004955794300302213, "loss": 1.4583, "step": 2561 }, { "epoch": 0.52, "learning_rate": 0.0004952519899062868, "loss": 1.469, "step": 2562 }, { "epoch": 0.52, "learning_rate": 0.0004949245518187938, "loss": 1.4629, "step": 2563 }, { "epoch": 0.52, "learning_rate": 0.0004945971159081816, "loss": 1.4817, "step": 2564 }, { "epoch": 0.52, "learning_rate": 0.0004942696823148894, "loss": 1.4877, "step": 2565 }, { "epoch": 0.52, "learning_rate": 0.0004939422511793545, "loss": 1.4634, "step": 2566 }, { "epoch": 0.52, "learning_rate": 0.0004936148226420133, "loss": 1.4698, "step": 2567 }, { "epoch": 0.52, "learning_rate": 0.0004932873968433014, "loss": 1.473, "step": 2568 }, { "epoch": 0.52, "learning_rate": 0.0004929599739236532, "loss": 1.4701, "step": 2569 }, { "epoch": 0.52, "learning_rate": 0.0004926325540235017, "loss": 1.4925, "step": 2570 }, { "epoch": 0.52, "learning_rate": 0.0004923051372832786, "loss": 1.5094, "step": 2571 }, { "epoch": 0.52, "learning_rate": 0.0004919777238434146, "loss": 1.4749, "step": 2572 }, { "epoch": 0.52, "learning_rate": 0.0004916503138443384, "loss": 1.452, "step": 2573 }, { "epoch": 0.52, "learning_rate": 0.0004913229074264775, "loss": 1.4815, "step": 2574 }, { "epoch": 0.52, "learning_rate": 0.0004909955047302581, "loss": 1.5039, "step": 2575 }, { "epoch": 0.52, "learning_rate": 0.0004906681058961044, "loss": 1.4883, "step": 2576 }, { "epoch": 0.52, "learning_rate": 0.0004903407110644392, "loss": 1.4902, "step": 2577 }, { "epoch": 0.52, "learning_rate": 0.0004900133203756839, "loss": 1.5079, "step": 2578 }, { "epoch": 0.52, "learning_rate": 0.000489685933970257, "loss": 1.4465, "step": 2579 }, { "epoch": 0.52, "learning_rate": 0.0004893585519885763, "loss": 1.4776, "step": 2580 }, { "epoch": 0.52, "learning_rate": 0.0004890311745710574, "loss": 1.4803, "step": 2581 }, { "epoch": 0.52, "learning_rate": 0.0004887038018581136, "loss": 1.4642, "step": 2582 }, { "epoch": 0.52, "learning_rate": 0.0004883764339901566, "loss": 1.4698, "step": 2583 }, { "epoch": 0.52, "learning_rate": 0.0004880490711075959, "loss": 1.4888, "step": 2584 }, { "epoch": 0.52, "learning_rate": 0.0004877217133508388, "loss": 1.4536, "step": 2585 }, { "epoch": 0.52, "learning_rate": 0.0004873943608602901, "loss": 1.4745, "step": 2586 }, { "epoch": 0.52, "learning_rate": 0.000487067013776353, "loss": 1.4906, "step": 2587 }, { "epoch": 0.52, "learning_rate": 0.00048673967223942794, "loss": 1.4708, "step": 2588 }, { "epoch": 0.52, "learning_rate": 0.000486412336389913, "loss": 1.5103, "step": 2589 }, { "epoch": 0.52, "learning_rate": 0.000486085006368204, "loss": 1.4746, "step": 2590 }, { "epoch": 0.52, "learning_rate": 0.0004857576823146944, "loss": 1.4665, "step": 2591 }, { "epoch": 0.52, "learning_rate": 0.00048543036436977434, "loss": 1.5329, "step": 2592 }, { "epoch": 0.52, "learning_rate": 0.0004851030526738321, "loss": 1.4483, "step": 2593 }, { "epoch": 0.52, "learning_rate": 0.000484775747367253, "loss": 1.4604, "step": 2594 }, { "epoch": 0.52, "learning_rate": 0.00048444844859041976, "loss": 1.5035, "step": 2595 }, { "epoch": 0.52, "learning_rate": 0.0004841211564837121, "loss": 1.4805, "step": 2596 }, { "epoch": 0.53, "learning_rate": 0.0004837938711875073, "loss": 1.4722, "step": 2597 }, { "epoch": 0.53, "learning_rate": 0.0004834665928421789, "loss": 1.5175, "step": 2598 }, { "epoch": 0.53, "learning_rate": 0.0004831393215880981, "loss": 1.4742, "step": 2599 }, { "epoch": 0.53, "learning_rate": 0.00048281205756563303, "loss": 1.4737, "step": 2600 }, { "epoch": 0.53, "learning_rate": 0.00048248480091514853, "loss": 1.4965, "step": 2601 }, { "epoch": 0.53, "learning_rate": 0.0004821575517770064, "loss": 1.458, "step": 2602 }, { "epoch": 0.53, "learning_rate": 0.0004818303102915652, "loss": 1.4559, "step": 2603 }, { "epoch": 0.53, "learning_rate": 0.00048150307659918006, "loss": 1.4451, "step": 2604 }, { "epoch": 0.53, "learning_rate": 0.00048117585084020295, "loss": 1.5044, "step": 2605 }, { "epoch": 0.53, "learning_rate": 0.00048084863315498236, "loss": 1.4676, "step": 2606 }, { "epoch": 0.53, "learning_rate": 0.00048052142368386325, "loss": 1.483, "step": 2607 }, { "epoch": 0.53, "learning_rate": 0.0004801942225671873, "loss": 1.444, "step": 2608 }, { "epoch": 0.53, "learning_rate": 0.0004798670299452926, "loss": 1.4732, "step": 2609 }, { "epoch": 0.53, "learning_rate": 0.0004795398459585129, "loss": 1.4906, "step": 2610 }, { "epoch": 0.53, "learning_rate": 0.0004792126707471791, "loss": 1.447, "step": 2611 }, { "epoch": 0.53, "learning_rate": 0.000478885504451618, "loss": 1.4594, "step": 2612 }, { "epoch": 0.53, "learning_rate": 0.00047855834721215255, "loss": 1.462, "step": 2613 }, { "epoch": 0.53, "learning_rate": 0.000478231199169102, "loss": 1.4426, "step": 2614 }, { "epoch": 0.53, "learning_rate": 0.00047790406046278145, "loss": 1.4237, "step": 2615 }, { "epoch": 0.53, "learning_rate": 0.000477576931233502, "loss": 1.4704, "step": 2616 }, { "epoch": 0.53, "learning_rate": 0.0004772498116215708, "loss": 1.475, "step": 2617 }, { "epoch": 0.53, "learning_rate": 0.00047692270176729067, "loss": 1.4476, "step": 2618 }, { "epoch": 0.53, "learning_rate": 0.00047659560181096067, "loss": 1.4813, "step": 2619 }, { "epoch": 0.53, "learning_rate": 0.00047626851189287516, "loss": 1.4933, "step": 2620 }, { "epoch": 0.53, "learning_rate": 0.00047594143215332464, "loss": 1.4534, "step": 2621 }, { "epoch": 0.53, "learning_rate": 0.0004756143627325946, "loss": 1.4633, "step": 2622 }, { "epoch": 0.53, "learning_rate": 0.0004752873037709666, "loss": 1.4472, "step": 2623 }, { "epoch": 0.53, "learning_rate": 0.0004749602554087177, "loss": 1.4077, "step": 2624 }, { "epoch": 0.53, "learning_rate": 0.0004746332177861202, "loss": 1.4511, "step": 2625 }, { "epoch": 0.53, "learning_rate": 0.0004743061910434421, "loss": 1.4795, "step": 2626 }, { "epoch": 0.53, "learning_rate": 0.00047397917532094653, "loss": 1.4378, "step": 2627 }, { "epoch": 0.53, "learning_rate": 0.0004736521707588916, "loss": 1.4814, "step": 2628 }, { "epoch": 0.53, "learning_rate": 0.0004733251774975312, "loss": 1.4297, "step": 2629 }, { "epoch": 0.53, "learning_rate": 0.0004729981956771139, "loss": 1.5143, "step": 2630 }, { "epoch": 0.53, "learning_rate": 0.0004726712254378839, "loss": 1.4763, "step": 2631 }, { "epoch": 0.53, "learning_rate": 0.0004723442669200798, "loss": 1.4874, "step": 2632 }, { "epoch": 0.53, "learning_rate": 0.00047201732026393574, "loss": 1.4459, "step": 2633 }, { "epoch": 0.53, "learning_rate": 0.0004716903856096803, "loss": 1.4695, "step": 2634 }, { "epoch": 0.53, "learning_rate": 0.0004713634630975372, "loss": 1.4686, "step": 2635 }, { "epoch": 0.53, "learning_rate": 0.00047103655286772483, "loss": 1.4578, "step": 2636 }, { "epoch": 0.53, "learning_rate": 0.00047070965506045644, "loss": 1.4652, "step": 2637 }, { "epoch": 0.53, "learning_rate": 0.00047038276981593987, "loss": 1.4869, "step": 2638 }, { "epoch": 0.53, "learning_rate": 0.0004700558972743777, "loss": 1.4859, "step": 2639 }, { "epoch": 0.53, "learning_rate": 0.0004697290375759666, "loss": 1.5187, "step": 2640 }, { "epoch": 0.53, "learning_rate": 0.0004694021908608982, "loss": 1.4752, "step": 2641 }, { "epoch": 0.53, "learning_rate": 0.00046907535726935847, "loss": 1.4757, "step": 2642 }, { "epoch": 0.53, "learning_rate": 0.0004687485369415277, "loss": 1.4654, "step": 2643 }, { "epoch": 0.53, "learning_rate": 0.0004684217300175805, "loss": 1.4805, "step": 2644 }, { "epoch": 0.53, "learning_rate": 0.00046809493663768577, "loss": 1.4479, "step": 2645 }, { "epoch": 0.53, "learning_rate": 0.0004677681569420066, "loss": 1.4402, "step": 2646 }, { "epoch": 0.54, "learning_rate": 0.00046744139107070005, "loss": 1.472, "step": 2647 }, { "epoch": 0.54, "learning_rate": 0.00046711463916391735, "loss": 1.4782, "step": 2648 }, { "epoch": 0.54, "learning_rate": 0.0004667879013618038, "loss": 1.422, "step": 2649 }, { "epoch": 0.54, "learning_rate": 0.00046646117780449876, "loss": 1.5073, "step": 2650 }, { "epoch": 0.54, "learning_rate": 0.00046613446863213515, "loss": 1.5035, "step": 2651 }, { "epoch": 0.54, "learning_rate": 0.0004658077739848402, "loss": 1.4838, "step": 2652 }, { "epoch": 0.54, "learning_rate": 0.0004654810940027342, "loss": 1.465, "step": 2653 }, { "epoch": 0.54, "learning_rate": 0.00046515442882593175, "loss": 1.4587, "step": 2654 }, { "epoch": 0.54, "learning_rate": 0.00046482777859454085, "loss": 1.4625, "step": 2655 }, { "epoch": 0.54, "learning_rate": 0.0004645011434486633, "loss": 1.4708, "step": 2656 }, { "epoch": 0.54, "learning_rate": 0.00046417452352839417, "loss": 1.4596, "step": 2657 }, { "epoch": 0.54, "learning_rate": 0.00046384791897382237, "loss": 1.4842, "step": 2658 }, { "epoch": 0.54, "learning_rate": 0.00046352132992502957, "loss": 1.4734, "step": 2659 }, { "epoch": 0.54, "learning_rate": 0.0004631947565220913, "loss": 1.4723, "step": 2660 }, { "epoch": 0.54, "learning_rate": 0.0004628681989050763, "loss": 1.4522, "step": 2661 }, { "epoch": 0.54, "learning_rate": 0.00046254165721404655, "loss": 1.4648, "step": 2662 }, { "epoch": 0.54, "learning_rate": 0.0004622151315890571, "loss": 1.4332, "step": 2663 }, { "epoch": 0.54, "learning_rate": 0.0004618886221701563, "loss": 1.4698, "step": 2664 }, { "epoch": 0.54, "learning_rate": 0.00046156212909738523, "loss": 1.4793, "step": 2665 }, { "epoch": 0.54, "learning_rate": 0.00046123565251077815, "loss": 1.4935, "step": 2666 }, { "epoch": 0.54, "learning_rate": 0.0004609091925503623, "loss": 1.466, "step": 2667 }, { "epoch": 0.54, "learning_rate": 0.0004605827493561577, "loss": 1.49, "step": 2668 }, { "epoch": 0.54, "learning_rate": 0.0004602563230681772, "loss": 1.4346, "step": 2669 }, { "epoch": 0.54, "learning_rate": 0.0004599299138264267, "loss": 1.4332, "step": 2670 }, { "epoch": 0.54, "learning_rate": 0.00045960352177090393, "loss": 1.4413, "step": 2671 }, { "epoch": 0.54, "learning_rate": 0.00045927714704160005, "loss": 1.4575, "step": 2672 }, { "epoch": 0.54, "learning_rate": 0.0004589507897784987, "loss": 1.4526, "step": 2673 }, { "epoch": 0.54, "learning_rate": 0.00045862445012157573, "loss": 1.4964, "step": 2674 }, { "epoch": 0.54, "learning_rate": 0.0004582981282107996, "loss": 1.4498, "step": 2675 }, { "epoch": 0.54, "learning_rate": 0.0004579718241861314, "loss": 1.4625, "step": 2676 }, { "epoch": 0.54, "learning_rate": 0.0004576455381875238, "loss": 1.4512, "step": 2677 }, { "epoch": 0.54, "learning_rate": 0.0004573192703549226, "loss": 1.4388, "step": 2678 }, { "epoch": 0.54, "learning_rate": 0.0004569930208282653, "loss": 1.473, "step": 2679 }, { "epoch": 0.54, "learning_rate": 0.00045666678974748176, "loss": 1.4887, "step": 2680 }, { "epoch": 0.54, "learning_rate": 0.0004563405772524939, "loss": 1.4767, "step": 2681 }, { "epoch": 0.54, "learning_rate": 0.0004560143834832157, "loss": 1.5001, "step": 2682 }, { "epoch": 0.54, "learning_rate": 0.0004556882085795527, "loss": 1.4793, "step": 2683 }, { "epoch": 0.54, "learning_rate": 0.0004553620526814029, "loss": 1.4638, "step": 2684 }, { "epoch": 0.54, "learning_rate": 0.0004550359159286559, "loss": 1.4492, "step": 2685 }, { "epoch": 0.54, "learning_rate": 0.0004547097984611931, "loss": 1.4662, "step": 2686 }, { "epoch": 0.54, "learning_rate": 0.00045438370041888775, "loss": 1.4367, "step": 2687 }, { "epoch": 0.54, "learning_rate": 0.0004540576219416048, "loss": 1.4429, "step": 2688 }, { "epoch": 0.54, "learning_rate": 0.0004537315631692002, "loss": 1.4673, "step": 2689 }, { "epoch": 0.54, "learning_rate": 0.0004534055242415223, "loss": 1.464, "step": 2690 }, { "epoch": 0.54, "learning_rate": 0.00045307950529841043, "loss": 1.4048, "step": 2691 }, { "epoch": 0.54, "learning_rate": 0.0004527535064796955, "loss": 1.5079, "step": 2692 }, { "epoch": 0.54, "learning_rate": 0.0004524275279251998, "loss": 1.4229, "step": 2693 }, { "epoch": 0.54, "learning_rate": 0.00045210156977473705, "loss": 1.4634, "step": 2694 }, { "epoch": 0.54, "learning_rate": 0.0004517756321681117, "loss": 1.4987, "step": 2695 }, { "epoch": 0.55, "learning_rate": 0.00045144971524512006, "loss": 1.4392, "step": 2696 }, { "epoch": 0.55, "learning_rate": 0.00045112381914554913, "loss": 1.4387, "step": 2697 }, { "epoch": 0.55, "learning_rate": 0.00045079794400917716, "loss": 1.4741, "step": 2698 }, { "epoch": 0.55, "learning_rate": 0.0004504720899757734, "loss": 1.438, "step": 2699 }, { "epoch": 0.55, "learning_rate": 0.0004501462571850981, "loss": 1.4504, "step": 2700 }, { "epoch": 0.55, "learning_rate": 0.000449820445776902, "loss": 1.4355, "step": 2701 }, { "epoch": 0.55, "learning_rate": 0.0004494946558909271, "loss": 1.4748, "step": 2702 }, { "epoch": 0.55, "learning_rate": 0.00044916888766690624, "loss": 1.4729, "step": 2703 }, { "epoch": 0.55, "learning_rate": 0.0004488431412445625, "loss": 1.4335, "step": 2704 }, { "epoch": 0.55, "learning_rate": 0.0004485174167636101, "loss": 1.4585, "step": 2705 }, { "epoch": 0.55, "learning_rate": 0.0004481917143637537, "loss": 1.495, "step": 2706 }, { "epoch": 0.55, "learning_rate": 0.0004478660341846881, "loss": 1.4534, "step": 2707 }, { "epoch": 0.55, "learning_rate": 0.00044754037636609916, "loss": 1.4181, "step": 2708 }, { "epoch": 0.55, "learning_rate": 0.00044721474104766277, "loss": 1.4884, "step": 2709 }, { "epoch": 0.55, "learning_rate": 0.0004468891283690454, "loss": 1.5079, "step": 2710 }, { "epoch": 0.55, "learning_rate": 0.0004465635384699037, "loss": 1.4571, "step": 2711 }, { "epoch": 0.55, "learning_rate": 0.00044623797148988424, "loss": 1.5198, "step": 2712 }, { "epoch": 0.55, "learning_rate": 0.00044591242756862446, "loss": 1.4659, "step": 2713 }, { "epoch": 0.55, "learning_rate": 0.0004455869068457513, "loss": 1.4505, "step": 2714 }, { "epoch": 0.55, "learning_rate": 0.00044526140946088194, "loss": 1.4235, "step": 2715 }, { "epoch": 0.55, "learning_rate": 0.00044493593555362365, "loss": 1.4642, "step": 2716 }, { "epoch": 0.55, "learning_rate": 0.0004446104852635735, "loss": 1.4693, "step": 2717 }, { "epoch": 0.55, "learning_rate": 0.00044428505873031856, "loss": 1.5188, "step": 2718 }, { "epoch": 0.55, "learning_rate": 0.00044395965609343583, "loss": 1.4328, "step": 2719 }, { "epoch": 0.55, "learning_rate": 0.0004436342774924914, "loss": 1.4454, "step": 2720 }, { "epoch": 0.55, "learning_rate": 0.00044330892306704175, "loss": 1.4564, "step": 2721 }, { "epoch": 0.55, "learning_rate": 0.0004429835929566327, "loss": 1.4798, "step": 2722 }, { "epoch": 0.55, "learning_rate": 0.0004426582873007998, "loss": 1.4819, "step": 2723 }, { "epoch": 0.55, "learning_rate": 0.00044233300623906796, "loss": 1.4564, "step": 2724 }, { "epoch": 0.55, "learning_rate": 0.0004420077499109517, "loss": 1.4591, "step": 2725 }, { "epoch": 0.55, "learning_rate": 0.00044168251845595464, "loss": 1.4884, "step": 2726 }, { "epoch": 0.55, "learning_rate": 0.00044135731201357, "loss": 1.481, "step": 2727 }, { "epoch": 0.55, "learning_rate": 0.00044103213072328013, "loss": 1.4555, "step": 2728 }, { "epoch": 0.55, "learning_rate": 0.0004407069747245568, "loss": 1.4679, "step": 2729 }, { "epoch": 0.55, "learning_rate": 0.00044038184415686057, "loss": 1.4424, "step": 2730 }, { "epoch": 0.55, "learning_rate": 0.00044005673915964167, "loss": 1.4693, "step": 2731 }, { "epoch": 0.55, "learning_rate": 0.00043973165987233853, "loss": 1.4794, "step": 2732 }, { "epoch": 0.55, "learning_rate": 0.0004394066064343793, "loss": 1.4491, "step": 2733 }, { "epoch": 0.55, "learning_rate": 0.0004390815789851806, "loss": 1.4789, "step": 2734 }, { "epoch": 0.55, "learning_rate": 0.00043875657766414823, "loss": 1.487, "step": 2735 }, { "epoch": 0.55, "learning_rate": 0.00043843160261067656, "loss": 1.4742, "step": 2736 }, { "epoch": 0.55, "learning_rate": 0.0004381066539641488, "loss": 1.4939, "step": 2737 }, { "epoch": 0.55, "learning_rate": 0.0004377817318639368, "loss": 1.4682, "step": 2738 }, { "epoch": 0.55, "learning_rate": 0.0004374568364494007, "loss": 1.4578, "step": 2739 }, { "epoch": 0.55, "learning_rate": 0.0004371319678598899, "loss": 1.4386, "step": 2740 }, { "epoch": 0.55, "learning_rate": 0.0004368071262347417, "loss": 1.4526, "step": 2741 }, { "epoch": 0.55, "learning_rate": 0.00043648231171328206, "loss": 1.4506, "step": 2742 }, { "epoch": 0.55, "learning_rate": 0.00043615752443482543, "loss": 1.5004, "step": 2743 }, { "epoch": 0.55, "learning_rate": 0.00043583276453867427, "loss": 1.4676, "step": 2744 }, { "epoch": 0.55, "learning_rate": 0.00043550803216411944, "loss": 1.4537, "step": 2745 }, { "epoch": 0.56, "learning_rate": 0.00043518332745044015, "loss": 1.4359, "step": 2746 }, { "epoch": 0.56, "learning_rate": 0.0004348586505369035, "loss": 1.4542, "step": 2747 }, { "epoch": 0.56, "learning_rate": 0.00043453400156276493, "loss": 1.502, "step": 2748 }, { "epoch": 0.56, "learning_rate": 0.0004342093806672678, "loss": 1.4625, "step": 2749 }, { "epoch": 0.56, "learning_rate": 0.000433884787989643, "loss": 1.4773, "step": 2750 }, { "epoch": 0.56, "learning_rate": 0.0004335602236691099, "loss": 1.4527, "step": 2751 }, { "epoch": 0.56, "learning_rate": 0.00043323568784487553, "loss": 1.459, "step": 2752 }, { "epoch": 0.56, "learning_rate": 0.0004329111806561347, "loss": 1.4442, "step": 2753 }, { "epoch": 0.56, "learning_rate": 0.00043258670224206983, "loss": 1.47, "step": 2754 }, { "epoch": 0.56, "learning_rate": 0.00043226225274185115, "loss": 1.4373, "step": 2755 }, { "epoch": 0.56, "learning_rate": 0.00043193783229463634, "loss": 1.4546, "step": 2756 }, { "epoch": 0.56, "learning_rate": 0.00043161344103957066, "loss": 1.4343, "step": 2757 }, { "epoch": 0.56, "learning_rate": 0.00043128907911578696, "loss": 1.4672, "step": 2758 }, { "epoch": 0.56, "learning_rate": 0.0004309647466624054, "loss": 1.4657, "step": 2759 }, { "epoch": 0.56, "learning_rate": 0.00043064044381853345, "loss": 1.4594, "step": 2760 }, { "epoch": 0.56, "learning_rate": 0.00043031617072326634, "loss": 1.451, "step": 2761 }, { "epoch": 0.56, "learning_rate": 0.00042999192751568563, "loss": 1.4173, "step": 2762 }, { "epoch": 0.56, "learning_rate": 0.00042966771433486074, "loss": 1.4419, "step": 2763 }, { "epoch": 0.56, "learning_rate": 0.0004293435313198482, "loss": 1.4703, "step": 2764 }, { "epoch": 0.56, "learning_rate": 0.00042901937860969144, "loss": 1.4805, "step": 2765 }, { "epoch": 0.56, "learning_rate": 0.0004286952563434208, "loss": 1.4793, "step": 2766 }, { "epoch": 0.56, "learning_rate": 0.00042837116466005413, "loss": 1.4161, "step": 2767 }, { "epoch": 0.56, "learning_rate": 0.000428047103698595, "loss": 1.471, "step": 2768 }, { "epoch": 0.56, "learning_rate": 0.00042772307359803486, "loss": 1.4459, "step": 2769 }, { "epoch": 0.56, "learning_rate": 0.00042739907449735167, "loss": 1.485, "step": 2770 }, { "epoch": 0.56, "learning_rate": 0.00042707510653550987, "loss": 1.4649, "step": 2771 }, { "epoch": 0.56, "learning_rate": 0.00042675116985146065, "loss": 1.4932, "step": 2772 }, { "epoch": 0.56, "learning_rate": 0.0004264272645841419, "loss": 1.4557, "step": 2773 }, { "epoch": 0.56, "learning_rate": 0.0004261033908724782, "loss": 1.476, "step": 2774 }, { "epoch": 0.56, "learning_rate": 0.00042577954885537986, "loss": 1.4463, "step": 2775 }, { "epoch": 0.56, "learning_rate": 0.00042545573867174425, "loss": 1.4538, "step": 2776 }, { "epoch": 0.56, "learning_rate": 0.000425131960460455, "loss": 1.4574, "step": 2777 }, { "epoch": 0.56, "learning_rate": 0.00042480821436038186, "loss": 1.5038, "step": 2778 }, { "epoch": 0.56, "learning_rate": 0.000424484500510381, "loss": 1.482, "step": 2779 }, { "epoch": 0.56, "learning_rate": 0.0004241608190492948, "loss": 1.4749, "step": 2780 }, { "epoch": 0.56, "learning_rate": 0.0004238371701159511, "loss": 1.4771, "step": 2781 }, { "epoch": 0.56, "learning_rate": 0.0004235135538491646, "loss": 1.497, "step": 2782 }, { "epoch": 0.56, "learning_rate": 0.0004231899703877356, "loss": 1.3925, "step": 2783 }, { "epoch": 0.56, "learning_rate": 0.0004228664198704505, "loss": 1.4672, "step": 2784 }, { "epoch": 0.56, "learning_rate": 0.00042254290243608136, "loss": 1.4397, "step": 2785 }, { "epoch": 0.56, "learning_rate": 0.0004222194182233863, "loss": 1.4709, "step": 2786 }, { "epoch": 0.56, "learning_rate": 0.000421895967371109, "loss": 1.4759, "step": 2787 }, { "epoch": 0.56, "learning_rate": 0.00042157255001797875, "loss": 1.4844, "step": 2788 }, { "epoch": 0.56, "learning_rate": 0.0004212491663027107, "loss": 1.4496, "step": 2789 }, { "epoch": 0.56, "learning_rate": 0.0004209258163640056, "loss": 1.4381, "step": 2790 }, { "epoch": 0.56, "learning_rate": 0.0004206025003405494, "loss": 1.4415, "step": 2791 }, { "epoch": 0.56, "learning_rate": 0.000420279218371014, "loss": 1.4883, "step": 2792 }, { "epoch": 0.56, "learning_rate": 0.00041995597059405604, "loss": 1.4435, "step": 2793 }, { "epoch": 0.56, "learning_rate": 0.00041963275714831797, "loss": 1.4428, "step": 2794 }, { "epoch": 0.57, "learning_rate": 0.00041930957817242745, "loss": 1.4383, "step": 2795 }, { "epoch": 0.57, "learning_rate": 0.00041898643380499723, "loss": 1.449, "step": 2796 }, { "epoch": 0.57, "learning_rate": 0.00041866332418462527, "loss": 1.4859, "step": 2797 }, { "epoch": 0.57, "learning_rate": 0.000418340249449895, "loss": 1.4688, "step": 2798 }, { "epoch": 0.57, "learning_rate": 0.00041801720973937395, "loss": 1.4591, "step": 2799 }, { "epoch": 0.57, "learning_rate": 0.0004176942051916155, "loss": 1.4966, "step": 2800 }, { "epoch": 0.57, "learning_rate": 0.0004173712359451576, "loss": 1.4742, "step": 2801 }, { "epoch": 0.57, "learning_rate": 0.0004170483021385232, "loss": 1.4549, "step": 2802 }, { "epoch": 0.57, "learning_rate": 0.00041672540391021984, "loss": 1.4188, "step": 2803 }, { "epoch": 0.57, "learning_rate": 0.00041640254139874015, "loss": 1.4819, "step": 2804 }, { "epoch": 0.57, "learning_rate": 0.0004160797147425609, "loss": 1.4558, "step": 2805 }, { "epoch": 0.57, "learning_rate": 0.000415756924080144, "loss": 1.4554, "step": 2806 }, { "epoch": 0.57, "learning_rate": 0.0004154341695499357, "loss": 1.478, "step": 2807 }, { "epoch": 0.57, "learning_rate": 0.0004151114512903667, "loss": 1.4086, "step": 2808 }, { "epoch": 0.57, "learning_rate": 0.0004147887694398523, "loss": 1.4573, "step": 2809 }, { "epoch": 0.57, "learning_rate": 0.0004144661241367922, "loss": 1.4779, "step": 2810 }, { "epoch": 0.57, "learning_rate": 0.00041414351551957, "loss": 1.4742, "step": 2811 }, { "epoch": 0.57, "learning_rate": 0.0004138209437265541, "loss": 1.4296, "step": 2812 }, { "epoch": 0.57, "learning_rate": 0.00041349840889609693, "loss": 1.4733, "step": 2813 }, { "epoch": 0.57, "learning_rate": 0.00041317591116653486, "loss": 1.4364, "step": 2814 }, { "epoch": 0.57, "learning_rate": 0.00041285345067618867, "loss": 1.4801, "step": 2815 }, { "epoch": 0.57, "learning_rate": 0.0004125310275633631, "loss": 1.4698, "step": 2816 }, { "epoch": 0.57, "learning_rate": 0.0004122086419663465, "loss": 1.4873, "step": 2817 }, { "epoch": 0.57, "learning_rate": 0.00041188629402341155, "loss": 1.5089, "step": 2818 }, { "epoch": 0.57, "learning_rate": 0.0004115639838728147, "loss": 1.4308, "step": 2819 }, { "epoch": 0.57, "learning_rate": 0.000411241711652796, "loss": 1.4832, "step": 2820 }, { "epoch": 0.57, "learning_rate": 0.00041091947750157944, "loss": 1.5058, "step": 2821 }, { "epoch": 0.57, "learning_rate": 0.00041059728155737286, "loss": 1.4616, "step": 2822 }, { "epoch": 0.57, "learning_rate": 0.0004102751239583669, "loss": 1.4633, "step": 2823 }, { "epoch": 0.57, "learning_rate": 0.0004099530048427366, "loss": 1.4391, "step": 2824 }, { "epoch": 0.57, "learning_rate": 0.00040963092434864013, "loss": 1.4863, "step": 2825 }, { "epoch": 0.57, "learning_rate": 0.0004093088826142192, "loss": 1.4958, "step": 2826 }, { "epoch": 0.57, "learning_rate": 0.00040898687977759893, "loss": 1.468, "step": 2827 }, { "epoch": 0.57, "learning_rate": 0.0004086649159768878, "loss": 1.437, "step": 2828 }, { "epoch": 0.57, "learning_rate": 0.0004083429913501771, "loss": 1.4269, "step": 2829 }, { "epoch": 0.57, "learning_rate": 0.0004080211060355418, "loss": 1.457, "step": 2830 }, { "epoch": 0.57, "learning_rate": 0.00040769926017103985, "loss": 1.422, "step": 2831 }, { "epoch": 0.57, "learning_rate": 0.00040737745389471234, "loss": 1.4336, "step": 2832 }, { "epoch": 0.57, "learning_rate": 0.0004070556873445833, "loss": 1.4824, "step": 2833 }, { "epoch": 0.57, "learning_rate": 0.0004067339606586599, "loss": 1.4835, "step": 2834 }, { "epoch": 0.57, "learning_rate": 0.0004064122739749318, "loss": 1.46, "step": 2835 }, { "epoch": 0.57, "learning_rate": 0.0004060906274313719, "loss": 1.4541, "step": 2836 }, { "epoch": 0.57, "learning_rate": 0.0004057690211659357, "loss": 1.424, "step": 2837 }, { "epoch": 0.57, "learning_rate": 0.00040544745531656155, "loss": 1.4395, "step": 2838 }, { "epoch": 0.57, "learning_rate": 0.0004051259300211704, "loss": 1.4081, "step": 2839 }, { "epoch": 0.57, "learning_rate": 0.00040480444541766573, "loss": 1.4719, "step": 2840 }, { "epoch": 0.57, "learning_rate": 0.00040448300164393393, "loss": 1.4848, "step": 2841 }, { "epoch": 0.57, "learning_rate": 0.0004041615988378431, "loss": 1.4598, "step": 2842 }, { "epoch": 0.57, "learning_rate": 0.0004038402371372444, "loss": 1.4631, "step": 2843 }, { "epoch": 0.57, "learning_rate": 0.0004035189166799713, "loss": 1.4442, "step": 2844 }, { "epoch": 0.58, "learning_rate": 0.0004031976376038395, "loss": 1.4716, "step": 2845 }, { "epoch": 0.58, "learning_rate": 0.00040287640004664694, "loss": 1.4647, "step": 2846 }, { "epoch": 0.58, "learning_rate": 0.0004025552041461738, "loss": 1.4514, "step": 2847 }, { "epoch": 0.58, "learning_rate": 0.0004022340500401822, "loss": 1.4695, "step": 2848 }, { "epoch": 0.58, "learning_rate": 0.00040191293786641656, "loss": 1.4502, "step": 2849 }, { "epoch": 0.58, "learning_rate": 0.0004015918677626033, "loss": 1.4327, "step": 2850 }, { "epoch": 0.58, "learning_rate": 0.0004012708398664506, "loss": 1.464, "step": 2851 }, { "epoch": 0.58, "learning_rate": 0.0004009498543156487, "loss": 1.4463, "step": 2852 }, { "epoch": 0.58, "learning_rate": 0.00040062891124787, "loss": 1.4584, "step": 2853 }, { "epoch": 0.58, "learning_rate": 0.00040030801080076777, "loss": 1.4804, "step": 2854 }, { "epoch": 0.58, "learning_rate": 0.0003999871531119779, "loss": 1.4891, "step": 2855 }, { "epoch": 0.58, "learning_rate": 0.00039966633831911737, "loss": 1.4705, "step": 2856 }, { "epoch": 0.58, "learning_rate": 0.0003993455665597852, "loss": 1.4584, "step": 2857 }, { "epoch": 0.58, "learning_rate": 0.0003990248379715617, "loss": 1.4343, "step": 2858 }, { "epoch": 0.58, "learning_rate": 0.0003987041526920089, "loss": 1.4622, "step": 2859 }, { "epoch": 0.58, "learning_rate": 0.00039838351085866953, "loss": 1.4763, "step": 2860 }, { "epoch": 0.58, "learning_rate": 0.00039806291260906864, "loss": 1.4553, "step": 2861 }, { "epoch": 0.58, "learning_rate": 0.000397742358080712, "loss": 1.3943, "step": 2862 }, { "epoch": 0.58, "learning_rate": 0.00039742184741108685, "loss": 1.5135, "step": 2863 }, { "epoch": 0.58, "learning_rate": 0.0003971013807376616, "loss": 1.4438, "step": 2864 }, { "epoch": 0.58, "learning_rate": 0.00039678095819788573, "loss": 1.467, "step": 2865 }, { "epoch": 0.58, "learning_rate": 0.0003964605799291897, "loss": 1.5005, "step": 2866 }, { "epoch": 0.58, "learning_rate": 0.0003961402460689852, "loss": 1.4495, "step": 2867 }, { "epoch": 0.58, "learning_rate": 0.00039581995675466475, "loss": 1.4519, "step": 2868 }, { "epoch": 0.58, "learning_rate": 0.00039549971212360173, "loss": 1.4553, "step": 2869 }, { "epoch": 0.58, "learning_rate": 0.0003951795123131505, "loss": 1.4933, "step": 2870 }, { "epoch": 0.58, "learning_rate": 0.00039485935746064627, "loss": 1.485, "step": 2871 }, { "epoch": 0.58, "learning_rate": 0.00039453924770340435, "loss": 1.4854, "step": 2872 }, { "epoch": 0.58, "learning_rate": 0.0003942191831787214, "loss": 1.4445, "step": 2873 }, { "epoch": 0.58, "learning_rate": 0.00039389916402387436, "loss": 1.4596, "step": 2874 }, { "epoch": 0.58, "learning_rate": 0.0003935791903761209, "loss": 1.4864, "step": 2875 }, { "epoch": 0.58, "learning_rate": 0.00039325926237269906, "loss": 1.4235, "step": 2876 }, { "epoch": 0.58, "learning_rate": 0.0003929393801508273, "loss": 1.4989, "step": 2877 }, { "epoch": 0.58, "learning_rate": 0.00039261954384770437, "loss": 1.4498, "step": 2878 }, { "epoch": 0.58, "learning_rate": 0.00039229975360050934, "loss": 1.4385, "step": 2879 }, { "epoch": 0.58, "learning_rate": 0.00039198000954640175, "loss": 1.4764, "step": 2880 }, { "epoch": 0.58, "learning_rate": 0.00039166031182252104, "loss": 1.4513, "step": 2881 }, { "epoch": 0.58, "learning_rate": 0.00039134066056598706, "loss": 1.4341, "step": 2882 }, { "epoch": 0.58, "learning_rate": 0.00039102105591389956, "loss": 1.4566, "step": 2883 }, { "epoch": 0.58, "learning_rate": 0.0003907014980033381, "loss": 1.4446, "step": 2884 }, { "epoch": 0.58, "learning_rate": 0.0003903819869713625, "loss": 1.4153, "step": 2885 }, { "epoch": 0.58, "learning_rate": 0.0003900625229550125, "loss": 1.4675, "step": 2886 }, { "epoch": 0.58, "learning_rate": 0.0003897431060913074, "loss": 1.4471, "step": 2887 }, { "epoch": 0.58, "learning_rate": 0.00038942373651724657, "loss": 1.4671, "step": 2888 }, { "epoch": 0.58, "learning_rate": 0.00038910441436980906, "loss": 1.4648, "step": 2889 }, { "epoch": 0.58, "learning_rate": 0.00038878513978595316, "loss": 1.4828, "step": 2890 }, { "epoch": 0.58, "learning_rate": 0.00038846591290261716, "loss": 1.4456, "step": 2891 }, { "epoch": 0.58, "learning_rate": 0.00038814673385671896, "loss": 1.4744, "step": 2892 }, { "epoch": 0.58, "learning_rate": 0.00038782760278515565, "loss": 1.4592, "step": 2893 }, { "epoch": 0.59, "learning_rate": 0.00038750851982480403, "loss": 1.4461, "step": 2894 }, { "epoch": 0.59, "learning_rate": 0.0003871894851125201, "loss": 1.4561, "step": 2895 }, { "epoch": 0.59, "learning_rate": 0.00038687049878513905, "loss": 1.4325, "step": 2896 }, { "epoch": 0.59, "learning_rate": 0.00038655156097947555, "loss": 1.4685, "step": 2897 }, { "epoch": 0.59, "learning_rate": 0.00038623267183232325, "loss": 1.4623, "step": 2898 }, { "epoch": 0.59, "learning_rate": 0.0003859138314804552, "loss": 1.4805, "step": 2899 }, { "epoch": 0.59, "learning_rate": 0.0003855950400606233, "loss": 1.4781, "step": 2900 }, { "epoch": 0.59, "learning_rate": 0.00038527629770955855, "loss": 1.4591, "step": 2901 }, { "epoch": 0.59, "learning_rate": 0.000384957604563971, "loss": 1.4885, "step": 2902 }, { "epoch": 0.59, "learning_rate": 0.0003846389607605492, "loss": 1.4338, "step": 2903 }, { "epoch": 0.59, "learning_rate": 0.0003843203664359609, "loss": 1.4131, "step": 2904 }, { "epoch": 0.59, "learning_rate": 0.00038400182172685266, "loss": 1.4518, "step": 2905 }, { "epoch": 0.59, "learning_rate": 0.0003836833267698495, "loss": 1.418, "step": 2906 }, { "epoch": 0.59, "learning_rate": 0.0003833648817015554, "loss": 1.4758, "step": 2907 }, { "epoch": 0.59, "learning_rate": 0.0003830464866585527, "loss": 1.4717, "step": 2908 }, { "epoch": 0.59, "learning_rate": 0.00038272814177740236, "loss": 1.4451, "step": 2909 }, { "epoch": 0.59, "learning_rate": 0.00038240984719464376, "loss": 1.4726, "step": 2910 }, { "epoch": 0.59, "learning_rate": 0.0003820916030467949, "loss": 1.4691, "step": 2911 }, { "epoch": 0.59, "learning_rate": 0.00038177340947035207, "loss": 1.4652, "step": 2912 }, { "epoch": 0.59, "learning_rate": 0.00038145526660178963, "loss": 1.489, "step": 2913 }, { "epoch": 0.59, "learning_rate": 0.00038113717457756083, "loss": 1.4793, "step": 2914 }, { "epoch": 0.59, "learning_rate": 0.0003808191335340961, "loss": 1.5374, "step": 2915 }, { "epoch": 0.59, "learning_rate": 0.00038050114360780484, "loss": 1.461, "step": 2916 }, { "epoch": 0.59, "learning_rate": 0.0003801832049350743, "loss": 1.4753, "step": 2917 }, { "epoch": 0.59, "learning_rate": 0.0003798653176522696, "loss": 1.4726, "step": 2918 }, { "epoch": 0.59, "learning_rate": 0.0003795474818957341, "loss": 1.4282, "step": 2919 }, { "epoch": 0.59, "learning_rate": 0.000379229697801789, "loss": 1.4606, "step": 2920 }, { "epoch": 0.59, "learning_rate": 0.0003789119655067327, "loss": 1.4397, "step": 2921 }, { "epoch": 0.59, "learning_rate": 0.00037859428514684223, "loss": 1.4812, "step": 2922 }, { "epoch": 0.59, "learning_rate": 0.0003782766568583721, "loss": 1.4969, "step": 2923 }, { "epoch": 0.59, "learning_rate": 0.0003779590807775544, "loss": 1.4543, "step": 2924 }, { "epoch": 0.59, "learning_rate": 0.00037764155704059866, "loss": 1.4797, "step": 2925 }, { "epoch": 0.59, "learning_rate": 0.0003773240857836925, "loss": 1.458, "step": 2926 }, { "epoch": 0.59, "learning_rate": 0.00037700666714300023, "loss": 1.4402, "step": 2927 }, { "epoch": 0.59, "learning_rate": 0.00037668930125466427, "loss": 1.4929, "step": 2928 }, { "epoch": 0.59, "learning_rate": 0.00037637198825480424, "loss": 1.4483, "step": 2929 }, { "epoch": 0.59, "learning_rate": 0.00037605472827951684, "loss": 1.4358, "step": 2930 }, { "epoch": 0.59, "learning_rate": 0.0003757375214648764, "loss": 1.4877, "step": 2931 }, { "epoch": 0.59, "learning_rate": 0.0003754203679469344, "loss": 1.4441, "step": 2932 }, { "epoch": 0.59, "learning_rate": 0.0003751032678617187, "loss": 1.4222, "step": 2933 }, { "epoch": 0.59, "learning_rate": 0.0003747862213452353, "loss": 1.4845, "step": 2934 }, { "epoch": 0.59, "learning_rate": 0.0003744692285334668, "loss": 1.4437, "step": 2935 }, { "epoch": 0.59, "learning_rate": 0.0003741522895623725, "loss": 1.498, "step": 2936 }, { "epoch": 0.59, "learning_rate": 0.0003738354045678891, "loss": 1.4389, "step": 2937 }, { "epoch": 0.59, "learning_rate": 0.0003735185736859299, "loss": 1.486, "step": 2938 }, { "epoch": 0.59, "learning_rate": 0.0003732017970523848, "loss": 1.4795, "step": 2939 }, { "epoch": 0.59, "learning_rate": 0.0003728850748031207, "loss": 1.4805, "step": 2940 }, { "epoch": 0.59, "learning_rate": 0.00037256840707398123, "loss": 1.4049, "step": 2941 }, { "epoch": 0.59, "learning_rate": 0.0003722517940007863, "loss": 1.4315, "step": 2942 }, { "epoch": 0.59, "learning_rate": 0.0003719352357193327, "loss": 1.4638, "step": 2943 }, { "epoch": 0.6, "learning_rate": 0.0003716187323653939, "loss": 1.455, "step": 2944 }, { "epoch": 0.6, "learning_rate": 0.0003713022840747189, "loss": 1.4291, "step": 2945 }, { "epoch": 0.6, "learning_rate": 0.0003709858909830341, "loss": 1.4713, "step": 2946 }, { "epoch": 0.6, "learning_rate": 0.0003706695532260417, "loss": 1.4346, "step": 2947 }, { "epoch": 0.6, "learning_rate": 0.0003703532709394203, "loss": 1.4862, "step": 2948 }, { "epoch": 0.6, "learning_rate": 0.0003700370442588248, "loss": 1.4495, "step": 2949 }, { "epoch": 0.6, "learning_rate": 0.0003697208733198862, "loss": 1.4676, "step": 2950 }, { "epoch": 0.6, "learning_rate": 0.0003694047582582113, "loss": 1.4695, "step": 2951 }, { "epoch": 0.6, "learning_rate": 0.00036908869920938326, "loss": 1.4427, "step": 2952 }, { "epoch": 0.6, "learning_rate": 0.00036877269630896113, "loss": 1.4498, "step": 2953 }, { "epoch": 0.6, "learning_rate": 0.00036845674969247994, "loss": 1.4435, "step": 2954 }, { "epoch": 0.6, "learning_rate": 0.0003681408594954505, "loss": 1.4267, "step": 2955 }, { "epoch": 0.6, "learning_rate": 0.00036782502585335956, "loss": 1.4615, "step": 2956 }, { "epoch": 0.6, "learning_rate": 0.00036750924890166926, "loss": 1.4686, "step": 2957 }, { "epoch": 0.6, "learning_rate": 0.0003671935287758177, "loss": 1.479, "step": 2958 }, { "epoch": 0.6, "learning_rate": 0.0003668778656112187, "loss": 1.4655, "step": 2959 }, { "epoch": 0.6, "learning_rate": 0.00036656225954326147, "loss": 1.4442, "step": 2960 }, { "epoch": 0.6, "learning_rate": 0.0003662467107073107, "loss": 1.4799, "step": 2961 }, { "epoch": 0.6, "learning_rate": 0.0003659312192387069, "loss": 1.4577, "step": 2962 }, { "epoch": 0.6, "learning_rate": 0.0003656157852727652, "loss": 1.4428, "step": 2963 }, { "epoch": 0.6, "learning_rate": 0.00036530040894477685, "loss": 1.4944, "step": 2964 }, { "epoch": 0.6, "learning_rate": 0.000364985090390008, "loss": 1.4748, "step": 2965 }, { "epoch": 0.6, "learning_rate": 0.0003646698297437001, "loss": 1.4851, "step": 2966 }, { "epoch": 0.6, "learning_rate": 0.00036435462714106987, "loss": 1.4469, "step": 2967 }, { "epoch": 0.6, "learning_rate": 0.0003640394827173089, "loss": 1.4371, "step": 2968 }, { "epoch": 0.6, "learning_rate": 0.0003637243966075842, "loss": 1.4357, "step": 2969 }, { "epoch": 0.6, "learning_rate": 0.0003634093689470371, "loss": 1.4293, "step": 2970 }, { "epoch": 0.6, "learning_rate": 0.00036309439987078455, "loss": 1.4711, "step": 2971 }, { "epoch": 0.6, "learning_rate": 0.0003627794895139181, "loss": 1.414, "step": 2972 }, { "epoch": 0.6, "learning_rate": 0.0003624646380115041, "loss": 1.4393, "step": 2973 }, { "epoch": 0.6, "learning_rate": 0.00036214984549858367, "loss": 1.4671, "step": 2974 }, { "epoch": 0.6, "learning_rate": 0.00036183511211017284, "loss": 1.4536, "step": 2975 }, { "epoch": 0.6, "learning_rate": 0.00036152043798126166, "loss": 1.4648, "step": 2976 }, { "epoch": 0.6, "learning_rate": 0.0003612058232468154, "loss": 1.4456, "step": 2977 }, { "epoch": 0.6, "learning_rate": 0.0003608912680417737, "loss": 1.4149, "step": 2978 }, { "epoch": 0.6, "learning_rate": 0.0003605767725010505, "loss": 1.4637, "step": 2979 }, { "epoch": 0.6, "learning_rate": 0.00036026233675953435, "loss": 1.4926, "step": 2980 }, { "epoch": 0.6, "learning_rate": 0.00035994796095208825, "loss": 1.4857, "step": 2981 }, { "epoch": 0.6, "learning_rate": 0.0003596336452135488, "loss": 1.4546, "step": 2982 }, { "epoch": 0.6, "learning_rate": 0.0003593193896787277, "loss": 1.4797, "step": 2983 }, { "epoch": 0.6, "learning_rate": 0.00035900519448241023, "loss": 1.4481, "step": 2984 }, { "epoch": 0.6, "learning_rate": 0.0003586910597593563, "loss": 1.423, "step": 2985 }, { "epoch": 0.6, "learning_rate": 0.00035837698564429945, "loss": 1.4983, "step": 2986 }, { "epoch": 0.6, "learning_rate": 0.0003580629722719475, "loss": 1.5208, "step": 2987 }, { "epoch": 0.6, "learning_rate": 0.00035774901977698195, "loss": 1.4182, "step": 2988 }, { "epoch": 0.6, "learning_rate": 0.0003574351282940584, "loss": 1.4947, "step": 2989 }, { "epoch": 0.6, "learning_rate": 0.0003571212979578062, "loss": 1.4532, "step": 2990 }, { "epoch": 0.6, "learning_rate": 0.00035680752890282865, "loss": 1.4463, "step": 2991 }, { "epoch": 0.6, "learning_rate": 0.0003564938212637023, "loss": 1.4426, "step": 2992 }, { "epoch": 0.61, "learning_rate": 0.00035618017517497825, "loss": 1.459, "step": 2993 }, { "epoch": 0.61, "learning_rate": 0.00035586659077118, "loss": 1.4209, "step": 2994 }, { "epoch": 0.61, "learning_rate": 0.0003555530681868054, "loss": 1.4675, "step": 2995 }, { "epoch": 0.61, "learning_rate": 0.0003552396075563257, "loss": 1.4382, "step": 2996 }, { "epoch": 0.61, "learning_rate": 0.0003549262090141855, "loss": 1.4495, "step": 2997 }, { "epoch": 0.61, "learning_rate": 0.00035461287269480257, "loss": 1.4424, "step": 2998 }, { "epoch": 0.61, "learning_rate": 0.00035429959873256844, "loss": 1.4648, "step": 2999 }, { "epoch": 0.61, "learning_rate": 0.0003539863872618474, "loss": 1.4383, "step": 3000 }, { "epoch": 0.61, "learning_rate": 0.00035367323841697723, "loss": 1.4621, "step": 3001 }, { "epoch": 0.61, "learning_rate": 0.0003533601523322688, "loss": 1.4341, "step": 3002 }, { "epoch": 0.61, "learning_rate": 0.000353047129142006, "loss": 1.481, "step": 3003 }, { "epoch": 0.61, "learning_rate": 0.00035273416898044585, "loss": 1.4522, "step": 3004 }, { "epoch": 0.61, "learning_rate": 0.00035242127198181853, "loss": 1.4414, "step": 3005 }, { "epoch": 0.61, "learning_rate": 0.0003521084382803262, "loss": 1.4216, "step": 3006 }, { "epoch": 0.61, "learning_rate": 0.00035179566801014515, "loss": 1.4291, "step": 3007 }, { "epoch": 0.61, "learning_rate": 0.0003514829613054236, "loss": 1.4307, "step": 3008 }, { "epoch": 0.61, "learning_rate": 0.0003511703183002827, "loss": 1.5083, "step": 3009 }, { "epoch": 0.61, "learning_rate": 0.0003508577391288166, "loss": 1.4675, "step": 3010 }, { "epoch": 0.61, "learning_rate": 0.0003505452239250918, "loss": 1.4601, "step": 3011 }, { "epoch": 0.61, "learning_rate": 0.00035023277282314713, "loss": 1.4257, "step": 3012 }, { "epoch": 0.61, "learning_rate": 0.0003499203859569943, "loss": 1.3892, "step": 3013 }, { "epoch": 0.61, "learning_rate": 0.00034960806346061735, "loss": 1.4654, "step": 3014 }, { "epoch": 0.61, "learning_rate": 0.00034929580546797277, "loss": 1.443, "step": 3015 }, { "epoch": 0.61, "learning_rate": 0.00034898361211298923, "loss": 1.4636, "step": 3016 }, { "epoch": 0.61, "learning_rate": 0.0003486714835295679, "loss": 1.434, "step": 3017 }, { "epoch": 0.61, "learning_rate": 0.0003483594198515818, "loss": 1.4353, "step": 3018 }, { "epoch": 0.61, "learning_rate": 0.00034804742121287657, "loss": 1.4648, "step": 3019 }, { "epoch": 0.61, "learning_rate": 0.0003477354877472697, "loss": 1.4575, "step": 3020 }, { "epoch": 0.61, "learning_rate": 0.0003474236195885507, "loss": 1.4663, "step": 3021 }, { "epoch": 0.61, "learning_rate": 0.0003471118168704811, "loss": 1.4847, "step": 3022 }, { "epoch": 0.61, "learning_rate": 0.00034680007972679475, "loss": 1.4784, "step": 3023 }, { "epoch": 0.61, "learning_rate": 0.0003464884082911964, "loss": 1.4708, "step": 3024 }, { "epoch": 0.61, "learning_rate": 0.00034617680269736367, "loss": 1.4552, "step": 3025 }, { "epoch": 0.61, "learning_rate": 0.00034586526307894534, "loss": 1.4683, "step": 3026 }, { "epoch": 0.61, "learning_rate": 0.00034555378956956215, "loss": 1.4886, "step": 3027 }, { "epoch": 0.61, "learning_rate": 0.00034524238230280633, "loss": 1.4848, "step": 3028 }, { "epoch": 0.61, "learning_rate": 0.0003449310414122417, "loss": 1.4636, "step": 3029 }, { "epoch": 0.61, "learning_rate": 0.00034461976703140394, "loss": 1.4739, "step": 3030 }, { "epoch": 0.61, "learning_rate": 0.0003443085592937997, "loss": 1.446, "step": 3031 }, { "epoch": 0.61, "learning_rate": 0.00034399741833290734, "loss": 1.4528, "step": 3032 }, { "epoch": 0.61, "learning_rate": 0.0003436863442821765, "loss": 1.4325, "step": 3033 }, { "epoch": 0.61, "learning_rate": 0.0003433753372750281, "loss": 1.4762, "step": 3034 }, { "epoch": 0.61, "learning_rate": 0.0003430643974448545, "loss": 1.4404, "step": 3035 }, { "epoch": 0.61, "learning_rate": 0.0003427535249250192, "loss": 1.4459, "step": 3036 }, { "epoch": 0.61, "learning_rate": 0.0003424427198488564, "loss": 1.473, "step": 3037 }, { "epoch": 0.61, "learning_rate": 0.0003421319823496718, "loss": 1.4589, "step": 3038 }, { "epoch": 0.61, "learning_rate": 0.000341821312560742, "loss": 1.4584, "step": 3039 }, { "epoch": 0.61, "learning_rate": 0.00034151071061531475, "loss": 1.4633, "step": 3040 }, { "epoch": 0.61, "learning_rate": 0.00034120017664660836, "loss": 1.4726, "step": 3041 }, { "epoch": 0.61, "learning_rate": 0.0003408897107878124, "loss": 1.4279, "step": 3042 }, { "epoch": 0.62, "learning_rate": 0.0003405793131720866, "loss": 1.4327, "step": 3043 }, { "epoch": 0.62, "learning_rate": 0.000340268983932562, "loss": 1.494, "step": 3044 }, { "epoch": 0.62, "learning_rate": 0.0003399587232023401, "loss": 1.4651, "step": 3045 }, { "epoch": 0.62, "learning_rate": 0.000339648531114493, "loss": 1.4729, "step": 3046 }, { "epoch": 0.62, "learning_rate": 0.0003393384078020634, "loss": 1.4677, "step": 3047 }, { "epoch": 0.62, "learning_rate": 0.0003390283533980646, "loss": 1.4587, "step": 3048 }, { "epoch": 0.62, "learning_rate": 0.0003387183680354801, "loss": 1.4394, "step": 3049 }, { "epoch": 0.62, "learning_rate": 0.00033840845184726387, "loss": 1.4648, "step": 3050 }, { "epoch": 0.62, "learning_rate": 0.0003380986049663405, "loss": 1.4591, "step": 3051 }, { "epoch": 0.62, "learning_rate": 0.00033778882752560436, "loss": 1.463, "step": 3052 }, { "epoch": 0.62, "learning_rate": 0.0003374791196579204, "loss": 1.458, "step": 3053 }, { "epoch": 0.62, "learning_rate": 0.0003371694814961238, "loss": 1.4379, "step": 3054 }, { "epoch": 0.62, "learning_rate": 0.00033685991317301936, "loss": 1.4732, "step": 3055 }, { "epoch": 0.62, "learning_rate": 0.00033655041482138234, "loss": 1.4451, "step": 3056 }, { "epoch": 0.62, "learning_rate": 0.00033624098657395786, "loss": 1.4435, "step": 3057 }, { "epoch": 0.62, "learning_rate": 0.0003359316285634611, "loss": 1.4361, "step": 3058 }, { "epoch": 0.62, "learning_rate": 0.00033562234092257695, "loss": 1.4386, "step": 3059 }, { "epoch": 0.62, "learning_rate": 0.00033531312378396025, "loss": 1.4321, "step": 3060 }, { "epoch": 0.62, "learning_rate": 0.00033500397728023537, "loss": 1.4622, "step": 3061 }, { "epoch": 0.62, "learning_rate": 0.0003346949015439966, "loss": 1.4607, "step": 3062 }, { "epoch": 0.62, "learning_rate": 0.0003343858967078079, "loss": 1.4264, "step": 3063 }, { "epoch": 0.62, "learning_rate": 0.00033407696290420265, "loss": 1.4426, "step": 3064 }, { "epoch": 0.62, "learning_rate": 0.00033376810026568385, "loss": 1.4741, "step": 3065 }, { "epoch": 0.62, "learning_rate": 0.0003334593089247241, "loss": 1.4675, "step": 3066 }, { "epoch": 0.62, "learning_rate": 0.00033315058901376516, "loss": 1.4716, "step": 3067 }, { "epoch": 0.62, "learning_rate": 0.00033284194066521836, "loss": 1.4562, "step": 3068 }, { "epoch": 0.62, "learning_rate": 0.00033253336401146424, "loss": 1.4234, "step": 3069 }, { "epoch": 0.62, "learning_rate": 0.00033222485918485265, "loss": 1.4483, "step": 3070 }, { "epoch": 0.62, "learning_rate": 0.0003319164263177026, "loss": 1.4471, "step": 3071 }, { "epoch": 0.62, "learning_rate": 0.00033160806554230243, "loss": 1.425, "step": 3072 }, { "epoch": 0.62, "learning_rate": 0.00033129977699090896, "loss": 1.4575, "step": 3073 }, { "epoch": 0.62, "learning_rate": 0.0003309915607957487, "loss": 1.4336, "step": 3074 }, { "epoch": 0.62, "learning_rate": 0.0003306834170890168, "loss": 1.4512, "step": 3075 }, { "epoch": 0.62, "learning_rate": 0.00033037534600287744, "loss": 1.4597, "step": 3076 }, { "epoch": 0.62, "learning_rate": 0.00033006734766946355, "loss": 1.4542, "step": 3077 }, { "epoch": 0.62, "learning_rate": 0.00032975942222087713, "loss": 1.5007, "step": 3078 }, { "epoch": 0.62, "learning_rate": 0.00032945156978918835, "loss": 1.481, "step": 3079 }, { "epoch": 0.62, "learning_rate": 0.0003291437905064365, "loss": 1.5101, "step": 3080 }, { "epoch": 0.62, "learning_rate": 0.0003288360845046295, "loss": 1.4624, "step": 3081 }, { "epoch": 0.62, "learning_rate": 0.0003285284519157438, "loss": 1.4124, "step": 3082 }, { "epoch": 0.62, "learning_rate": 0.0003282208928717241, "loss": 1.4976, "step": 3083 }, { "epoch": 0.62, "learning_rate": 0.00032791340750448414, "loss": 1.4444, "step": 3084 }, { "epoch": 0.62, "learning_rate": 0.00032760599594590525, "loss": 1.5009, "step": 3085 }, { "epoch": 0.62, "learning_rate": 0.0003272986583278376, "loss": 1.4708, "step": 3086 }, { "epoch": 0.62, "learning_rate": 0.0003269913947820998, "loss": 1.489, "step": 3087 }, { "epoch": 0.62, "learning_rate": 0.0003266842054404783, "loss": 1.4522, "step": 3088 }, { "epoch": 0.62, "learning_rate": 0.00032637709043472805, "loss": 1.4506, "step": 3089 }, { "epoch": 0.62, "learning_rate": 0.0003260700498965721, "loss": 1.4564, "step": 3090 }, { "epoch": 0.62, "learning_rate": 0.0003257630839577009, "loss": 1.3944, "step": 3091 }, { "epoch": 0.63, "learning_rate": 0.0003254561927497738, "loss": 1.461, "step": 3092 }, { "epoch": 0.63, "learning_rate": 0.0003251493764044176, "loss": 1.4673, "step": 3093 }, { "epoch": 0.63, "learning_rate": 0.00032484263505322717, "loss": 1.4424, "step": 3094 }, { "epoch": 0.63, "learning_rate": 0.0003245359688277653, "loss": 1.4256, "step": 3095 }, { "epoch": 0.63, "learning_rate": 0.0003242293778595622, "loss": 1.4842, "step": 3096 }, { "epoch": 0.63, "learning_rate": 0.00032392286228011627, "loss": 1.4316, "step": 3097 }, { "epoch": 0.63, "learning_rate": 0.00032361642222089295, "loss": 1.4808, "step": 3098 }, { "epoch": 0.63, "learning_rate": 0.000323310057813326, "loss": 1.4435, "step": 3099 }, { "epoch": 0.63, "learning_rate": 0.00032300376918881625, "loss": 1.4487, "step": 3100 }, { "epoch": 0.63, "learning_rate": 0.00032269755647873217, "loss": 1.427, "step": 3101 }, { "epoch": 0.63, "learning_rate": 0.0003223914198144098, "loss": 1.4921, "step": 3102 }, { "epoch": 0.63, "learning_rate": 0.00032208535932715245, "loss": 1.455, "step": 3103 }, { "epoch": 0.63, "learning_rate": 0.0003217793751482305, "loss": 1.4258, "step": 3104 }, { "epoch": 0.63, "learning_rate": 0.00032147346740888207, "loss": 1.4731, "step": 3105 }, { "epoch": 0.63, "learning_rate": 0.0003211676362403121, "loss": 1.47, "step": 3106 }, { "epoch": 0.63, "learning_rate": 0.00032086188177369305, "loss": 1.4638, "step": 3107 }, { "epoch": 0.63, "learning_rate": 0.00032055620414016414, "loss": 1.4684, "step": 3108 }, { "epoch": 0.63, "learning_rate": 0.00032025060347083196, "loss": 1.4236, "step": 3109 }, { "epoch": 0.63, "learning_rate": 0.0003199450798967697, "loss": 1.4584, "step": 3110 }, { "epoch": 0.63, "learning_rate": 0.00031963963354901786, "loss": 1.4533, "step": 3111 }, { "epoch": 0.63, "learning_rate": 0.0003193342645585836, "loss": 1.4335, "step": 3112 }, { "epoch": 0.63, "learning_rate": 0.000319028973056441, "loss": 1.4756, "step": 3113 }, { "epoch": 0.63, "learning_rate": 0.0003187237591735308, "loss": 1.4782, "step": 3114 }, { "epoch": 0.63, "learning_rate": 0.0003184186230407608, "loss": 1.4841, "step": 3115 }, { "epoch": 0.63, "learning_rate": 0.0003181135647890047, "loss": 1.502, "step": 3116 }, { "epoch": 0.63, "learning_rate": 0.0003178085845491035, "loss": 1.4719, "step": 3117 }, { "epoch": 0.63, "learning_rate": 0.00031750368245186446, "loss": 1.4458, "step": 3118 }, { "epoch": 0.63, "learning_rate": 0.0003171988586280614, "loss": 1.4611, "step": 3119 }, { "epoch": 0.63, "learning_rate": 0.00031689411320843456, "loss": 1.4645, "step": 3120 }, { "epoch": 0.63, "learning_rate": 0.0003165894463236907, "loss": 1.4212, "step": 3121 }, { "epoch": 0.63, "learning_rate": 0.00031628485810450234, "loss": 1.4457, "step": 3122 }, { "epoch": 0.63, "learning_rate": 0.0003159803486815089, "loss": 1.4223, "step": 3123 }, { "epoch": 0.63, "learning_rate": 0.0003156759181853156, "loss": 1.4044, "step": 3124 }, { "epoch": 0.63, "learning_rate": 0.00031537156674649403, "loss": 1.4679, "step": 3125 }, { "epoch": 0.63, "learning_rate": 0.0003150672944955818, "loss": 1.4517, "step": 3126 }, { "epoch": 0.63, "learning_rate": 0.00031476310156308273, "loss": 1.4481, "step": 3127 }, { "epoch": 0.63, "learning_rate": 0.00031445898807946616, "loss": 1.4671, "step": 3128 }, { "epoch": 0.63, "learning_rate": 0.0003141549541751677, "loss": 1.4588, "step": 3129 }, { "epoch": 0.63, "learning_rate": 0.00031385099998058885, "loss": 1.4381, "step": 3130 }, { "epoch": 0.63, "learning_rate": 0.0003135471256260968, "loss": 1.4612, "step": 3131 }, { "epoch": 0.63, "learning_rate": 0.00031324333124202453, "loss": 1.4554, "step": 3132 }, { "epoch": 0.63, "learning_rate": 0.00031293961695867103, "loss": 1.4859, "step": 3133 }, { "epoch": 0.63, "learning_rate": 0.0003126359829063001, "loss": 1.4881, "step": 3134 }, { "epoch": 0.63, "learning_rate": 0.0003123324292151418, "loss": 1.4874, "step": 3135 }, { "epoch": 0.63, "learning_rate": 0.00031202895601539185, "loss": 1.4629, "step": 3136 }, { "epoch": 0.63, "learning_rate": 0.0003117255634372109, "loss": 1.4721, "step": 3137 }, { "epoch": 0.63, "learning_rate": 0.0003114222516107255, "loss": 1.4548, "step": 3138 }, { "epoch": 0.63, "learning_rate": 0.00031111902066602727, "loss": 1.4839, "step": 3139 }, { "epoch": 0.63, "learning_rate": 0.00031081587073317316, "loss": 1.4028, "step": 3140 }, { "epoch": 0.63, "learning_rate": 0.0003105128019421855, "loss": 1.4472, "step": 3141 }, { "epoch": 0.64, "learning_rate": 0.0003102098144230519, "loss": 1.452, "step": 3142 }, { "epoch": 0.64, "learning_rate": 0.0003099069083057247, "loss": 1.4547, "step": 3143 }, { "epoch": 0.64, "learning_rate": 0.00030960408372012187, "loss": 1.4549, "step": 3144 }, { "epoch": 0.64, "learning_rate": 0.0003093013407961263, "loss": 1.4603, "step": 3145 }, { "epoch": 0.64, "learning_rate": 0.0003089986796635851, "loss": 1.4665, "step": 3146 }, { "epoch": 0.64, "learning_rate": 0.0003086961004523114, "loss": 1.4379, "step": 3147 }, { "epoch": 0.64, "learning_rate": 0.00030839360329208243, "loss": 1.4675, "step": 3148 }, { "epoch": 0.64, "learning_rate": 0.0003080911883126407, "loss": 1.4774, "step": 3149 }, { "epoch": 0.64, "learning_rate": 0.0003077888556436931, "loss": 1.4615, "step": 3150 }, { "epoch": 0.64, "learning_rate": 0.0003074866054149116, "loss": 1.4516, "step": 3151 }, { "epoch": 0.64, "learning_rate": 0.0003071844377559323, "loss": 1.4496, "step": 3152 }, { "epoch": 0.64, "learning_rate": 0.0003068823527963562, "loss": 1.451, "step": 3153 }, { "epoch": 0.64, "learning_rate": 0.0003065803506657491, "loss": 1.44, "step": 3154 }, { "epoch": 0.64, "learning_rate": 0.0003062784314936407, "loss": 1.4505, "step": 3155 }, { "epoch": 0.64, "learning_rate": 0.00030597659540952546, "loss": 1.4738, "step": 3156 }, { "epoch": 0.64, "learning_rate": 0.00030567484254286217, "loss": 1.4453, "step": 3157 }, { "epoch": 0.64, "learning_rate": 0.00030537317302307406, "loss": 1.4066, "step": 3158 }, { "epoch": 0.64, "learning_rate": 0.00030507158697954816, "loss": 1.4521, "step": 3159 }, { "epoch": 0.64, "learning_rate": 0.0003047700845416361, "loss": 1.4327, "step": 3160 }, { "epoch": 0.64, "learning_rate": 0.0003044686658386537, "loss": 1.4396, "step": 3161 }, { "epoch": 0.64, "learning_rate": 0.0003041673309998805, "loss": 1.4461, "step": 3162 }, { "epoch": 0.64, "learning_rate": 0.0003038660801545603, "loss": 1.4488, "step": 3163 }, { "epoch": 0.64, "learning_rate": 0.00030356491343190117, "loss": 1.472, "step": 3164 }, { "epoch": 0.64, "learning_rate": 0.0003032638309610742, "loss": 1.4441, "step": 3165 }, { "epoch": 0.64, "learning_rate": 0.0003029628328712154, "loss": 1.4767, "step": 3166 }, { "epoch": 0.64, "learning_rate": 0.0003026619192914238, "loss": 1.4836, "step": 3167 }, { "epoch": 0.64, "learning_rate": 0.00030236109035076275, "loss": 1.4684, "step": 3168 }, { "epoch": 0.64, "learning_rate": 0.0003020603461782589, "loss": 1.4903, "step": 3169 }, { "epoch": 0.64, "learning_rate": 0.0003017596869029028, "loss": 1.4533, "step": 3170 }, { "epoch": 0.64, "learning_rate": 0.00030145911265364836, "loss": 1.4576, "step": 3171 }, { "epoch": 0.64, "learning_rate": 0.00030115862355941314, "loss": 1.4401, "step": 3172 }, { "epoch": 0.64, "learning_rate": 0.0003008582197490782, "loss": 1.4832, "step": 3173 }, { "epoch": 0.64, "learning_rate": 0.000300557901351488, "loss": 1.456, "step": 3174 }, { "epoch": 0.64, "learning_rate": 0.0003002576684954504, "loss": 1.4483, "step": 3175 }, { "epoch": 0.64, "learning_rate": 0.00029995752130973665, "loss": 1.4729, "step": 3176 }, { "epoch": 0.64, "learning_rate": 0.0002996574599230808, "loss": 1.4439, "step": 3177 }, { "epoch": 0.64, "learning_rate": 0.0002993574844641807, "loss": 1.4466, "step": 3178 }, { "epoch": 0.64, "learning_rate": 0.00029905759506169684, "loss": 1.4384, "step": 3179 }, { "epoch": 0.64, "learning_rate": 0.00029875779184425333, "loss": 1.4816, "step": 3180 }, { "epoch": 0.64, "learning_rate": 0.00029845807494043687, "loss": 1.4099, "step": 3181 }, { "epoch": 0.64, "learning_rate": 0.0002981584444787975, "loss": 1.4341, "step": 3182 }, { "epoch": 0.64, "learning_rate": 0.0002978589005878476, "loss": 1.4306, "step": 3183 }, { "epoch": 0.64, "learning_rate": 0.00029755944339606306, "loss": 1.455, "step": 3184 }, { "epoch": 0.64, "learning_rate": 0.00029726007303188223, "loss": 1.4579, "step": 3185 }, { "epoch": 0.64, "learning_rate": 0.0002969607896237064, "loss": 1.4347, "step": 3186 }, { "epoch": 0.64, "learning_rate": 0.0002966615932998994, "loss": 1.4843, "step": 3187 }, { "epoch": 0.64, "learning_rate": 0.00029636248418878796, "loss": 1.441, "step": 3188 }, { "epoch": 0.64, "learning_rate": 0.000296063462418661, "loss": 1.4391, "step": 3189 }, { "epoch": 0.64, "learning_rate": 0.00029576452811777034, "loss": 1.4399, "step": 3190 }, { "epoch": 0.65, "learning_rate": 0.00029546568141433004, "loss": 1.5057, "step": 3191 }, { "epoch": 0.65, "learning_rate": 0.0002951669224365169, "loss": 1.4633, "step": 3192 }, { "epoch": 0.65, "learning_rate": 0.0002948682513124697, "loss": 1.4525, "step": 3193 }, { "epoch": 0.65, "learning_rate": 0.0002945696681702901, "loss": 1.4369, "step": 3194 }, { "epoch": 0.65, "learning_rate": 0.0002942711731380411, "loss": 1.4269, "step": 3195 }, { "epoch": 0.65, "learning_rate": 0.0002939727663437488, "loss": 1.4731, "step": 3196 }, { "epoch": 0.65, "learning_rate": 0.0002936744479154011, "loss": 1.4827, "step": 3197 }, { "epoch": 0.65, "learning_rate": 0.00029337621798094805, "loss": 1.4523, "step": 3198 }, { "epoch": 0.65, "learning_rate": 0.00029307807666830165, "loss": 1.4576, "step": 3199 }, { "epoch": 0.65, "learning_rate": 0.000292780024105336, "loss": 1.44, "step": 3200 }, { "epoch": 0.65, "learning_rate": 0.00029248206041988713, "loss": 1.4691, "step": 3201 }, { "epoch": 0.65, "learning_rate": 0.0002921841857397528, "loss": 1.4535, "step": 3202 }, { "epoch": 0.65, "learning_rate": 0.0002918864001926929, "loss": 1.466, "step": 3203 }, { "epoch": 0.65, "learning_rate": 0.00029158870390642866, "loss": 1.4336, "step": 3204 }, { "epoch": 0.65, "learning_rate": 0.00029129109700864336, "loss": 1.4496, "step": 3205 }, { "epoch": 0.65, "learning_rate": 0.0002909935796269819, "loss": 1.4346, "step": 3206 }, { "epoch": 0.65, "learning_rate": 0.0002906961518890507, "loss": 1.4685, "step": 3207 }, { "epoch": 0.65, "learning_rate": 0.0002903988139224177, "loss": 1.4899, "step": 3208 }, { "epoch": 0.65, "learning_rate": 0.0002901015658546126, "loss": 1.459, "step": 3209 }, { "epoch": 0.65, "learning_rate": 0.00028980440781312616, "loss": 1.4478, "step": 3210 }, { "epoch": 0.65, "learning_rate": 0.0002895073399254108, "loss": 1.4977, "step": 3211 }, { "epoch": 0.65, "learning_rate": 0.0002892103623188803, "loss": 1.4447, "step": 3212 }, { "epoch": 0.65, "learning_rate": 0.0002889134751209092, "loss": 1.4465, "step": 3213 }, { "epoch": 0.65, "learning_rate": 0.000288616678458834, "loss": 1.4785, "step": 3214 }, { "epoch": 0.65, "learning_rate": 0.00028831997245995185, "loss": 1.4747, "step": 3215 }, { "epoch": 0.65, "learning_rate": 0.0002880233572515213, "loss": 1.5056, "step": 3216 }, { "epoch": 0.65, "learning_rate": 0.00028772683296076197, "loss": 1.4518, "step": 3217 }, { "epoch": 0.65, "learning_rate": 0.0002874303997148543, "loss": 1.4243, "step": 3218 }, { "epoch": 0.65, "learning_rate": 0.0002871340576409396, "loss": 1.4342, "step": 3219 }, { "epoch": 0.65, "learning_rate": 0.00028683780686612027, "loss": 1.4319, "step": 3220 }, { "epoch": 0.65, "learning_rate": 0.0002865416475174596, "loss": 1.4771, "step": 3221 }, { "epoch": 0.65, "learning_rate": 0.00028624557972198154, "loss": 1.421, "step": 3222 }, { "epoch": 0.65, "learning_rate": 0.00028594960360667085, "loss": 1.4532, "step": 3223 }, { "epoch": 0.65, "learning_rate": 0.00028565371929847286, "loss": 1.4659, "step": 3224 }, { "epoch": 0.65, "learning_rate": 0.00028535792692429386, "loss": 1.469, "step": 3225 }, { "epoch": 0.65, "learning_rate": 0.0002850622266109999, "loss": 1.4213, "step": 3226 }, { "epoch": 0.65, "learning_rate": 0.0002847666184854185, "loss": 1.4201, "step": 3227 }, { "epoch": 0.65, "learning_rate": 0.0002844711026743371, "loss": 1.3898, "step": 3228 }, { "epoch": 0.65, "learning_rate": 0.0002841756793045036, "loss": 1.4381, "step": 3229 }, { "epoch": 0.65, "learning_rate": 0.00028388034850262647, "loss": 1.4588, "step": 3230 }, { "epoch": 0.65, "learning_rate": 0.00028358511039537447, "loss": 1.4317, "step": 3231 }, { "epoch": 0.65, "learning_rate": 0.00028328996510937603, "loss": 1.4777, "step": 3232 }, { "epoch": 0.65, "learning_rate": 0.0002829949127712205, "loss": 1.4566, "step": 3233 }, { "epoch": 0.65, "learning_rate": 0.00028269995350745694, "loss": 1.4203, "step": 3234 }, { "epoch": 0.65, "learning_rate": 0.00028240508744459465, "loss": 1.485, "step": 3235 }, { "epoch": 0.65, "learning_rate": 0.000282110314709103, "loss": 1.4334, "step": 3236 }, { "epoch": 0.65, "learning_rate": 0.0002818156354274111, "loss": 1.4285, "step": 3237 }, { "epoch": 0.65, "learning_rate": 0.0002815210497259083, "loss": 1.4413, "step": 3238 }, { "epoch": 0.65, "learning_rate": 0.00028122655773094377, "loss": 1.4364, "step": 3239 }, { "epoch": 0.66, "learning_rate": 0.0002809321595688259, "loss": 1.4782, "step": 3240 }, { "epoch": 0.66, "learning_rate": 0.00028063785536582356, "loss": 1.4395, "step": 3241 }, { "epoch": 0.66, "learning_rate": 0.0002803436452481651, "loss": 1.4693, "step": 3242 }, { "epoch": 0.66, "learning_rate": 0.00028004952934203834, "loss": 1.4611, "step": 3243 }, { "epoch": 0.66, "learning_rate": 0.00027975550777359104, "loss": 1.432, "step": 3244 }, { "epoch": 0.66, "learning_rate": 0.00027946158066893004, "loss": 1.4354, "step": 3245 }, { "epoch": 0.66, "learning_rate": 0.000279167748154122, "loss": 1.474, "step": 3246 }, { "epoch": 0.66, "learning_rate": 0.00027887401035519313, "loss": 1.4873, "step": 3247 }, { "epoch": 0.66, "learning_rate": 0.0002785803673981284, "loss": 1.4422, "step": 3248 }, { "epoch": 0.66, "learning_rate": 0.00027828681940887304, "loss": 1.422, "step": 3249 }, { "epoch": 0.66, "learning_rate": 0.00027799336651333046, "loss": 1.3836, "step": 3250 }, { "epoch": 0.66, "learning_rate": 0.0002777000088373641, "loss": 1.4327, "step": 3251 }, { "epoch": 0.66, "learning_rate": 0.0002774067465067962, "loss": 1.4602, "step": 3252 }, { "epoch": 0.66, "learning_rate": 0.00027711357964740836, "loss": 1.4562, "step": 3253 }, { "epoch": 0.66, "learning_rate": 0.00027682050838494086, "loss": 1.4699, "step": 3254 }, { "epoch": 0.66, "learning_rate": 0.00027652753284509367, "loss": 1.4598, "step": 3255 }, { "epoch": 0.66, "learning_rate": 0.0002762346531535246, "loss": 1.4157, "step": 3256 }, { "epoch": 0.66, "learning_rate": 0.00027594186943585126, "loss": 1.4759, "step": 3257 }, { "epoch": 0.66, "learning_rate": 0.0002756491818176498, "loss": 1.4592, "step": 3258 }, { "epoch": 0.66, "learning_rate": 0.0002753565904244552, "loss": 1.4746, "step": 3259 }, { "epoch": 0.66, "learning_rate": 0.00027506409538176113, "loss": 1.4866, "step": 3260 }, { "epoch": 0.66, "learning_rate": 0.0002747716968150201, "loss": 1.4393, "step": 3261 }, { "epoch": 0.66, "learning_rate": 0.00027447939484964267, "loss": 1.4781, "step": 3262 }, { "epoch": 0.66, "learning_rate": 0.0002741871896109986, "loss": 1.451, "step": 3263 }, { "epoch": 0.66, "learning_rate": 0.0002738950812244161, "loss": 1.4238, "step": 3264 }, { "epoch": 0.66, "learning_rate": 0.00027360306981518144, "loss": 1.4552, "step": 3265 }, { "epoch": 0.66, "learning_rate": 0.0002733111555085397, "loss": 1.473, "step": 3266 }, { "epoch": 0.66, "learning_rate": 0.00027301933842969415, "loss": 1.439, "step": 3267 }, { "epoch": 0.66, "learning_rate": 0.00027272761870380624, "loss": 1.4364, "step": 3268 }, { "epoch": 0.66, "learning_rate": 0.00027243599645599575, "loss": 1.4539, "step": 3269 }, { "epoch": 0.66, "learning_rate": 0.00027214447181134085, "loss": 1.4804, "step": 3270 }, { "epoch": 0.66, "learning_rate": 0.0002718530448948775, "loss": 1.4502, "step": 3271 }, { "epoch": 0.66, "learning_rate": 0.0002715617158316002, "loss": 1.432, "step": 3272 }, { "epoch": 0.66, "learning_rate": 0.0002712704847464609, "loss": 1.4719, "step": 3273 }, { "epoch": 0.66, "learning_rate": 0.00027097935176437, "loss": 1.4244, "step": 3274 }, { "epoch": 0.66, "learning_rate": 0.0002706883170101957, "loss": 1.4562, "step": 3275 }, { "epoch": 0.66, "learning_rate": 0.0002703973806087638, "loss": 1.4443, "step": 3276 }, { "epoch": 0.66, "learning_rate": 0.00027010654268485847, "loss": 1.4457, "step": 3277 }, { "epoch": 0.66, "learning_rate": 0.00026981580336322107, "loss": 1.4375, "step": 3278 }, { "epoch": 0.66, "learning_rate": 0.0002695251627685512, "loss": 1.4681, "step": 3279 }, { "epoch": 0.66, "learning_rate": 0.0002692346210255055, "loss": 1.4649, "step": 3280 }, { "epoch": 0.66, "learning_rate": 0.00026894417825869857, "loss": 1.4606, "step": 3281 }, { "epoch": 0.66, "learning_rate": 0.00026865383459270266, "loss": 1.4423, "step": 3282 }, { "epoch": 0.66, "learning_rate": 0.0002683635901520474, "loss": 1.4286, "step": 3283 }, { "epoch": 0.66, "learning_rate": 0.0002680734450612197, "loss": 1.4499, "step": 3284 }, { "epoch": 0.66, "learning_rate": 0.0002677833994446642, "loss": 1.4456, "step": 3285 }, { "epoch": 0.66, "learning_rate": 0.00026749345342678266, "loss": 1.4689, "step": 3286 }, { "epoch": 0.66, "learning_rate": 0.00026720360713193396, "loss": 1.4548, "step": 3287 }, { "epoch": 0.66, "learning_rate": 0.0002669138606844345, "loss": 1.4571, "step": 3288 }, { "epoch": 0.66, "learning_rate": 0.00026662421420855777, "loss": 1.3784, "step": 3289 }, { "epoch": 0.67, "learning_rate": 0.00026633466782853435, "loss": 1.4261, "step": 3290 }, { "epoch": 0.67, "learning_rate": 0.00026604522166855175, "loss": 1.5217, "step": 3291 }, { "epoch": 0.67, "learning_rate": 0.0002657558758527551, "loss": 1.4627, "step": 3292 }, { "epoch": 0.67, "learning_rate": 0.00026546663050524546, "loss": 1.432, "step": 3293 }, { "epoch": 0.67, "learning_rate": 0.0002651774857500816, "loss": 1.4819, "step": 3294 }, { "epoch": 0.67, "learning_rate": 0.000264888441711279, "loss": 1.4592, "step": 3295 }, { "epoch": 0.67, "learning_rate": 0.0002645994985128098, "loss": 1.4289, "step": 3296 }, { "epoch": 0.67, "learning_rate": 0.0002643106562786029, "loss": 1.4329, "step": 3297 }, { "epoch": 0.67, "learning_rate": 0.0002640219151325442, "loss": 1.4367, "step": 3298 }, { "epoch": 0.67, "learning_rate": 0.0002637332751984756, "loss": 1.4537, "step": 3299 }, { "epoch": 0.67, "learning_rate": 0.0002634447366001962, "loss": 1.4377, "step": 3300 }, { "epoch": 0.67, "learning_rate": 0.0002631562994614613, "loss": 1.4444, "step": 3301 }, { "epoch": 0.67, "learning_rate": 0.0002628679639059829, "loss": 1.4325, "step": 3302 }, { "epoch": 0.67, "learning_rate": 0.00026257973005742937, "loss": 1.4916, "step": 3303 }, { "epoch": 0.67, "learning_rate": 0.00026229159803942527, "loss": 1.4686, "step": 3304 }, { "epoch": 0.67, "learning_rate": 0.00026200356797555175, "loss": 1.4777, "step": 3305 }, { "epoch": 0.67, "learning_rate": 0.00026171563998934606, "loss": 1.4238, "step": 3306 }, { "epoch": 0.67, "learning_rate": 0.00026142781420430175, "loss": 1.4482, "step": 3307 }, { "epoch": 0.67, "learning_rate": 0.00026114009074386846, "loss": 1.4561, "step": 3308 }, { "epoch": 0.67, "learning_rate": 0.00026085246973145203, "loss": 1.4228, "step": 3309 }, { "epoch": 0.67, "learning_rate": 0.00026056495129041457, "loss": 1.4062, "step": 3310 }, { "epoch": 0.67, "learning_rate": 0.0002602775355440734, "loss": 1.4344, "step": 3311 }, { "epoch": 0.67, "learning_rate": 0.0002599902226157027, "loss": 1.4236, "step": 3312 }, { "epoch": 0.67, "learning_rate": 0.000259703012628532, "loss": 1.4265, "step": 3313 }, { "epoch": 0.67, "learning_rate": 0.00025941590570574714, "loss": 1.4328, "step": 3314 }, { "epoch": 0.67, "learning_rate": 0.0002591289019704892, "loss": 1.4544, "step": 3315 }, { "epoch": 0.67, "learning_rate": 0.00025884200154585563, "loss": 1.443, "step": 3316 }, { "epoch": 0.67, "learning_rate": 0.00025855520455489884, "loss": 1.4557, "step": 3317 }, { "epoch": 0.67, "learning_rate": 0.00025826851112062746, "loss": 1.4543, "step": 3318 }, { "epoch": 0.67, "learning_rate": 0.0002579819213660054, "loss": 1.4553, "step": 3319 }, { "epoch": 0.67, "learning_rate": 0.00025769543541395224, "loss": 1.4585, "step": 3320 }, { "epoch": 0.67, "learning_rate": 0.0002574090533873431, "loss": 1.4446, "step": 3321 }, { "epoch": 0.67, "learning_rate": 0.00025712277540900863, "loss": 1.4193, "step": 3322 }, { "epoch": 0.67, "learning_rate": 0.0002568366016017342, "loss": 1.4586, "step": 3323 }, { "epoch": 0.67, "learning_rate": 0.00025655053208826107, "loss": 1.4282, "step": 3324 }, { "epoch": 0.67, "learning_rate": 0.00025626456699128587, "loss": 1.4599, "step": 3325 }, { "epoch": 0.67, "learning_rate": 0.00025597870643346, "loss": 1.4661, "step": 3326 }, { "epoch": 0.67, "learning_rate": 0.0002556929505373904, "loss": 1.4658, "step": 3327 }, { "epoch": 0.67, "learning_rate": 0.0002554072994256391, "loss": 1.446, "step": 3328 }, { "epoch": 0.67, "learning_rate": 0.00025512175322072274, "loss": 1.4602, "step": 3329 }, { "epoch": 0.67, "learning_rate": 0.0002548363120451134, "loss": 1.4222, "step": 3330 }, { "epoch": 0.67, "learning_rate": 0.0002545509760212381, "loss": 1.4577, "step": 3331 }, { "epoch": 0.67, "learning_rate": 0.0002542657452714785, "loss": 1.42, "step": 3332 }, { "epoch": 0.67, "learning_rate": 0.00025398061991817143, "loss": 1.4592, "step": 3333 }, { "epoch": 0.67, "learning_rate": 0.00025369560008360825, "loss": 1.4961, "step": 3334 }, { "epoch": 0.67, "learning_rate": 0.0002534106858900351, "loss": 1.445, "step": 3335 }, { "epoch": 0.67, "learning_rate": 0.0002531258774596531, "loss": 1.4337, "step": 3336 }, { "epoch": 0.67, "learning_rate": 0.0002528411749146176, "loss": 1.4098, "step": 3337 }, { "epoch": 0.67, "learning_rate": 0.0002525565783770387, "loss": 1.4437, "step": 3338 }, { "epoch": 0.68, "learning_rate": 0.0002522720879689811, "loss": 1.496, "step": 3339 }, { "epoch": 0.68, "learning_rate": 0.00025198770381246416, "loss": 1.4451, "step": 3340 }, { "epoch": 0.68, "learning_rate": 0.000251703426029461, "loss": 1.4614, "step": 3341 }, { "epoch": 0.68, "learning_rate": 0.00025141925474189973, "loss": 1.4743, "step": 3342 }, { "epoch": 0.68, "learning_rate": 0.00025113519007166277, "loss": 1.4431, "step": 3343 }, { "epoch": 0.68, "learning_rate": 0.0002508512321405864, "loss": 1.4262, "step": 3344 }, { "epoch": 0.68, "learning_rate": 0.0002505673810704615, "loss": 1.4416, "step": 3345 }, { "epoch": 0.68, "learning_rate": 0.00025028363698303323, "loss": 1.4573, "step": 3346 }, { "epoch": 0.68, "learning_rate": 0.0002500000000000001, "loss": 1.484, "step": 3347 }, { "epoch": 0.68, "learning_rate": 0.00024971647024301546, "loss": 1.4398, "step": 3348 }, { "epoch": 0.68, "learning_rate": 0.00024943304783368647, "loss": 1.4591, "step": 3349 }, { "epoch": 0.68, "learning_rate": 0.0002491497328935741, "loss": 1.4561, "step": 3350 }, { "epoch": 0.68, "learning_rate": 0.0002488665255441934, "loss": 1.4375, "step": 3351 }, { "epoch": 0.68, "learning_rate": 0.00024858342590701303, "loss": 1.4051, "step": 3352 }, { "epoch": 0.68, "learning_rate": 0.000248300434103456, "loss": 1.4414, "step": 3353 }, { "epoch": 0.68, "learning_rate": 0.00024801755025489813, "loss": 1.4396, "step": 3354 }, { "epoch": 0.68, "learning_rate": 0.00024773477448266983, "loss": 1.4566, "step": 3355 }, { "epoch": 0.68, "learning_rate": 0.00024745210690805473, "loss": 1.4743, "step": 3356 }, { "epoch": 0.68, "learning_rate": 0.00024716954765229015, "loss": 1.4517, "step": 3357 }, { "epoch": 0.68, "learning_rate": 0.000246887096836567, "loss": 1.4749, "step": 3358 }, { "epoch": 0.68, "learning_rate": 0.0002466047545820297, "loss": 1.426, "step": 3359 }, { "epoch": 0.68, "learning_rate": 0.0002463225210097756, "loss": 1.4571, "step": 3360 }, { "epoch": 0.68, "learning_rate": 0.0002460403962408563, "loss": 1.4777, "step": 3361 }, { "epoch": 0.68, "learning_rate": 0.0002457583803962761, "loss": 1.4142, "step": 3362 }, { "epoch": 0.68, "learning_rate": 0.0002454764735969929, "loss": 1.4539, "step": 3363 }, { "epoch": 0.68, "learning_rate": 0.00024519467596391756, "loss": 1.4534, "step": 3364 }, { "epoch": 0.68, "learning_rate": 0.0002449129876179144, "loss": 1.4384, "step": 3365 }, { "epoch": 0.68, "learning_rate": 0.00024463140867980054, "loss": 1.4267, "step": 3366 }, { "epoch": 0.68, "learning_rate": 0.00024434993927034666, "loss": 1.4826, "step": 3367 }, { "epoch": 0.68, "learning_rate": 0.00024406857951027596, "loss": 1.4699, "step": 3368 }, { "epoch": 0.68, "learning_rate": 0.00024378732952026484, "loss": 1.4342, "step": 3369 }, { "epoch": 0.68, "learning_rate": 0.00024350618942094266, "loss": 1.4328, "step": 3370 }, { "epoch": 0.68, "learning_rate": 0.00024322515933289173, "loss": 1.4528, "step": 3371 }, { "epoch": 0.68, "learning_rate": 0.00024294423937664672, "loss": 1.45, "step": 3372 }, { "epoch": 0.68, "learning_rate": 0.0002426634296726955, "loss": 1.4746, "step": 3373 }, { "epoch": 0.68, "learning_rate": 0.00024238273034147862, "loss": 1.4717, "step": 3374 }, { "epoch": 0.68, "learning_rate": 0.00024210214150338904, "loss": 1.4103, "step": 3375 }, { "epoch": 0.68, "learning_rate": 0.00024182166327877265, "loss": 1.4706, "step": 3376 }, { "epoch": 0.68, "learning_rate": 0.00024154129578792783, "loss": 1.4277, "step": 3377 }, { "epoch": 0.68, "learning_rate": 0.00024126103915110504, "loss": 1.438, "step": 3378 }, { "epoch": 0.68, "learning_rate": 0.00024098089348850767, "loss": 1.4722, "step": 3379 }, { "epoch": 0.68, "learning_rate": 0.00024070085892029141, "loss": 1.4324, "step": 3380 }, { "epoch": 0.68, "learning_rate": 0.00024042093556656425, "loss": 1.4406, "step": 3381 }, { "epoch": 0.68, "learning_rate": 0.00024014112354738654, "loss": 1.4615, "step": 3382 }, { "epoch": 0.68, "learning_rate": 0.00023986142298277092, "loss": 1.4605, "step": 3383 }, { "epoch": 0.68, "learning_rate": 0.0002395818339926818, "loss": 1.4705, "step": 3384 }, { "epoch": 0.68, "learning_rate": 0.00023930235669703627, "loss": 1.4756, "step": 3385 }, { "epoch": 0.68, "learning_rate": 0.00023902299121570332, "loss": 1.4056, "step": 3386 }, { "epoch": 0.68, "learning_rate": 0.000238743737668504, "loss": 1.4455, "step": 3387 }, { "epoch": 0.68, "learning_rate": 0.00023846459617521128, "loss": 1.4062, "step": 3388 }, { "epoch": 0.69, "learning_rate": 0.00023818556685555026, "loss": 1.4491, "step": 3389 }, { "epoch": 0.69, "learning_rate": 0.00023790664982919753, "loss": 1.4281, "step": 3390 }, { "epoch": 0.69, "learning_rate": 0.00023762784521578185, "loss": 1.4612, "step": 3391 }, { "epoch": 0.69, "learning_rate": 0.00023734915313488377, "loss": 1.4316, "step": 3392 }, { "epoch": 0.69, "learning_rate": 0.00023707057370603546, "loss": 1.445, "step": 3393 }, { "epoch": 0.69, "learning_rate": 0.0002367921070487208, "loss": 1.4259, "step": 3394 }, { "epoch": 0.69, "learning_rate": 0.0002365137532823753, "loss": 1.4728, "step": 3395 }, { "epoch": 0.69, "learning_rate": 0.00023623551252638608, "loss": 1.4588, "step": 3396 }, { "epoch": 0.69, "learning_rate": 0.00023595738490009177, "loss": 1.4582, "step": 3397 }, { "epoch": 0.69, "learning_rate": 0.00023567937052278244, "loss": 1.4307, "step": 3398 }, { "epoch": 0.69, "learning_rate": 0.00023540146951369967, "loss": 1.4877, "step": 3399 }, { "epoch": 0.69, "learning_rate": 0.0002351236819920363, "loss": 1.438, "step": 3400 }, { "epoch": 0.69, "learning_rate": 0.00023484600807693686, "loss": 1.4706, "step": 3401 }, { "epoch": 0.69, "learning_rate": 0.00023456844788749637, "loss": 1.453, "step": 3402 }, { "epoch": 0.69, "learning_rate": 0.0002342910015427618, "loss": 1.4528, "step": 3403 }, { "epoch": 0.69, "learning_rate": 0.000234013669161731, "loss": 1.5048, "step": 3404 }, { "epoch": 0.69, "learning_rate": 0.0002337364508633531, "loss": 1.446, "step": 3405 }, { "epoch": 0.69, "learning_rate": 0.00023345934676652808, "loss": 1.4577, "step": 3406 }, { "epoch": 0.69, "learning_rate": 0.0002331823569901073, "loss": 1.4598, "step": 3407 }, { "epoch": 0.69, "learning_rate": 0.00023290548165289245, "loss": 1.4464, "step": 3408 }, { "epoch": 0.69, "learning_rate": 0.00023262872087363669, "loss": 1.4919, "step": 3409 }, { "epoch": 0.69, "learning_rate": 0.00023235207477104392, "loss": 1.4548, "step": 3410 }, { "epoch": 0.69, "learning_rate": 0.00023207554346376874, "loss": 1.4315, "step": 3411 }, { "epoch": 0.69, "learning_rate": 0.00023179912707041667, "loss": 1.4712, "step": 3412 }, { "epoch": 0.69, "learning_rate": 0.0002315228257095438, "loss": 1.4305, "step": 3413 }, { "epoch": 0.69, "learning_rate": 0.0002312466394996572, "loss": 1.4278, "step": 3414 }, { "epoch": 0.69, "learning_rate": 0.00023097056855921383, "loss": 1.4415, "step": 3415 }, { "epoch": 0.69, "learning_rate": 0.00023069461300662193, "loss": 1.4184, "step": 3416 }, { "epoch": 0.69, "learning_rate": 0.00023041877296023995, "loss": 1.4704, "step": 3417 }, { "epoch": 0.69, "learning_rate": 0.00023014304853837686, "loss": 1.4591, "step": 3418 }, { "epoch": 0.69, "learning_rate": 0.00022986743985929208, "loss": 1.4872, "step": 3419 }, { "epoch": 0.69, "learning_rate": 0.0002295919470411954, "loss": 1.403, "step": 3420 }, { "epoch": 0.69, "learning_rate": 0.00022931657020224656, "loss": 1.4544, "step": 3421 }, { "epoch": 0.69, "learning_rate": 0.00022904130946055595, "loss": 1.4499, "step": 3422 }, { "epoch": 0.69, "learning_rate": 0.00022876616493418416, "loss": 1.4524, "step": 3423 }, { "epoch": 0.69, "learning_rate": 0.00022849113674114176, "loss": 1.4344, "step": 3424 }, { "epoch": 0.69, "learning_rate": 0.00022821622499938948, "loss": 1.4805, "step": 3425 }, { "epoch": 0.69, "learning_rate": 0.0002279414298268382, "loss": 1.4557, "step": 3426 }, { "epoch": 0.69, "learning_rate": 0.00022766675134134863, "loss": 1.4411, "step": 3427 }, { "epoch": 0.69, "learning_rate": 0.00022739218966073154, "loss": 1.4467, "step": 3428 }, { "epoch": 0.69, "learning_rate": 0.00022711774490274768, "loss": 1.425, "step": 3429 }, { "epoch": 0.69, "learning_rate": 0.00022684341718510743, "loss": 1.4077, "step": 3430 }, { "epoch": 0.69, "learning_rate": 0.00022656920662547121, "loss": 1.4856, "step": 3431 }, { "epoch": 0.69, "learning_rate": 0.0002262951133414492, "loss": 1.4669, "step": 3432 }, { "epoch": 0.69, "learning_rate": 0.00022602113745060076, "loss": 1.459, "step": 3433 }, { "epoch": 0.69, "learning_rate": 0.00022574727907043556, "loss": 1.4255, "step": 3434 }, { "epoch": 0.69, "learning_rate": 0.00022547353831841262, "loss": 1.4147, "step": 3435 }, { "epoch": 0.69, "learning_rate": 0.0002251999153119404, "loss": 1.4276, "step": 3436 }, { "epoch": 0.69, "learning_rate": 0.000224926410168377, "loss": 1.4306, "step": 3437 }, { "epoch": 0.7, "learning_rate": 0.0002246530230050301, "loss": 1.4869, "step": 3438 }, { "epoch": 0.7, "learning_rate": 0.00022437975393915628, "loss": 1.4487, "step": 3439 }, { "epoch": 0.7, "learning_rate": 0.00022410660308796193, "loss": 1.4473, "step": 3440 }, { "epoch": 0.7, "learning_rate": 0.0002238335705686026, "loss": 1.4608, "step": 3441 }, { "epoch": 0.7, "learning_rate": 0.00022356065649818308, "loss": 1.4432, "step": 3442 }, { "epoch": 0.7, "learning_rate": 0.00022328786099375737, "loss": 1.4669, "step": 3443 }, { "epoch": 0.7, "learning_rate": 0.00022301518417232875, "loss": 1.4762, "step": 3444 }, { "epoch": 0.7, "learning_rate": 0.00022274262615084916, "loss": 1.4134, "step": 3445 }, { "epoch": 0.7, "learning_rate": 0.00022247018704622001, "loss": 1.4403, "step": 3446 }, { "epoch": 0.7, "learning_rate": 0.0002221978669752916, "loss": 1.4534, "step": 3447 }, { "epoch": 0.7, "learning_rate": 0.00022192566605486313, "loss": 1.424, "step": 3448 }, { "epoch": 0.7, "learning_rate": 0.0002216535844016827, "loss": 1.4717, "step": 3449 }, { "epoch": 0.7, "learning_rate": 0.00022138162213244751, "loss": 1.4626, "step": 3450 }, { "epoch": 0.7, "learning_rate": 0.0002211097793638029, "loss": 1.4846, "step": 3451 }, { "epoch": 0.7, "learning_rate": 0.0002208380562123436, "loss": 1.4523, "step": 3452 }, { "epoch": 0.7, "learning_rate": 0.00022056645279461273, "loss": 1.4262, "step": 3453 }, { "epoch": 0.7, "learning_rate": 0.00022029496922710222, "loss": 1.4349, "step": 3454 }, { "epoch": 0.7, "learning_rate": 0.00022002360562625256, "loss": 1.4449, "step": 3455 }, { "epoch": 0.7, "learning_rate": 0.00021975236210845258, "loss": 1.4347, "step": 3456 }, { "epoch": 0.7, "learning_rate": 0.00021948123879003985, "loss": 1.4183, "step": 3457 }, { "epoch": 0.7, "learning_rate": 0.00021921023578730026, "loss": 1.4395, "step": 3458 }, { "epoch": 0.7, "learning_rate": 0.00021893935321646825, "loss": 1.4682, "step": 3459 }, { "epoch": 0.7, "learning_rate": 0.00021866859119372634, "loss": 1.4787, "step": 3460 }, { "epoch": 0.7, "learning_rate": 0.00021839794983520555, "loss": 1.4408, "step": 3461 }, { "epoch": 0.7, "learning_rate": 0.0002181274292569853, "loss": 1.4263, "step": 3462 }, { "epoch": 0.7, "learning_rate": 0.00021785702957509268, "loss": 1.4629, "step": 3463 }, { "epoch": 0.7, "learning_rate": 0.00021758675090550328, "loss": 1.4594, "step": 3464 }, { "epoch": 0.7, "learning_rate": 0.0002173165933641409, "loss": 1.4509, "step": 3465 }, { "epoch": 0.7, "learning_rate": 0.0002170465570668772, "loss": 1.451, "step": 3466 }, { "epoch": 0.7, "learning_rate": 0.00021677664212953185, "loss": 1.4845, "step": 3467 }, { "epoch": 0.7, "learning_rate": 0.00021650684866787273, "loss": 1.4688, "step": 3468 }, { "epoch": 0.7, "learning_rate": 0.00021623717679761494, "loss": 1.4793, "step": 3469 }, { "epoch": 0.7, "learning_rate": 0.00021596762663442215, "loss": 1.4703, "step": 3470 }, { "epoch": 0.7, "learning_rate": 0.00021569819829390553, "loss": 1.4673, "step": 3471 }, { "epoch": 0.7, "learning_rate": 0.00021542889189162402, "loss": 1.443, "step": 3472 }, { "epoch": 0.7, "learning_rate": 0.00021515970754308424, "loss": 1.4531, "step": 3473 }, { "epoch": 0.7, "learning_rate": 0.00021489064536374064, "loss": 1.4051, "step": 3474 }, { "epoch": 0.7, "learning_rate": 0.0002146217054689949, "loss": 1.4599, "step": 3475 }, { "epoch": 0.7, "learning_rate": 0.00021435288797419643, "loss": 1.482, "step": 3476 }, { "epoch": 0.7, "learning_rate": 0.00021408419299464242, "loss": 1.4415, "step": 3477 }, { "epoch": 0.7, "learning_rate": 0.00021381562064557707, "loss": 1.4464, "step": 3478 }, { "epoch": 0.7, "learning_rate": 0.00021354717104219234, "loss": 1.4276, "step": 3479 }, { "epoch": 0.7, "learning_rate": 0.0002132788442996273, "loss": 1.4557, "step": 3480 }, { "epoch": 0.7, "learning_rate": 0.0002130106405329686, "loss": 1.4707, "step": 3481 }, { "epoch": 0.7, "learning_rate": 0.0002127425598572496, "loss": 1.4424, "step": 3482 }, { "epoch": 0.7, "learning_rate": 0.00021247460238745148, "loss": 1.4431, "step": 3483 }, { "epoch": 0.7, "learning_rate": 0.00021220676823850227, "loss": 1.4333, "step": 3484 }, { "epoch": 0.7, "learning_rate": 0.0002119390575252771, "loss": 1.418, "step": 3485 }, { "epoch": 0.7, "learning_rate": 0.00021167147036259832, "loss": 1.4381, "step": 3486 }, { "epoch": 0.7, "learning_rate": 0.00021140400686523508, "loss": 1.4367, "step": 3487 }, { "epoch": 0.71, "learning_rate": 0.00021113666714790374, "loss": 1.4248, "step": 3488 }, { "epoch": 0.71, "learning_rate": 0.0002108694513252673, "loss": 1.4089, "step": 3489 }, { "epoch": 0.71, "learning_rate": 0.0002106023595119358, "loss": 1.4954, "step": 3490 }, { "epoch": 0.71, "learning_rate": 0.00021033539182246602, "loss": 1.4416, "step": 3491 }, { "epoch": 0.71, "learning_rate": 0.00021006854837136153, "loss": 1.4076, "step": 3492 }, { "epoch": 0.71, "learning_rate": 0.00020980182927307278, "loss": 1.4601, "step": 3493 }, { "epoch": 0.71, "learning_rate": 0.00020953523464199643, "loss": 1.4299, "step": 3494 }, { "epoch": 0.71, "learning_rate": 0.00020926876459247606, "loss": 1.428, "step": 3495 }, { "epoch": 0.71, "learning_rate": 0.00020900241923880187, "loss": 1.4507, "step": 3496 }, { "epoch": 0.71, "learning_rate": 0.00020873619869521054, "loss": 1.4631, "step": 3497 }, { "epoch": 0.71, "learning_rate": 0.00020847010307588516, "loss": 1.4242, "step": 3498 }, { "epoch": 0.71, "learning_rate": 0.00020820413249495536, "loss": 1.4462, "step": 3499 }, { "epoch": 0.71, "learning_rate": 0.00020793828706649675, "loss": 1.4443, "step": 3500 }, { "epoch": 0.71, "learning_rate": 0.00020767256690453173, "loss": 1.4205, "step": 3501 }, { "epoch": 0.71, "learning_rate": 0.0002074069721230288, "loss": 1.4249, "step": 3502 }, { "epoch": 0.71, "learning_rate": 0.0002071415028359026, "loss": 1.4359, "step": 3503 }, { "epoch": 0.71, "learning_rate": 0.00020687615915701408, "loss": 1.4735, "step": 3504 }, { "epoch": 0.71, "learning_rate": 0.00020661094120017043, "loss": 1.4302, "step": 3505 }, { "epoch": 0.71, "learning_rate": 0.0002063458490791244, "loss": 1.4127, "step": 3506 }, { "epoch": 0.71, "learning_rate": 0.00020608088290757526, "loss": 1.4763, "step": 3507 }, { "epoch": 0.71, "learning_rate": 0.00020581604279916815, "loss": 1.441, "step": 3508 }, { "epoch": 0.71, "learning_rate": 0.00020555132886749405, "loss": 1.4694, "step": 3509 }, { "epoch": 0.71, "learning_rate": 0.00020528674122608997, "loss": 1.4675, "step": 3510 }, { "epoch": 0.71, "learning_rate": 0.0002050222799884387, "loss": 1.4337, "step": 3511 }, { "epoch": 0.71, "learning_rate": 0.00020475794526796855, "loss": 1.4536, "step": 3512 }, { "epoch": 0.71, "learning_rate": 0.00020449373717805385, "loss": 1.4976, "step": 3513 }, { "epoch": 0.71, "learning_rate": 0.0002042296558320147, "loss": 1.3969, "step": 3514 }, { "epoch": 0.71, "learning_rate": 0.00020396570134311655, "loss": 1.4343, "step": 3515 }, { "epoch": 0.71, "learning_rate": 0.00020370187382457068, "loss": 1.4665, "step": 3516 }, { "epoch": 0.71, "learning_rate": 0.00020343817338953376, "loss": 1.4501, "step": 3517 }, { "epoch": 0.71, "learning_rate": 0.00020317460015110807, "loss": 1.4438, "step": 3518 }, { "epoch": 0.71, "learning_rate": 0.00020291115422234123, "loss": 1.4227, "step": 3519 }, { "epoch": 0.71, "learning_rate": 0.0002026478357162263, "loss": 1.4607, "step": 3520 }, { "epoch": 0.71, "learning_rate": 0.0002023846447457018, "loss": 1.427, "step": 3521 }, { "epoch": 0.71, "learning_rate": 0.0002021215814236513, "loss": 1.4495, "step": 3522 }, { "epoch": 0.71, "learning_rate": 0.00020185864586290398, "loss": 1.4279, "step": 3523 }, { "epoch": 0.71, "learning_rate": 0.00020159583817623366, "loss": 1.4357, "step": 3524 }, { "epoch": 0.71, "learning_rate": 0.0002013331584763599, "loss": 1.4589, "step": 3525 }, { "epoch": 0.71, "learning_rate": 0.0002010706068759471, "loss": 1.4274, "step": 3526 }, { "epoch": 0.71, "learning_rate": 0.0002008081834876046, "loss": 1.4445, "step": 3527 }, { "epoch": 0.71, "learning_rate": 0.00020054588842388705, "loss": 1.4386, "step": 3528 }, { "epoch": 0.71, "learning_rate": 0.00020028372179729405, "loss": 1.4612, "step": 3529 }, { "epoch": 0.71, "learning_rate": 0.00020002168372026957, "loss": 1.4306, "step": 3530 }, { "epoch": 0.71, "learning_rate": 0.00019975977430520308, "loss": 1.4351, "step": 3531 }, { "epoch": 0.71, "learning_rate": 0.00019949799366442855, "loss": 1.485, "step": 3532 }, { "epoch": 0.71, "learning_rate": 0.00019923634191022484, "loss": 1.3974, "step": 3533 }, { "epoch": 0.71, "learning_rate": 0.00019897481915481547, "loss": 1.4717, "step": 3534 }, { "epoch": 0.71, "learning_rate": 0.00019871342551036885, "loss": 1.4445, "step": 3535 }, { "epoch": 0.71, "learning_rate": 0.00019845216108899745, "loss": 1.4828, "step": 3536 }, { "epoch": 0.72, "learning_rate": 0.0001981910260027588, "loss": 1.4448, "step": 3537 }, { "epoch": 0.72, "learning_rate": 0.00019793002036365493, "loss": 1.4431, "step": 3538 }, { "epoch": 0.72, "learning_rate": 0.00019766914428363214, "loss": 1.438, "step": 3539 }, { "epoch": 0.72, "learning_rate": 0.00019740839787458136, "loss": 1.4538, "step": 3540 }, { "epoch": 0.72, "learning_rate": 0.00019714778124833775, "loss": 1.4566, "step": 3541 }, { "epoch": 0.72, "learning_rate": 0.00019688729451668114, "loss": 1.4557, "step": 3542 }, { "epoch": 0.72, "learning_rate": 0.00019662693779133494, "loss": 1.4632, "step": 3543 }, { "epoch": 0.72, "learning_rate": 0.00019636671118396755, "loss": 1.453, "step": 3544 }, { "epoch": 0.72, "learning_rate": 0.00019610661480619107, "loss": 1.465, "step": 3545 }, { "epoch": 0.72, "learning_rate": 0.00019584664876956203, "loss": 1.4582, "step": 3546 }, { "epoch": 0.72, "learning_rate": 0.00019558681318558097, "loss": 1.4259, "step": 3547 }, { "epoch": 0.72, "learning_rate": 0.00019532710816569238, "loss": 1.4782, "step": 3548 }, { "epoch": 0.72, "learning_rate": 0.00019506753382128494, "loss": 1.4369, "step": 3549 }, { "epoch": 0.72, "learning_rate": 0.00019480809026369112, "loss": 1.4302, "step": 3550 }, { "epoch": 0.72, "learning_rate": 0.00019454877760418732, "loss": 1.4707, "step": 3551 }, { "epoch": 0.72, "learning_rate": 0.00019428959595399386, "loss": 1.4812, "step": 3552 }, { "epoch": 0.72, "learning_rate": 0.00019403054542427485, "loss": 1.4574, "step": 3553 }, { "epoch": 0.72, "learning_rate": 0.0001937716261261383, "loss": 1.4164, "step": 3554 }, { "epoch": 0.72, "learning_rate": 0.00019351283817063548, "loss": 1.4937, "step": 3555 }, { "epoch": 0.72, "learning_rate": 0.00019325418166876168, "loss": 1.4582, "step": 3556 }, { "epoch": 0.72, "learning_rate": 0.00019299565673145592, "loss": 1.4099, "step": 3557 }, { "epoch": 0.72, "learning_rate": 0.00019273726346960053, "loss": 1.4382, "step": 3558 }, { "epoch": 0.72, "learning_rate": 0.00019247900199402147, "loss": 1.4147, "step": 3559 }, { "epoch": 0.72, "learning_rate": 0.00019222087241548835, "loss": 1.4832, "step": 3560 }, { "epoch": 0.72, "learning_rate": 0.00019196287484471376, "loss": 1.4559, "step": 3561 }, { "epoch": 0.72, "learning_rate": 0.00019170500939235397, "loss": 1.4172, "step": 3562 }, { "epoch": 0.72, "learning_rate": 0.0001914472761690087, "loss": 1.438, "step": 3563 }, { "epoch": 0.72, "learning_rate": 0.00019118967528522067, "loss": 1.4201, "step": 3564 }, { "epoch": 0.72, "learning_rate": 0.0001909322068514761, "loss": 1.4484, "step": 3565 }, { "epoch": 0.72, "learning_rate": 0.0001906748709782044, "loss": 1.4358, "step": 3566 }, { "epoch": 0.72, "learning_rate": 0.00019041766777577767, "loss": 1.4677, "step": 3567 }, { "epoch": 0.72, "learning_rate": 0.00019016059735451157, "loss": 1.4579, "step": 3568 }, { "epoch": 0.72, "learning_rate": 0.00018990365982466474, "loss": 1.4506, "step": 3569 }, { "epoch": 0.72, "learning_rate": 0.00018964685529643878, "loss": 1.444, "step": 3570 }, { "epoch": 0.72, "learning_rate": 0.00018939018387997814, "loss": 1.4382, "step": 3571 }, { "epoch": 0.72, "learning_rate": 0.0001891336456853705, "loss": 1.4284, "step": 3572 }, { "epoch": 0.72, "learning_rate": 0.00018887724082264584, "loss": 1.4101, "step": 3573 }, { "epoch": 0.72, "learning_rate": 0.00018862096940177743, "loss": 1.4324, "step": 3574 }, { "epoch": 0.72, "learning_rate": 0.00018836483153268115, "loss": 1.42, "step": 3575 }, { "epoch": 0.72, "learning_rate": 0.00018810882732521561, "loss": 1.4847, "step": 3576 }, { "epoch": 0.72, "learning_rate": 0.00018785295688918208, "loss": 1.4468, "step": 3577 }, { "epoch": 0.72, "learning_rate": 0.00018759722033432448, "loss": 1.4579, "step": 3578 }, { "epoch": 0.72, "learning_rate": 0.00018734161777032933, "loss": 1.4096, "step": 3579 }, { "epoch": 0.72, "learning_rate": 0.00018708614930682555, "loss": 1.4524, "step": 3580 }, { "epoch": 0.72, "learning_rate": 0.00018683081505338467, "loss": 1.4226, "step": 3581 }, { "epoch": 0.72, "learning_rate": 0.00018657561511952064, "loss": 1.4489, "step": 3582 }, { "epoch": 0.72, "learning_rate": 0.0001863205496146898, "loss": 1.4715, "step": 3583 }, { "epoch": 0.72, "learning_rate": 0.00018606561864829098, "loss": 1.4617, "step": 3584 }, { "epoch": 0.72, "learning_rate": 0.00018581082232966474, "loss": 1.4587, "step": 3585 }, { "epoch": 0.72, "learning_rate": 0.0001855561607680945, "loss": 1.4331, "step": 3586 }, { "epoch": 0.73, "learning_rate": 0.0001853016340728057, "loss": 1.4628, "step": 3587 }, { "epoch": 0.73, "learning_rate": 0.00018504724235296588, "loss": 1.3974, "step": 3588 }, { "epoch": 0.73, "learning_rate": 0.00018479298571768472, "loss": 1.4373, "step": 3589 }, { "epoch": 0.73, "learning_rate": 0.00018453886427601408, "loss": 1.4636, "step": 3590 }, { "epoch": 0.73, "learning_rate": 0.00018428487813694743, "loss": 1.4482, "step": 3591 }, { "epoch": 0.73, "learning_rate": 0.00018403102740942068, "loss": 1.4541, "step": 3592 }, { "epoch": 0.73, "learning_rate": 0.0001837773122023114, "loss": 1.4748, "step": 3593 }, { "epoch": 0.73, "learning_rate": 0.00018352373262443917, "loss": 1.4289, "step": 3594 }, { "epoch": 0.73, "learning_rate": 0.0001832702887845653, "loss": 1.419, "step": 3595 }, { "epoch": 0.73, "learning_rate": 0.00018301698079139313, "loss": 1.4323, "step": 3596 }, { "epoch": 0.73, "learning_rate": 0.00018276380875356709, "loss": 1.4202, "step": 3597 }, { "epoch": 0.73, "learning_rate": 0.00018251077277967398, "loss": 1.4405, "step": 3598 }, { "epoch": 0.73, "learning_rate": 0.00018225787297824193, "loss": 1.4284, "step": 3599 }, { "epoch": 0.73, "learning_rate": 0.00018200510945774078, "loss": 1.4416, "step": 3600 }, { "epoch": 0.73, "learning_rate": 0.00018175248232658186, "loss": 1.4138, "step": 3601 }, { "epoch": 0.73, "learning_rate": 0.00018149999169311815, "loss": 1.4123, "step": 3602 }, { "epoch": 0.73, "learning_rate": 0.00018124763766564362, "loss": 1.3733, "step": 3603 }, { "epoch": 0.73, "learning_rate": 0.00018099542035239407, "loss": 1.4183, "step": 3604 }, { "epoch": 0.73, "learning_rate": 0.00018074333986154673, "loss": 1.4351, "step": 3605 }, { "epoch": 0.73, "learning_rate": 0.0001804913963012198, "loss": 1.4333, "step": 3606 }, { "epoch": 0.73, "learning_rate": 0.00018023958977947301, "loss": 1.4623, "step": 3607 }, { "epoch": 0.73, "learning_rate": 0.00017998792040430724, "loss": 1.4291, "step": 3608 }, { "epoch": 0.73, "learning_rate": 0.00017973638828366455, "loss": 1.4063, "step": 3609 }, { "epoch": 0.73, "learning_rate": 0.00017948499352542807, "loss": 1.4452, "step": 3610 }, { "epoch": 0.73, "learning_rate": 0.00017923373623742213, "loss": 1.4872, "step": 3611 }, { "epoch": 0.73, "learning_rate": 0.00017898261652741194, "loss": 1.4736, "step": 3612 }, { "epoch": 0.73, "learning_rate": 0.00017873163450310393, "loss": 1.4282, "step": 3613 }, { "epoch": 0.73, "learning_rate": 0.0001784807902721452, "loss": 1.4301, "step": 3614 }, { "epoch": 0.73, "learning_rate": 0.0001782300839421242, "loss": 1.4694, "step": 3615 }, { "epoch": 0.73, "learning_rate": 0.00017797951562056947, "loss": 1.4482, "step": 3616 }, { "epoch": 0.73, "learning_rate": 0.00017772908541495102, "loss": 1.395, "step": 3617 }, { "epoch": 0.73, "learning_rate": 0.00017747879343267949, "loss": 1.4591, "step": 3618 }, { "epoch": 0.73, "learning_rate": 0.00017722863978110608, "loss": 1.435, "step": 3619 }, { "epoch": 0.73, "learning_rate": 0.00017697862456752273, "loss": 1.4429, "step": 3620 }, { "epoch": 0.73, "learning_rate": 0.00017672874789916227, "loss": 1.4523, "step": 3621 }, { "epoch": 0.73, "learning_rate": 0.00017647900988319737, "loss": 1.4296, "step": 3622 }, { "epoch": 0.73, "learning_rate": 0.00017622941062674202, "loss": 1.4488, "step": 3623 }, { "epoch": 0.73, "learning_rate": 0.00017597995023685038, "loss": 1.4494, "step": 3624 }, { "epoch": 0.73, "learning_rate": 0.00017573062882051692, "loss": 1.4455, "step": 3625 }, { "epoch": 0.73, "learning_rate": 0.0001754814464846768, "loss": 1.4749, "step": 3626 }, { "epoch": 0.73, "learning_rate": 0.00017523240333620544, "loss": 1.4164, "step": 3627 }, { "epoch": 0.73, "learning_rate": 0.00017498349948191818, "loss": 1.4227, "step": 3628 }, { "epoch": 0.73, "learning_rate": 0.00017473473502857111, "loss": 1.4499, "step": 3629 }, { "epoch": 0.73, "learning_rate": 0.00017448611008286037, "loss": 1.4288, "step": 3630 }, { "epoch": 0.73, "learning_rate": 0.0001742376247514222, "loss": 1.4709, "step": 3631 }, { "epoch": 0.73, "learning_rate": 0.00017398927914083297, "loss": 1.4242, "step": 3632 }, { "epoch": 0.73, "learning_rate": 0.00017374107335760936, "loss": 1.4824, "step": 3633 }, { "epoch": 0.73, "learning_rate": 0.00017349300750820756, "loss": 1.4561, "step": 3634 }, { "epoch": 0.73, "learning_rate": 0.00017324508169902419, "loss": 1.4535, "step": 3635 }, { "epoch": 0.74, "learning_rate": 0.00017299729603639568, "loss": 1.4214, "step": 3636 }, { "epoch": 0.74, "learning_rate": 0.00017274965062659837, "loss": 1.4072, "step": 3637 }, { "epoch": 0.74, "learning_rate": 0.00017250214557584836, "loss": 1.4509, "step": 3638 }, { "epoch": 0.74, "learning_rate": 0.0001722547809903016, "loss": 1.4647, "step": 3639 }, { "epoch": 0.74, "learning_rate": 0.00017200755697605386, "loss": 1.4583, "step": 3640 }, { "epoch": 0.74, "learning_rate": 0.00017176047363914054, "loss": 1.4262, "step": 3641 }, { "epoch": 0.74, "learning_rate": 0.0001715135310855367, "loss": 1.4481, "step": 3642 }, { "epoch": 0.74, "learning_rate": 0.00017126672942115696, "loss": 1.4163, "step": 3643 }, { "epoch": 0.74, "learning_rate": 0.00017102006875185572, "loss": 1.4692, "step": 3644 }, { "epoch": 0.74, "learning_rate": 0.0001707735491834269, "loss": 1.3827, "step": 3645 }, { "epoch": 0.74, "learning_rate": 0.00017052717082160346, "loss": 1.4434, "step": 3646 }, { "epoch": 0.74, "learning_rate": 0.00017028093377205821, "loss": 1.4066, "step": 3647 }, { "epoch": 0.74, "learning_rate": 0.00017003483814040328, "loss": 1.476, "step": 3648 }, { "epoch": 0.74, "learning_rate": 0.00016978888403219018, "loss": 1.4548, "step": 3649 }, { "epoch": 0.74, "learning_rate": 0.0001695430715529096, "loss": 1.4309, "step": 3650 }, { "epoch": 0.74, "learning_rate": 0.00016929740080799167, "loss": 1.4781, "step": 3651 }, { "epoch": 0.74, "learning_rate": 0.0001690518719028054, "loss": 1.4428, "step": 3652 }, { "epoch": 0.74, "learning_rate": 0.0001688064849426592, "loss": 1.4098, "step": 3653 }, { "epoch": 0.74, "learning_rate": 0.00016856124003280064, "loss": 1.4708, "step": 3654 }, { "epoch": 0.74, "learning_rate": 0.00016831613727841626, "loss": 1.4213, "step": 3655 }, { "epoch": 0.74, "learning_rate": 0.00016807117678463174, "loss": 1.4711, "step": 3656 }, { "epoch": 0.74, "learning_rate": 0.00016782635865651168, "loss": 1.4564, "step": 3657 }, { "epoch": 0.74, "learning_rate": 0.00016758168299905942, "loss": 1.5122, "step": 3658 }, { "epoch": 0.74, "learning_rate": 0.00016733714991721738, "loss": 1.4456, "step": 3659 }, { "epoch": 0.74, "learning_rate": 0.000167092759515867, "loss": 1.4325, "step": 3660 }, { "epoch": 0.74, "learning_rate": 0.00016684851189982826, "loss": 1.4399, "step": 3661 }, { "epoch": 0.74, "learning_rate": 0.00016660440717385994, "loss": 1.4089, "step": 3662 }, { "epoch": 0.74, "learning_rate": 0.00016636044544265984, "loss": 1.4237, "step": 3663 }, { "epoch": 0.74, "learning_rate": 0.00016611662681086374, "loss": 1.4389, "step": 3664 }, { "epoch": 0.74, "learning_rate": 0.00016587295138304677, "loss": 1.4791, "step": 3665 }, { "epoch": 0.74, "learning_rate": 0.00016562941926372227, "loss": 1.4555, "step": 3666 }, { "epoch": 0.74, "learning_rate": 0.00016538603055734214, "loss": 1.451, "step": 3667 }, { "epoch": 0.74, "learning_rate": 0.00016514278536829686, "loss": 1.4071, "step": 3668 }, { "epoch": 0.74, "learning_rate": 0.00016489968380091537, "loss": 1.44, "step": 3669 }, { "epoch": 0.74, "learning_rate": 0.00016465672595946486, "loss": 1.4198, "step": 3670 }, { "epoch": 0.74, "learning_rate": 0.00016441391194815097, "loss": 1.452, "step": 3671 }, { "epoch": 0.74, "learning_rate": 0.00016417124187111776, "loss": 1.4541, "step": 3672 }, { "epoch": 0.74, "learning_rate": 0.00016392871583244728, "loss": 1.4707, "step": 3673 }, { "epoch": 0.74, "learning_rate": 0.00016368633393616013, "loss": 1.4514, "step": 3674 }, { "epoch": 0.74, "learning_rate": 0.00016344409628621482, "loss": 1.4653, "step": 3675 }, { "epoch": 0.74, "learning_rate": 0.00016320200298650822, "loss": 1.4452, "step": 3676 }, { "epoch": 0.74, "learning_rate": 0.000162960054140875, "loss": 1.3919, "step": 3677 }, { "epoch": 0.74, "learning_rate": 0.00016271824985308802, "loss": 1.4389, "step": 3678 }, { "epoch": 0.74, "learning_rate": 0.00016247659022685824, "loss": 1.4514, "step": 3679 }, { "epoch": 0.74, "learning_rate": 0.00016223507536583447, "loss": 1.4062, "step": 3680 }, { "epoch": 0.74, "learning_rate": 0.00016199370537360347, "loss": 1.454, "step": 3681 }, { "epoch": 0.74, "learning_rate": 0.00016175248035368994, "loss": 1.4399, "step": 3682 }, { "epoch": 0.74, "learning_rate": 0.00016151140040955608, "loss": 1.471, "step": 3683 }, { "epoch": 0.74, "learning_rate": 0.00016127046564460217, "loss": 1.4228, "step": 3684 }, { "epoch": 0.74, "learning_rate": 0.0001610296761621662, "loss": 1.4232, "step": 3685 }, { "epoch": 0.75, "learning_rate": 0.0001607890320655237, "loss": 1.4508, "step": 3686 }, { "epoch": 0.75, "learning_rate": 0.000160548533457888, "loss": 1.4553, "step": 3687 }, { "epoch": 0.75, "learning_rate": 0.00016030818044241008, "loss": 1.4821, "step": 3688 }, { "epoch": 0.75, "learning_rate": 0.00016006797312217815, "loss": 1.4139, "step": 3689 }, { "epoch": 0.75, "learning_rate": 0.00015982791160021814, "loss": 1.4407, "step": 3690 }, { "epoch": 0.75, "learning_rate": 0.00015958799597949353, "loss": 1.4094, "step": 3691 }, { "epoch": 0.75, "learning_rate": 0.00015934822636290514, "loss": 1.4529, "step": 3692 }, { "epoch": 0.75, "learning_rate": 0.00015910860285329109, "loss": 1.4945, "step": 3693 }, { "epoch": 0.75, "learning_rate": 0.0001588691255534272, "loss": 1.52, "step": 3694 }, { "epoch": 0.75, "learning_rate": 0.00015862979456602582, "loss": 1.4661, "step": 3695 }, { "epoch": 0.75, "learning_rate": 0.00015839060999373728, "loss": 1.4619, "step": 3696 }, { "epoch": 0.75, "learning_rate": 0.0001581515719391488, "loss": 1.4611, "step": 3697 }, { "epoch": 0.75, "learning_rate": 0.00015791268050478486, "loss": 1.4387, "step": 3698 }, { "epoch": 0.75, "learning_rate": 0.0001576739357931069, "loss": 1.439, "step": 3699 }, { "epoch": 0.75, "learning_rate": 0.0001574353379065136, "loss": 1.4705, "step": 3700 }, { "epoch": 0.75, "learning_rate": 0.00015719688694734057, "loss": 1.4231, "step": 3701 }, { "epoch": 0.75, "learning_rate": 0.00015695858301786048, "loss": 1.4396, "step": 3702 }, { "epoch": 0.75, "learning_rate": 0.00015672042622028276, "loss": 1.4434, "step": 3703 }, { "epoch": 0.75, "learning_rate": 0.000156482416656754, "loss": 1.4372, "step": 3704 }, { "epoch": 0.75, "learning_rate": 0.00015624455442935742, "loss": 1.448, "step": 3705 }, { "epoch": 0.75, "learning_rate": 0.0001560068396401133, "loss": 1.4285, "step": 3706 }, { "epoch": 0.75, "learning_rate": 0.0001557692723909782, "loss": 1.446, "step": 3707 }, { "epoch": 0.75, "learning_rate": 0.00015553185278384586, "loss": 1.4467, "step": 3708 }, { "epoch": 0.75, "learning_rate": 0.00015529458092054655, "loss": 1.438, "step": 3709 }, { "epoch": 0.75, "learning_rate": 0.0001550574569028471, "loss": 1.4493, "step": 3710 }, { "epoch": 0.75, "learning_rate": 0.00015482048083245115, "loss": 1.4562, "step": 3711 }, { "epoch": 0.75, "learning_rate": 0.00015458365281099877, "loss": 1.4278, "step": 3712 }, { "epoch": 0.75, "learning_rate": 0.00015434697294006622, "loss": 1.4729, "step": 3713 }, { "epoch": 0.75, "learning_rate": 0.00015411044132116665, "loss": 1.5083, "step": 3714 }, { "epoch": 0.75, "learning_rate": 0.0001538740580557494, "loss": 1.4163, "step": 3715 }, { "epoch": 0.75, "learning_rate": 0.00015363782324520031, "loss": 1.4224, "step": 3716 }, { "epoch": 0.75, "learning_rate": 0.0001534017369908415, "loss": 1.4502, "step": 3717 }, { "epoch": 0.75, "learning_rate": 0.0001531657993939314, "loss": 1.429, "step": 3718 }, { "epoch": 0.75, "learning_rate": 0.0001529300105556644, "loss": 1.4625, "step": 3719 }, { "epoch": 0.75, "learning_rate": 0.0001526943705771715, "loss": 1.4627, "step": 3720 }, { "epoch": 0.75, "learning_rate": 0.00015245887955951966, "loss": 1.4647, "step": 3721 }, { "epoch": 0.75, "learning_rate": 0.00015222353760371195, "loss": 1.4137, "step": 3722 }, { "epoch": 0.75, "learning_rate": 0.00015198834481068753, "loss": 1.4341, "step": 3723 }, { "epoch": 0.75, "learning_rate": 0.0001517533012813217, "loss": 1.4577, "step": 3724 }, { "epoch": 0.75, "learning_rate": 0.00015151840711642535, "loss": 1.4569, "step": 3725 }, { "epoch": 0.75, "learning_rate": 0.0001512836624167457, "loss": 1.4236, "step": 3726 }, { "epoch": 0.75, "learning_rate": 0.00015104906728296568, "loss": 1.4171, "step": 3727 }, { "epoch": 0.75, "learning_rate": 0.00015081462181570427, "loss": 1.4743, "step": 3728 }, { "epoch": 0.75, "learning_rate": 0.00015058032611551603, "loss": 1.4507, "step": 3729 }, { "epoch": 0.75, "learning_rate": 0.00015034618028289139, "loss": 1.4114, "step": 3730 }, { "epoch": 0.75, "learning_rate": 0.00015011218441825642, "loss": 1.4229, "step": 3731 }, { "epoch": 0.75, "learning_rate": 0.00014987833862197298, "loss": 1.4927, "step": 3732 }, { "epoch": 0.75, "learning_rate": 0.0001496446429943385, "loss": 1.4819, "step": 3733 }, { "epoch": 0.75, "learning_rate": 0.00014941109763558601, "loss": 1.429, "step": 3734 }, { "epoch": 0.76, "learning_rate": 0.00014917770264588414, "loss": 1.4261, "step": 3735 }, { "epoch": 0.76, "learning_rate": 0.0001489444581253369, "loss": 1.4347, "step": 3736 }, { "epoch": 0.76, "learning_rate": 0.00014871136417398407, "loss": 1.4472, "step": 3737 }, { "epoch": 0.76, "learning_rate": 0.00014847842089180024, "loss": 1.4118, "step": 3738 }, { "epoch": 0.76, "learning_rate": 0.00014824562837869603, "loss": 1.4591, "step": 3739 }, { "epoch": 0.76, "learning_rate": 0.00014801298673451703, "loss": 1.4498, "step": 3740 }, { "epoch": 0.76, "learning_rate": 0.0001477804960590442, "loss": 1.4663, "step": 3741 }, { "epoch": 0.76, "learning_rate": 0.0001475481564519938, "loss": 1.4262, "step": 3742 }, { "epoch": 0.76, "learning_rate": 0.0001473159680130175, "loss": 1.4445, "step": 3743 }, { "epoch": 0.76, "learning_rate": 0.00014708393084170146, "loss": 1.4598, "step": 3744 }, { "epoch": 0.76, "learning_rate": 0.0001468520450375676, "loss": 1.4499, "step": 3745 }, { "epoch": 0.76, "learning_rate": 0.00014662031070007271, "loss": 1.4527, "step": 3746 }, { "epoch": 0.76, "learning_rate": 0.00014638872792860862, "loss": 1.45, "step": 3747 }, { "epoch": 0.76, "learning_rate": 0.00014615729682250218, "loss": 1.3683, "step": 3748 }, { "epoch": 0.76, "learning_rate": 0.0001459260174810153, "loss": 1.4164, "step": 3749 }, { "epoch": 0.76, "learning_rate": 0.00014569489000334436, "loss": 1.4695, "step": 3750 }, { "epoch": 0.76, "learning_rate": 0.00014546391448862095, "loss": 1.45, "step": 3751 }, { "epoch": 0.76, "learning_rate": 0.0001452330910359116, "loss": 1.4341, "step": 3752 }, { "epoch": 0.76, "learning_rate": 0.00014500241974421736, "loss": 1.4252, "step": 3753 }, { "epoch": 0.76, "learning_rate": 0.00014477190071247414, "loss": 1.416, "step": 3754 }, { "epoch": 0.76, "learning_rate": 0.00014454153403955266, "loss": 1.4262, "step": 3755 }, { "epoch": 0.76, "learning_rate": 0.00014431131982425795, "loss": 1.448, "step": 3756 }, { "epoch": 0.76, "learning_rate": 0.0001440812581653298, "loss": 1.4066, "step": 3757 }, { "epoch": 0.76, "learning_rate": 0.00014385134916144278, "loss": 1.5143, "step": 3758 }, { "epoch": 0.76, "learning_rate": 0.00014362159291120575, "loss": 1.4451, "step": 3759 }, { "epoch": 0.76, "learning_rate": 0.00014339198951316217, "loss": 1.4724, "step": 3760 }, { "epoch": 0.76, "learning_rate": 0.00014316253906578995, "loss": 1.472, "step": 3761 }, { "epoch": 0.76, "learning_rate": 0.0001429332416675012, "loss": 1.4334, "step": 3762 }, { "epoch": 0.76, "learning_rate": 0.00014270409741664269, "loss": 1.4668, "step": 3763 }, { "epoch": 0.76, "learning_rate": 0.00014247510641149524, "loss": 1.4397, "step": 3764 }, { "epoch": 0.76, "learning_rate": 0.00014224626875027413, "loss": 1.4221, "step": 3765 }, { "epoch": 0.76, "learning_rate": 0.00014201758453112877, "loss": 1.4191, "step": 3766 }, { "epoch": 0.76, "learning_rate": 0.00014178905385214293, "loss": 1.4106, "step": 3767 }, { "epoch": 0.76, "learning_rate": 0.000141560676811334, "loss": 1.5047, "step": 3768 }, { "epoch": 0.76, "learning_rate": 0.00014133245350665414, "loss": 1.4231, "step": 3769 }, { "epoch": 0.76, "learning_rate": 0.00014110438403598912, "loss": 1.4231, "step": 3770 }, { "epoch": 0.76, "learning_rate": 0.00014087646849715907, "loss": 1.4026, "step": 3771 }, { "epoch": 0.76, "learning_rate": 0.0001406487069879178, "loss": 1.4433, "step": 3772 }, { "epoch": 0.76, "learning_rate": 0.0001404210996059534, "loss": 1.4191, "step": 3773 }, { "epoch": 0.76, "learning_rate": 0.00014019364644888722, "loss": 1.465, "step": 3774 }, { "epoch": 0.76, "learning_rate": 0.00013996634761427511, "loss": 1.4554, "step": 3775 }, { "epoch": 0.76, "learning_rate": 0.00013973920319960653, "loss": 1.4099, "step": 3776 }, { "epoch": 0.76, "learning_rate": 0.00013951221330230456, "loss": 1.4657, "step": 3777 }, { "epoch": 0.76, "learning_rate": 0.00013928537801972618, "loss": 1.4604, "step": 3778 }, { "epoch": 0.76, "learning_rate": 0.0001390586974491622, "loss": 1.4727, "step": 3779 }, { "epoch": 0.76, "learning_rate": 0.00013883217168783645, "loss": 1.4775, "step": 3780 }, { "epoch": 0.76, "learning_rate": 0.00013860580083290702, "loss": 1.4796, "step": 3781 }, { "epoch": 0.76, "learning_rate": 0.0001383795849814653, "loss": 1.4674, "step": 3782 }, { "epoch": 0.76, "learning_rate": 0.00013815352423053617, "loss": 1.4671, "step": 3783 }, { "epoch": 0.76, "learning_rate": 0.00013792761867707808, "loss": 1.4166, "step": 3784 }, { "epoch": 0.77, "learning_rate": 0.00013770186841798303, "loss": 1.4483, "step": 3785 }, { "epoch": 0.77, "learning_rate": 0.0001374762735500759, "loss": 1.4228, "step": 3786 }, { "epoch": 0.77, "learning_rate": 0.00013725083417011547, "loss": 1.4542, "step": 3787 }, { "epoch": 0.77, "learning_rate": 0.00013702555037479363, "loss": 1.4159, "step": 3788 }, { "epoch": 0.77, "learning_rate": 0.00013680042226073554, "loss": 1.4429, "step": 3789 }, { "epoch": 0.77, "learning_rate": 0.0001365754499244996, "loss": 1.4234, "step": 3790 }, { "epoch": 0.77, "learning_rate": 0.00013635063346257732, "loss": 1.4319, "step": 3791 }, { "epoch": 0.77, "learning_rate": 0.00013612597297139357, "loss": 1.4232, "step": 3792 }, { "epoch": 0.77, "learning_rate": 0.000135901468547306, "loss": 1.4743, "step": 3793 }, { "epoch": 0.77, "learning_rate": 0.0001356771202866056, "loss": 1.4529, "step": 3794 }, { "epoch": 0.77, "learning_rate": 0.00013545292828551632, "loss": 1.484, "step": 3795 }, { "epoch": 0.77, "learning_rate": 0.00013522889264019496, "loss": 1.4597, "step": 3796 }, { "epoch": 0.77, "learning_rate": 0.00013500501344673138, "loss": 1.4704, "step": 3797 }, { "epoch": 0.77, "learning_rate": 0.00013478129080114848, "loss": 1.4101, "step": 3798 }, { "epoch": 0.77, "learning_rate": 0.00013455772479940153, "loss": 1.4386, "step": 3799 }, { "epoch": 0.77, "learning_rate": 0.00013433431553737903, "loss": 1.4011, "step": 3800 }, { "epoch": 0.77, "learning_rate": 0.0001341110631109022, "loss": 1.4478, "step": 3801 }, { "epoch": 0.77, "learning_rate": 0.00013388796761572492, "loss": 1.4144, "step": 3802 }, { "epoch": 0.77, "learning_rate": 0.00013366502914753382, "loss": 1.4166, "step": 3803 }, { "epoch": 0.77, "learning_rate": 0.00013344224780194826, "loss": 1.4094, "step": 3804 }, { "epoch": 0.77, "learning_rate": 0.00013321962367451978, "loss": 1.463, "step": 3805 }, { "epoch": 0.77, "learning_rate": 0.00013299715686073294, "loss": 1.4275, "step": 3806 }, { "epoch": 0.77, "learning_rate": 0.00013277484745600476, "loss": 1.3891, "step": 3807 }, { "epoch": 0.77, "learning_rate": 0.00013255269555568466, "loss": 1.4461, "step": 3808 }, { "epoch": 0.77, "learning_rate": 0.0001323307012550545, "loss": 1.445, "step": 3809 }, { "epoch": 0.77, "learning_rate": 0.00013210886464932865, "loss": 1.4519, "step": 3810 }, { "epoch": 0.77, "learning_rate": 0.00013188718583365355, "loss": 1.4554, "step": 3811 }, { "epoch": 0.77, "learning_rate": 0.00013166566490310826, "loss": 1.4305, "step": 3812 }, { "epoch": 0.77, "learning_rate": 0.00013144430195270417, "loss": 1.4142, "step": 3813 }, { "epoch": 0.77, "learning_rate": 0.00013122309707738462, "loss": 1.4231, "step": 3814 }, { "epoch": 0.77, "learning_rate": 0.0001310020503720254, "loss": 1.4612, "step": 3815 }, { "epoch": 0.77, "learning_rate": 0.00013078116193143447, "loss": 1.434, "step": 3816 }, { "epoch": 0.77, "learning_rate": 0.00013056043185035154, "loss": 1.4552, "step": 3817 }, { "epoch": 0.77, "learning_rate": 0.0001303398602234488, "loss": 1.4652, "step": 3818 }, { "epoch": 0.77, "learning_rate": 0.00013011944714533036, "loss": 1.4524, "step": 3819 }, { "epoch": 0.77, "learning_rate": 0.0001298991927105323, "loss": 1.4124, "step": 3820 }, { "epoch": 0.77, "learning_rate": 0.00012967909701352265, "loss": 1.4206, "step": 3821 }, { "epoch": 0.77, "learning_rate": 0.0001294591601487014, "loss": 1.4313, "step": 3822 }, { "epoch": 0.77, "learning_rate": 0.00012923938221040044, "loss": 1.436, "step": 3823 }, { "epoch": 0.77, "learning_rate": 0.0001290197632928834, "loss": 1.4654, "step": 3824 }, { "epoch": 0.77, "learning_rate": 0.00012880030349034577, "loss": 1.4195, "step": 3825 }, { "epoch": 0.77, "learning_rate": 0.00012858100289691476, "loss": 1.4565, "step": 3826 }, { "epoch": 0.77, "learning_rate": 0.00012836186160664932, "loss": 1.4704, "step": 3827 }, { "epoch": 0.77, "learning_rate": 0.00012814287971354022, "loss": 1.4266, "step": 3828 }, { "epoch": 0.77, "learning_rate": 0.00012792405731150942, "loss": 1.4512, "step": 3829 }, { "epoch": 0.77, "learning_rate": 0.00012770539449441092, "loss": 1.4224, "step": 3830 }, { "epoch": 0.77, "learning_rate": 0.00012748689135603014, "loss": 1.4506, "step": 3831 }, { "epoch": 0.77, "learning_rate": 0.0001272685479900839, "loss": 1.4439, "step": 3832 }, { "epoch": 0.77, "learning_rate": 0.00012705036449022072, "loss": 1.4269, "step": 3833 }, { "epoch": 0.78, "learning_rate": 0.00012683234095002045, "loss": 1.4204, "step": 3834 }, { "epoch": 0.78, "learning_rate": 0.0001266144774629941, "loss": 1.4361, "step": 3835 }, { "epoch": 0.78, "learning_rate": 0.00012639677412258426, "loss": 1.4669, "step": 3836 }, { "epoch": 0.78, "learning_rate": 0.00012617923102216494, "loss": 1.4751, "step": 3837 }, { "epoch": 0.78, "learning_rate": 0.0001259618482550412, "loss": 1.4521, "step": 3838 }, { "epoch": 0.78, "learning_rate": 0.0001257446259144494, "loss": 1.4419, "step": 3839 }, { "epoch": 0.78, "learning_rate": 0.00012552756409355738, "loss": 1.4797, "step": 3840 }, { "epoch": 0.78, "learning_rate": 0.0001253106628854635, "loss": 1.4481, "step": 3841 }, { "epoch": 0.78, "learning_rate": 0.00012509392238319766, "loss": 1.439, "step": 3842 }, { "epoch": 0.78, "learning_rate": 0.00012487734267972096, "loss": 1.4581, "step": 3843 }, { "epoch": 0.78, "learning_rate": 0.00012466092386792516, "loss": 1.4219, "step": 3844 }, { "epoch": 0.78, "learning_rate": 0.00012444466604063332, "loss": 1.4116, "step": 3845 }, { "epoch": 0.78, "learning_rate": 0.00012422856929059938, "loss": 1.4117, "step": 3846 }, { "epoch": 0.78, "learning_rate": 0.00012401263371050793, "loss": 1.4587, "step": 3847 }, { "epoch": 0.78, "learning_rate": 0.00012379685939297474, "loss": 1.4494, "step": 3848 }, { "epoch": 0.78, "learning_rate": 0.00012358124643054635, "loss": 1.4517, "step": 3849 }, { "epoch": 0.78, "learning_rate": 0.00012336579491570006, "loss": 1.4386, "step": 3850 }, { "epoch": 0.78, "learning_rate": 0.0001231505049408439, "loss": 1.457, "step": 3851 }, { "epoch": 0.78, "learning_rate": 0.00012293537659831673, "loss": 1.4184, "step": 3852 }, { "epoch": 0.78, "learning_rate": 0.00012272040998038787, "loss": 1.4783, "step": 3853 }, { "epoch": 0.78, "learning_rate": 0.00012250560517925745, "loss": 1.3978, "step": 3854 }, { "epoch": 0.78, "learning_rate": 0.00012229096228705621, "loss": 1.4044, "step": 3855 }, { "epoch": 0.78, "learning_rate": 0.00012207648139584537, "loss": 1.4566, "step": 3856 }, { "epoch": 0.78, "learning_rate": 0.00012186216259761662, "loss": 1.4594, "step": 3857 }, { "epoch": 0.78, "learning_rate": 0.00012164800598429238, "loss": 1.4544, "step": 3858 }, { "epoch": 0.78, "learning_rate": 0.000121434011647725, "loss": 1.4014, "step": 3859 }, { "epoch": 0.78, "learning_rate": 0.00012122017967969772, "loss": 1.3871, "step": 3860 }, { "epoch": 0.78, "learning_rate": 0.000121006510171924, "loss": 1.4203, "step": 3861 }, { "epoch": 0.78, "learning_rate": 0.00012079300321604753, "loss": 1.4934, "step": 3862 }, { "epoch": 0.78, "learning_rate": 0.00012057965890364237, "loss": 1.431, "step": 3863 }, { "epoch": 0.78, "learning_rate": 0.00012036647732621275, "loss": 1.4457, "step": 3864 }, { "epoch": 0.78, "learning_rate": 0.00012015345857519339, "loss": 1.4329, "step": 3865 }, { "epoch": 0.78, "learning_rate": 0.00011994060274194851, "loss": 1.4544, "step": 3866 }, { "epoch": 0.78, "learning_rate": 0.0001197279099177731, "loss": 1.4258, "step": 3867 }, { "epoch": 0.78, "learning_rate": 0.00011951538019389196, "loss": 1.4579, "step": 3868 }, { "epoch": 0.78, "learning_rate": 0.00011930301366145996, "loss": 1.4508, "step": 3869 }, { "epoch": 0.78, "learning_rate": 0.00011909081041156206, "loss": 1.4407, "step": 3870 }, { "epoch": 0.78, "learning_rate": 0.00011887877053521328, "loss": 1.4338, "step": 3871 }, { "epoch": 0.78, "learning_rate": 0.00011866689412335801, "loss": 1.4182, "step": 3872 }, { "epoch": 0.78, "learning_rate": 0.00011845518126687121, "loss": 1.4465, "step": 3873 }, { "epoch": 0.78, "learning_rate": 0.0001182436320565573, "loss": 1.4264, "step": 3874 }, { "epoch": 0.78, "learning_rate": 0.0001180322465831507, "loss": 1.4265, "step": 3875 }, { "epoch": 0.78, "learning_rate": 0.0001178210249373155, "loss": 1.4311, "step": 3876 }, { "epoch": 0.78, "learning_rate": 0.00011760996720964573, "loss": 1.4248, "step": 3877 }, { "epoch": 0.78, "learning_rate": 0.00011739907349066453, "loss": 1.4266, "step": 3878 }, { "epoch": 0.78, "learning_rate": 0.00011718834387082539, "loss": 1.4838, "step": 3879 }, { "epoch": 0.78, "learning_rate": 0.00011697777844051105, "loss": 1.4698, "step": 3880 }, { "epoch": 0.78, "learning_rate": 0.00011676737729003389, "loss": 1.3791, "step": 3881 }, { "epoch": 0.78, "learning_rate": 0.0001165571405096359, "loss": 1.4049, "step": 3882 }, { "epoch": 0.78, "learning_rate": 0.00011634706818948848, "loss": 1.4484, "step": 3883 }, { "epoch": 0.79, "learning_rate": 0.00011613716041969257, "loss": 1.4241, "step": 3884 }, { "epoch": 0.79, "learning_rate": 0.00011592741729027845, "loss": 1.3987, "step": 3885 }, { "epoch": 0.79, "learning_rate": 0.00011571783889120585, "loss": 1.4284, "step": 3886 }, { "epoch": 0.79, "learning_rate": 0.00011550842531236377, "loss": 1.4023, "step": 3887 }, { "epoch": 0.79, "learning_rate": 0.00011529917664357064, "loss": 1.404, "step": 3888 }, { "epoch": 0.79, "learning_rate": 0.00011509009297457423, "loss": 1.4384, "step": 3889 }, { "epoch": 0.79, "learning_rate": 0.00011488117439505108, "loss": 1.3943, "step": 3890 }, { "epoch": 0.79, "learning_rate": 0.00011467242099460745, "loss": 1.4453, "step": 3891 }, { "epoch": 0.79, "learning_rate": 0.00011446383286277845, "loss": 1.4336, "step": 3892 }, { "epoch": 0.79, "learning_rate": 0.00011425541008902851, "loss": 1.4359, "step": 3893 }, { "epoch": 0.79, "learning_rate": 0.00011404715276275096, "loss": 1.459, "step": 3894 }, { "epoch": 0.79, "learning_rate": 0.0001138390609732684, "loss": 1.4389, "step": 3895 }, { "epoch": 0.79, "learning_rate": 0.00011363113480983201, "loss": 1.4262, "step": 3896 }, { "epoch": 0.79, "learning_rate": 0.00011342337436162226, "loss": 1.438, "step": 3897 }, { "epoch": 0.79, "learning_rate": 0.0001132157797177486, "loss": 1.4195, "step": 3898 }, { "epoch": 0.79, "learning_rate": 0.00011300835096724915, "loss": 1.4554, "step": 3899 }, { "epoch": 0.79, "learning_rate": 0.00011280108819909102, "loss": 1.4206, "step": 3900 }, { "epoch": 0.79, "learning_rate": 0.0001125939915021702, "loss": 1.4285, "step": 3901 }, { "epoch": 0.79, "learning_rate": 0.00011238706096531104, "loss": 1.4456, "step": 3902 }, { "epoch": 0.79, "learning_rate": 0.00011218029667726704, "loss": 1.4369, "step": 3903 }, { "epoch": 0.79, "learning_rate": 0.00011197369872672037, "loss": 1.4178, "step": 3904 }, { "epoch": 0.79, "learning_rate": 0.00011176726720228158, "loss": 1.4358, "step": 3905 }, { "epoch": 0.79, "learning_rate": 0.00011156100219249022, "loss": 1.4434, "step": 3906 }, { "epoch": 0.79, "learning_rate": 0.0001113549037858142, "loss": 1.4267, "step": 3907 }, { "epoch": 0.79, "learning_rate": 0.00011114897207064983, "loss": 1.4457, "step": 3908 }, { "epoch": 0.79, "learning_rate": 0.00011094320713532213, "loss": 1.4486, "step": 3909 }, { "epoch": 0.79, "learning_rate": 0.00011073760906808455, "loss": 1.4209, "step": 3910 }, { "epoch": 0.79, "learning_rate": 0.00011053217795711906, "loss": 1.4354, "step": 3911 }, { "epoch": 0.79, "learning_rate": 0.00011032691389053578, "loss": 1.4505, "step": 3912 }, { "epoch": 0.79, "learning_rate": 0.00011012181695637347, "loss": 1.4524, "step": 3913 }, { "epoch": 0.79, "learning_rate": 0.00010991688724259902, "loss": 1.409, "step": 3914 }, { "epoch": 0.79, "learning_rate": 0.00010971212483710758, "loss": 1.41, "step": 3915 }, { "epoch": 0.79, "learning_rate": 0.00010950752982772266, "loss": 1.4406, "step": 3916 }, { "epoch": 0.79, "learning_rate": 0.00010930310230219603, "loss": 1.4115, "step": 3917 }, { "epoch": 0.79, "learning_rate": 0.00010909884234820728, "loss": 1.457, "step": 3918 }, { "epoch": 0.79, "learning_rate": 0.00010889475005336446, "loss": 1.4485, "step": 3919 }, { "epoch": 0.79, "learning_rate": 0.00010869082550520365, "loss": 1.4489, "step": 3920 }, { "epoch": 0.79, "learning_rate": 0.00010848706879118891, "loss": 1.4102, "step": 3921 }, { "epoch": 0.79, "learning_rate": 0.00010828347999871235, "loss": 1.4922, "step": 3922 }, { "epoch": 0.79, "learning_rate": 0.00010808005921509406, "loss": 1.4252, "step": 3923 }, { "epoch": 0.79, "learning_rate": 0.00010787680652758209, "loss": 1.4503, "step": 3924 }, { "epoch": 0.79, "learning_rate": 0.00010767372202335229, "loss": 1.4557, "step": 3925 }, { "epoch": 0.79, "learning_rate": 0.00010747080578950874, "loss": 1.4514, "step": 3926 }, { "epoch": 0.79, "learning_rate": 0.00010726805791308264, "loss": 1.4412, "step": 3927 }, { "epoch": 0.79, "learning_rate": 0.00010706547848103365, "loss": 1.4352, "step": 3928 }, { "epoch": 0.79, "learning_rate": 0.00010686306758024889, "loss": 1.4757, "step": 3929 }, { "epoch": 0.79, "learning_rate": 0.00010666082529754317, "loss": 1.4154, "step": 3930 }, { "epoch": 0.79, "learning_rate": 0.00010645875171965925, "loss": 1.4137, "step": 3931 }, { "epoch": 0.79, "learning_rate": 0.00010625684693326726, "loss": 1.4486, "step": 3932 }, { "epoch": 0.8, "learning_rate": 0.00010605511102496484, "loss": 1.4472, "step": 3933 }, { "epoch": 0.8, "learning_rate": 0.00010585354408127757, "loss": 1.4567, "step": 3934 }, { "epoch": 0.8, "learning_rate": 0.00010565214618865821, "loss": 1.4012, "step": 3935 }, { "epoch": 0.8, "learning_rate": 0.00010545091743348723, "loss": 1.4822, "step": 3936 }, { "epoch": 0.8, "learning_rate": 0.00010524985790207254, "loss": 1.429, "step": 3937 }, { "epoch": 0.8, "learning_rate": 0.00010504896768064942, "loss": 1.4129, "step": 3938 }, { "epoch": 0.8, "learning_rate": 0.00010484824685538041, "loss": 1.4552, "step": 3939 }, { "epoch": 0.8, "learning_rate": 0.00010464769551235553, "loss": 1.462, "step": 3940 }, { "epoch": 0.8, "learning_rate": 0.00010444731373759219, "loss": 1.4248, "step": 3941 }, { "epoch": 0.8, "learning_rate": 0.00010424710161703488, "loss": 1.4584, "step": 3942 }, { "epoch": 0.8, "learning_rate": 0.0001040470592365555, "loss": 1.4505, "step": 3943 }, { "epoch": 0.8, "learning_rate": 0.00010384718668195314, "loss": 1.46, "step": 3944 }, { "epoch": 0.8, "learning_rate": 0.00010364748403895369, "loss": 1.4355, "step": 3945 }, { "epoch": 0.8, "learning_rate": 0.00010344795139321067, "loss": 1.4654, "step": 3946 }, { "epoch": 0.8, "learning_rate": 0.00010324858883030442, "loss": 1.4519, "step": 3947 }, { "epoch": 0.8, "learning_rate": 0.00010304939643574234, "loss": 1.4578, "step": 3948 }, { "epoch": 0.8, "learning_rate": 0.00010285037429495897, "loss": 1.4347, "step": 3949 }, { "epoch": 0.8, "learning_rate": 0.00010265152249331572, "loss": 1.4517, "step": 3950 }, { "epoch": 0.8, "learning_rate": 0.00010245284111610087, "loss": 1.4105, "step": 3951 }, { "epoch": 0.8, "learning_rate": 0.00010225433024852982, "loss": 1.4534, "step": 3952 }, { "epoch": 0.8, "learning_rate": 0.00010205598997574461, "loss": 1.4562, "step": 3953 }, { "epoch": 0.8, "learning_rate": 0.00010185782038281433, "loss": 1.4221, "step": 3954 }, { "epoch": 0.8, "learning_rate": 0.00010165982155473463, "loss": 1.4205, "step": 3955 }, { "epoch": 0.8, "learning_rate": 0.00010146199357642827, "loss": 1.4498, "step": 3956 }, { "epoch": 0.8, "learning_rate": 0.00010126433653274413, "loss": 1.3868, "step": 3957 }, { "epoch": 0.8, "learning_rate": 0.00010106685050845838, "loss": 1.4244, "step": 3958 }, { "epoch": 0.8, "learning_rate": 0.00010086953558827349, "loss": 1.4181, "step": 3959 }, { "epoch": 0.8, "learning_rate": 0.00010067239185681882, "loss": 1.4423, "step": 3960 }, { "epoch": 0.8, "learning_rate": 0.00010047541939864995, "loss": 1.4386, "step": 3961 }, { "epoch": 0.8, "learning_rate": 0.00010027861829824952, "loss": 1.4239, "step": 3962 }, { "epoch": 0.8, "learning_rate": 0.00010008198864002593, "loss": 1.4178, "step": 3963 }, { "epoch": 0.8, "learning_rate": 9.988553050831467e-05, "loss": 1.4242, "step": 3964 }, { "epoch": 0.8, "learning_rate": 9.968924398737745e-05, "loss": 1.3811, "step": 3965 }, { "epoch": 0.8, "learning_rate": 9.949312916140235e-05, "loss": 1.4424, "step": 3966 }, { "epoch": 0.8, "learning_rate": 9.929718611450389e-05, "loss": 1.4231, "step": 3967 }, { "epoch": 0.8, "learning_rate": 9.9101414930723e-05, "loss": 1.3982, "step": 3968 }, { "epoch": 0.8, "learning_rate": 9.89058156940264e-05, "loss": 1.4195, "step": 3969 }, { "epoch": 0.8, "learning_rate": 9.871038848830765e-05, "loss": 1.473, "step": 3970 }, { "epoch": 0.8, "learning_rate": 9.851513339738626e-05, "loss": 1.4474, "step": 3971 }, { "epoch": 0.8, "learning_rate": 9.832005050500803e-05, "loss": 1.4137, "step": 3972 }, { "epoch": 0.8, "learning_rate": 9.812513989484473e-05, "loss": 1.4496, "step": 3973 }, { "epoch": 0.8, "learning_rate": 9.79304016504945e-05, "loss": 1.4635, "step": 3974 }, { "epoch": 0.8, "learning_rate": 9.773583585548113e-05, "loss": 1.4521, "step": 3975 }, { "epoch": 0.8, "learning_rate": 9.754144259325487e-05, "loss": 1.4208, "step": 3976 }, { "epoch": 0.8, "learning_rate": 9.734722194719176e-05, "loss": 1.4391, "step": 3977 }, { "epoch": 0.8, "learning_rate": 9.715317400059386e-05, "loss": 1.49, "step": 3978 }, { "epoch": 0.8, "learning_rate": 9.695929883668919e-05, "loss": 1.4323, "step": 3979 }, { "epoch": 0.8, "learning_rate": 9.67655965386316e-05, "loss": 1.397, "step": 3980 }, { "epoch": 0.8, "learning_rate": 9.657206718950079e-05, "loss": 1.4316, "step": 3981 }, { "epoch": 0.81, "learning_rate": 9.637871087230227e-05, "loss": 1.4346, "step": 3982 }, { "epoch": 0.81, "learning_rate": 9.618552766996752e-05, "loss": 1.4677, "step": 3983 }, { "epoch": 0.81, "learning_rate": 9.599251766535344e-05, "loss": 1.4465, "step": 3984 }, { "epoch": 0.81, "learning_rate": 9.579968094124303e-05, "loss": 1.4412, "step": 3985 }, { "epoch": 0.81, "learning_rate": 9.560701758034474e-05, "loss": 1.4042, "step": 3986 }, { "epoch": 0.81, "learning_rate": 9.541452766529246e-05, "loss": 1.4076, "step": 3987 }, { "epoch": 0.81, "learning_rate": 9.522221127864611e-05, "loss": 1.4327, "step": 3988 }, { "epoch": 0.81, "learning_rate": 9.503006850289097e-05, "loss": 1.4228, "step": 3989 }, { "epoch": 0.81, "learning_rate": 9.483809942043792e-05, "loss": 1.3941, "step": 3990 }, { "epoch": 0.81, "learning_rate": 9.464630411362324e-05, "loss": 1.4329, "step": 3991 }, { "epoch": 0.81, "learning_rate": 9.445468266470875e-05, "loss": 1.4566, "step": 3992 }, { "epoch": 0.81, "learning_rate": 9.426323515588182e-05, "loss": 1.3901, "step": 3993 }, { "epoch": 0.81, "learning_rate": 9.407196166925486e-05, "loss": 1.4352, "step": 3994 }, { "epoch": 0.81, "learning_rate": 9.388086228686604e-05, "loss": 1.4884, "step": 3995 }, { "epoch": 0.81, "learning_rate": 9.368993709067858e-05, "loss": 1.4191, "step": 3996 }, { "epoch": 0.81, "learning_rate": 9.349918616258113e-05, "loss": 1.4145, "step": 3997 }, { "epoch": 0.81, "learning_rate": 9.330860958438764e-05, "loss": 1.4414, "step": 3998 }, { "epoch": 0.81, "learning_rate": 9.311820743783728e-05, "loss": 1.4167, "step": 3999 }, { "epoch": 0.81, "learning_rate": 9.292797980459405e-05, "loss": 1.4901, "step": 4000 }, { "epoch": 0.81, "learning_rate": 9.273792676624749e-05, "loss": 1.4507, "step": 4001 }, { "epoch": 0.81, "learning_rate": 9.254804840431225e-05, "loss": 1.4239, "step": 4002 }, { "epoch": 0.81, "learning_rate": 9.235834480022786e-05, "loss": 1.417, "step": 4003 }, { "epoch": 0.81, "learning_rate": 9.216881603535899e-05, "loss": 1.4233, "step": 4004 }, { "epoch": 0.81, "learning_rate": 9.197946219099551e-05, "loss": 1.4255, "step": 4005 }, { "epoch": 0.81, "learning_rate": 9.179028334835182e-05, "loss": 1.3937, "step": 4006 }, { "epoch": 0.81, "learning_rate": 9.160127958856757e-05, "loss": 1.4372, "step": 4007 }, { "epoch": 0.81, "learning_rate": 9.141245099270734e-05, "loss": 1.4637, "step": 4008 }, { "epoch": 0.81, "learning_rate": 9.122379764176048e-05, "loss": 1.438, "step": 4009 }, { "epoch": 0.81, "learning_rate": 9.103531961664119e-05, "loss": 1.404, "step": 4010 }, { "epoch": 0.81, "learning_rate": 9.084701699818853e-05, "loss": 1.4208, "step": 4011 }, { "epoch": 0.81, "learning_rate": 9.065888986716625e-05, "loss": 1.471, "step": 4012 }, { "epoch": 0.81, "learning_rate": 9.047093830426284e-05, "loss": 1.4568, "step": 4013 }, { "epoch": 0.81, "learning_rate": 9.028316239009154e-05, "loss": 1.4627, "step": 4014 }, { "epoch": 0.81, "learning_rate": 9.009556220519028e-05, "loss": 1.424, "step": 4015 }, { "epoch": 0.81, "learning_rate": 8.990813783002149e-05, "loss": 1.4118, "step": 4016 }, { "epoch": 0.81, "learning_rate": 8.972088934497241e-05, "loss": 1.4154, "step": 4017 }, { "epoch": 0.81, "learning_rate": 8.953381683035444e-05, "loss": 1.4384, "step": 4018 }, { "epoch": 0.81, "learning_rate": 8.934692036640396e-05, "loss": 1.4516, "step": 4019 }, { "epoch": 0.81, "learning_rate": 8.916020003328157e-05, "loss": 1.4753, "step": 4020 }, { "epoch": 0.81, "learning_rate": 8.897365591107242e-05, "loss": 1.4286, "step": 4021 }, { "epoch": 0.81, "learning_rate": 8.878728807978614e-05, "loss": 1.4325, "step": 4022 }, { "epoch": 0.81, "learning_rate": 8.860109661935672e-05, "loss": 1.4352, "step": 4023 }, { "epoch": 0.81, "learning_rate": 8.841508160964218e-05, "loss": 1.431, "step": 4024 }, { "epoch": 0.81, "learning_rate": 8.822924313042535e-05, "loss": 1.4051, "step": 4025 }, { "epoch": 0.81, "learning_rate": 8.80435812614131e-05, "loss": 1.3911, "step": 4026 }, { "epoch": 0.81, "learning_rate": 8.785809608223655e-05, "loss": 1.4109, "step": 4027 }, { "epoch": 0.81, "learning_rate": 8.767278767245113e-05, "loss": 1.4603, "step": 4028 }, { "epoch": 0.81, "learning_rate": 8.748765611153648e-05, "loss": 1.4649, "step": 4029 }, { "epoch": 0.81, "learning_rate": 8.730270147889602e-05, "loss": 1.5298, "step": 4030 }, { "epoch": 0.81, "learning_rate": 8.711792385385781e-05, "loss": 1.4604, "step": 4031 }, { "epoch": 0.82, "learning_rate": 8.693332331567361e-05, "loss": 1.4295, "step": 4032 }, { "epoch": 0.82, "learning_rate": 8.674889994351942e-05, "loss": 1.4284, "step": 4033 }, { "epoch": 0.82, "learning_rate": 8.656465381649525e-05, "loss": 1.4414, "step": 4034 }, { "epoch": 0.82, "learning_rate": 8.638058501362512e-05, "loss": 1.464, "step": 4035 }, { "epoch": 0.82, "learning_rate": 8.619669361385663e-05, "loss": 1.4303, "step": 4036 }, { "epoch": 0.82, "learning_rate": 8.60129796960617e-05, "loss": 1.4259, "step": 4037 }, { "epoch": 0.82, "learning_rate": 8.582944333903609e-05, "loss": 1.4115, "step": 4038 }, { "epoch": 0.82, "learning_rate": 8.564608462149919e-05, "loss": 1.4456, "step": 4039 }, { "epoch": 0.82, "learning_rate": 8.546290362209442e-05, "loss": 1.4472, "step": 4040 }, { "epoch": 0.82, "learning_rate": 8.527990041938882e-05, "loss": 1.4408, "step": 4041 }, { "epoch": 0.82, "learning_rate": 8.509707509187331e-05, "loss": 1.4547, "step": 4042 }, { "epoch": 0.82, "learning_rate": 8.491442771796232e-05, "loss": 1.4222, "step": 4043 }, { "epoch": 0.82, "learning_rate": 8.473195837599418e-05, "loss": 1.4477, "step": 4044 }, { "epoch": 0.82, "learning_rate": 8.454966714423068e-05, "loss": 1.4024, "step": 4045 }, { "epoch": 0.82, "learning_rate": 8.436755410085733e-05, "loss": 1.4631, "step": 4046 }, { "epoch": 0.82, "learning_rate": 8.418561932398327e-05, "loss": 1.4172, "step": 4047 }, { "epoch": 0.82, "learning_rate": 8.400386289164091e-05, "loss": 1.4272, "step": 4048 }, { "epoch": 0.82, "learning_rate": 8.38222848817864e-05, "loss": 1.4441, "step": 4049 }, { "epoch": 0.82, "learning_rate": 8.364088537229935e-05, "loss": 1.4405, "step": 4050 }, { "epoch": 0.82, "learning_rate": 8.34596644409828e-05, "loss": 1.4446, "step": 4051 }, { "epoch": 0.82, "learning_rate": 8.327862216556309e-05, "loss": 1.4308, "step": 4052 }, { "epoch": 0.82, "learning_rate": 8.309775862369007e-05, "loss": 1.4363, "step": 4053 }, { "epoch": 0.82, "learning_rate": 8.291707389293695e-05, "loss": 1.424, "step": 4054 }, { "epoch": 0.82, "learning_rate": 8.273656805079994e-05, "loss": 1.4325, "step": 4055 }, { "epoch": 0.82, "learning_rate": 8.25562411746989e-05, "loss": 1.4294, "step": 4056 }, { "epoch": 0.82, "learning_rate": 8.237609334197677e-05, "loss": 1.4379, "step": 4057 }, { "epoch": 0.82, "learning_rate": 8.219612462989967e-05, "loss": 1.3998, "step": 4058 }, { "epoch": 0.82, "learning_rate": 8.201633511565703e-05, "loss": 1.404, "step": 4059 }, { "epoch": 0.82, "learning_rate": 8.183672487636135e-05, "loss": 1.413, "step": 4060 }, { "epoch": 0.82, "learning_rate": 8.165729398904803e-05, "loss": 1.4183, "step": 4061 }, { "epoch": 0.82, "learning_rate": 8.147804253067581e-05, "loss": 1.4406, "step": 4062 }, { "epoch": 0.82, "learning_rate": 8.129897057812641e-05, "loss": 1.4234, "step": 4063 }, { "epoch": 0.82, "learning_rate": 8.112007820820461e-05, "loss": 1.4525, "step": 4064 }, { "epoch": 0.82, "learning_rate": 8.094136549763797e-05, "loss": 1.4527, "step": 4065 }, { "epoch": 0.82, "learning_rate": 8.076283252307732e-05, "loss": 1.4622, "step": 4066 }, { "epoch": 0.82, "learning_rate": 8.058447936109597e-05, "loss": 1.4654, "step": 4067 }, { "epoch": 0.82, "learning_rate": 8.040630608819039e-05, "loss": 1.4634, "step": 4068 }, { "epoch": 0.82, "learning_rate": 8.022831278077996e-05, "loss": 1.4376, "step": 4069 }, { "epoch": 0.82, "learning_rate": 8.005049951520666e-05, "loss": 1.4488, "step": 4070 }, { "epoch": 0.82, "learning_rate": 7.987286636773533e-05, "loss": 1.394, "step": 4071 }, { "epoch": 0.82, "learning_rate": 7.969541341455366e-05, "loss": 1.4781, "step": 4072 }, { "epoch": 0.82, "learning_rate": 7.951814073177188e-05, "loss": 1.4564, "step": 4073 }, { "epoch": 0.82, "learning_rate": 7.934104839542306e-05, "loss": 1.4335, "step": 4074 }, { "epoch": 0.82, "learning_rate": 7.916413648146281e-05, "loss": 1.4186, "step": 4075 }, { "epoch": 0.82, "learning_rate": 7.898740506576935e-05, "loss": 1.4472, "step": 4076 }, { "epoch": 0.82, "learning_rate": 7.881085422414364e-05, "loss": 1.4796, "step": 4077 }, { "epoch": 0.82, "learning_rate": 7.863448403230905e-05, "loss": 1.4125, "step": 4078 }, { "epoch": 0.82, "learning_rate": 7.845829456591142e-05, "loss": 1.4499, "step": 4079 }, { "epoch": 0.82, "learning_rate": 7.828228590051911e-05, "loss": 1.4397, "step": 4080 }, { "epoch": 0.83, "learning_rate": 7.810645811162309e-05, "loss": 1.4332, "step": 4081 }, { "epoch": 0.83, "learning_rate": 7.793081127463658e-05, "loss": 1.451, "step": 4082 }, { "epoch": 0.83, "learning_rate": 7.775534546489526e-05, "loss": 1.4427, "step": 4083 }, { "epoch": 0.83, "learning_rate": 7.758006075765722e-05, "loss": 1.4597, "step": 4084 }, { "epoch": 0.83, "learning_rate": 7.74049572281027e-05, "loss": 1.4392, "step": 4085 }, { "epoch": 0.83, "learning_rate": 7.72300349513343e-05, "loss": 1.3895, "step": 4086 }, { "epoch": 0.83, "learning_rate": 7.705529400237704e-05, "loss": 1.4314, "step": 4087 }, { "epoch": 0.83, "learning_rate": 7.688073445617799e-05, "loss": 1.4516, "step": 4088 }, { "epoch": 0.83, "learning_rate": 7.670635638760649e-05, "loss": 1.4711, "step": 4089 }, { "epoch": 0.83, "learning_rate": 7.653215987145412e-05, "loss": 1.4559, "step": 4090 }, { "epoch": 0.83, "learning_rate": 7.635814498243426e-05, "loss": 1.4688, "step": 4091 }, { "epoch": 0.83, "learning_rate": 7.618431179518276e-05, "loss": 1.4366, "step": 4092 }, { "epoch": 0.83, "learning_rate": 7.601066038425741e-05, "loss": 1.4373, "step": 4093 }, { "epoch": 0.83, "learning_rate": 7.583719082413798e-05, "loss": 1.4407, "step": 4094 }, { "epoch": 0.83, "learning_rate": 7.56639031892264e-05, "loss": 1.4349, "step": 4095 }, { "epoch": 0.83, "learning_rate": 7.549079755384646e-05, "loss": 1.4372, "step": 4096 }, { "epoch": 0.83, "learning_rate": 7.531787399224371e-05, "loss": 1.4249, "step": 4097 }, { "epoch": 0.83, "learning_rate": 7.514513257858597e-05, "loss": 1.4452, "step": 4098 }, { "epoch": 0.83, "learning_rate": 7.497257338696261e-05, "loss": 1.4329, "step": 4099 }, { "epoch": 0.83, "learning_rate": 7.480019649138514e-05, "loss": 1.4413, "step": 4100 }, { "epoch": 0.83, "learning_rate": 7.462800196578661e-05, "loss": 1.3914, "step": 4101 }, { "epoch": 0.83, "learning_rate": 7.445598988402207e-05, "loss": 1.4027, "step": 4102 }, { "epoch": 0.83, "learning_rate": 7.428416031986818e-05, "loss": 1.4151, "step": 4103 }, { "epoch": 0.83, "learning_rate": 7.411251334702335e-05, "loss": 1.4262, "step": 4104 }, { "epoch": 0.83, "learning_rate": 7.39410490391077e-05, "loss": 1.4473, "step": 4105 }, { "epoch": 0.83, "learning_rate": 7.376976746966302e-05, "loss": 1.4476, "step": 4106 }, { "epoch": 0.83, "learning_rate": 7.359866871215265e-05, "loss": 1.4322, "step": 4107 }, { "epoch": 0.83, "learning_rate": 7.342775283996173e-05, "loss": 1.4617, "step": 4108 }, { "epoch": 0.83, "learning_rate": 7.325701992639649e-05, "loss": 1.4074, "step": 4109 }, { "epoch": 0.83, "learning_rate": 7.308647004468517e-05, "loss": 1.3892, "step": 4110 }, { "epoch": 0.83, "learning_rate": 7.291610326797732e-05, "loss": 1.4438, "step": 4111 }, { "epoch": 0.83, "learning_rate": 7.27459196693439e-05, "loss": 1.4247, "step": 4112 }, { "epoch": 0.83, "learning_rate": 7.257591932177748e-05, "loss": 1.4179, "step": 4113 }, { "epoch": 0.83, "learning_rate": 7.240610229819194e-05, "loss": 1.4446, "step": 4114 }, { "epoch": 0.83, "learning_rate": 7.223646867142237e-05, "loss": 1.3994, "step": 4115 }, { "epoch": 0.83, "learning_rate": 7.206701851422536e-05, "loss": 1.4472, "step": 4116 }, { "epoch": 0.83, "learning_rate": 7.189775189927889e-05, "loss": 1.4091, "step": 4117 }, { "epoch": 0.83, "learning_rate": 7.172866889918206e-05, "loss": 1.4517, "step": 4118 }, { "epoch": 0.83, "learning_rate": 7.155976958645532e-05, "loss": 1.4272, "step": 4119 }, { "epoch": 0.83, "learning_rate": 7.139105403354024e-05, "loss": 1.4057, "step": 4120 }, { "epoch": 0.83, "learning_rate": 7.122252231279974e-05, "loss": 1.4656, "step": 4121 }, { "epoch": 0.83, "learning_rate": 7.105417449651763e-05, "loss": 1.4115, "step": 4122 }, { "epoch": 0.83, "learning_rate": 7.08860106568991e-05, "loss": 1.4172, "step": 4123 }, { "epoch": 0.83, "learning_rate": 7.071803086607021e-05, "loss": 1.4433, "step": 4124 }, { "epoch": 0.83, "learning_rate": 7.055023519607828e-05, "loss": 1.3936, "step": 4125 }, { "epoch": 0.83, "learning_rate": 7.038262371889159e-05, "loss": 1.4433, "step": 4126 }, { "epoch": 0.83, "learning_rate": 7.021519650639951e-05, "loss": 1.4059, "step": 4127 }, { "epoch": 0.83, "learning_rate": 7.004795363041205e-05, "loss": 1.4148, "step": 4128 }, { "epoch": 0.83, "learning_rate": 6.98808951626605e-05, "loss": 1.4351, "step": 4129 }, { "epoch": 0.83, "learning_rate": 6.971402117479686e-05, "loss": 1.4022, "step": 4130 }, { "epoch": 0.84, "learning_rate": 6.954733173839416e-05, "loss": 1.431, "step": 4131 }, { "epoch": 0.84, "learning_rate": 6.938082692494618e-05, "loss": 1.4488, "step": 4132 }, { "epoch": 0.84, "learning_rate": 6.921450680586756e-05, "loss": 1.4458, "step": 4133 }, { "epoch": 0.84, "learning_rate": 6.904837145249365e-05, "loss": 1.4351, "step": 4134 }, { "epoch": 0.84, "learning_rate": 6.888242093608066e-05, "loss": 1.4122, "step": 4135 }, { "epoch": 0.84, "learning_rate": 6.871665532780535e-05, "loss": 1.4111, "step": 4136 }, { "epoch": 0.84, "learning_rate": 6.855107469876543e-05, "loss": 1.427, "step": 4137 }, { "epoch": 0.84, "learning_rate": 6.8385679119979e-05, "loss": 1.4186, "step": 4138 }, { "epoch": 0.84, "learning_rate": 6.822046866238513e-05, "loss": 1.4428, "step": 4139 }, { "epoch": 0.84, "learning_rate": 6.805544339684295e-05, "loss": 1.4905, "step": 4140 }, { "epoch": 0.84, "learning_rate": 6.789060339413272e-05, "loss": 1.4049, "step": 4141 }, { "epoch": 0.84, "learning_rate": 6.772594872495491e-05, "loss": 1.4238, "step": 4142 }, { "epoch": 0.84, "learning_rate": 6.756147945993063e-05, "loss": 1.4159, "step": 4143 }, { "epoch": 0.84, "learning_rate": 6.739719566960146e-05, "loss": 1.4348, "step": 4144 }, { "epoch": 0.84, "learning_rate": 6.723309742442951e-05, "loss": 1.4437, "step": 4145 }, { "epoch": 0.84, "learning_rate": 6.706918479479696e-05, "loss": 1.4559, "step": 4146 }, { "epoch": 0.84, "learning_rate": 6.690545785100671e-05, "loss": 1.432, "step": 4147 }, { "epoch": 0.84, "learning_rate": 6.6741916663282e-05, "loss": 1.4856, "step": 4148 }, { "epoch": 0.84, "learning_rate": 6.657856130176632e-05, "loss": 1.377, "step": 4149 }, { "epoch": 0.84, "learning_rate": 6.641539183652345e-05, "loss": 1.3983, "step": 4150 }, { "epoch": 0.84, "learning_rate": 6.625240833753755e-05, "loss": 1.4344, "step": 4151 }, { "epoch": 0.84, "learning_rate": 6.608961087471276e-05, "loss": 1.4498, "step": 4152 }, { "epoch": 0.84, "learning_rate": 6.592699951787362e-05, "loss": 1.4274, "step": 4153 }, { "epoch": 0.84, "learning_rate": 6.576457433676492e-05, "loss": 1.4574, "step": 4154 }, { "epoch": 0.84, "learning_rate": 6.560233540105143e-05, "loss": 1.4231, "step": 4155 }, { "epoch": 0.84, "learning_rate": 6.544028278031816e-05, "loss": 1.4167, "step": 4156 }, { "epoch": 0.84, "learning_rate": 6.527841654407024e-05, "loss": 1.4387, "step": 4157 }, { "epoch": 0.84, "learning_rate": 6.511673676173252e-05, "loss": 1.4534, "step": 4158 }, { "epoch": 0.84, "learning_rate": 6.495524350265037e-05, "loss": 1.3765, "step": 4159 }, { "epoch": 0.84, "learning_rate": 6.479393683608875e-05, "loss": 1.44, "step": 4160 }, { "epoch": 0.84, "learning_rate": 6.46328168312329e-05, "loss": 1.4189, "step": 4161 }, { "epoch": 0.84, "learning_rate": 6.447188355718786e-05, "loss": 1.427, "step": 4162 }, { "epoch": 0.84, "learning_rate": 6.431113708297853e-05, "loss": 1.469, "step": 4163 }, { "epoch": 0.84, "learning_rate": 6.415057747754976e-05, "loss": 1.4044, "step": 4164 }, { "epoch": 0.84, "learning_rate": 6.399020480976625e-05, "loss": 1.4441, "step": 4165 }, { "epoch": 0.84, "learning_rate": 6.383001914841253e-05, "loss": 1.4221, "step": 4166 }, { "epoch": 0.84, "learning_rate": 6.367002056219284e-05, "loss": 1.4718, "step": 4167 }, { "epoch": 0.84, "learning_rate": 6.351020911973132e-05, "loss": 1.4238, "step": 4168 }, { "epoch": 0.84, "learning_rate": 6.335058488957179e-05, "loss": 1.4192, "step": 4169 }, { "epoch": 0.84, "learning_rate": 6.31911479401776e-05, "loss": 1.4311, "step": 4170 }, { "epoch": 0.84, "learning_rate": 6.303189833993207e-05, "loss": 1.4486, "step": 4171 }, { "epoch": 0.84, "learning_rate": 6.287283615713796e-05, "loss": 1.4684, "step": 4172 }, { "epoch": 0.84, "learning_rate": 6.271396146001767e-05, "loss": 1.4675, "step": 4173 }, { "epoch": 0.84, "learning_rate": 6.255527431671332e-05, "loss": 1.4408, "step": 4174 }, { "epoch": 0.84, "learning_rate": 6.23967747952865e-05, "loss": 1.4424, "step": 4175 }, { "epoch": 0.84, "learning_rate": 6.223846296371821e-05, "loss": 1.397, "step": 4176 }, { "epoch": 0.84, "learning_rate": 6.208033888990911e-05, "loss": 1.4497, "step": 4177 }, { "epoch": 0.84, "learning_rate": 6.192240264167926e-05, "loss": 1.465, "step": 4178 }, { "epoch": 0.84, "learning_rate": 6.17646542867682e-05, "loss": 1.4151, "step": 4179 }, { "epoch": 0.85, "learning_rate": 6.160709389283481e-05, "loss": 1.4274, "step": 4180 }, { "epoch": 0.85, "learning_rate": 6.144972152745753e-05, "loss": 1.4181, "step": 4181 }, { "epoch": 0.85, "learning_rate": 6.129253725813405e-05, "loss": 1.4033, "step": 4182 }, { "epoch": 0.85, "learning_rate": 6.113554115228115e-05, "loss": 1.4392, "step": 4183 }, { "epoch": 0.85, "learning_rate": 6.097873327723519e-05, "loss": 1.4377, "step": 4184 }, { "epoch": 0.85, "learning_rate": 6.082211370025181e-05, "loss": 1.4309, "step": 4185 }, { "epoch": 0.85, "learning_rate": 6.0665682488505735e-05, "loss": 1.4229, "step": 4186 }, { "epoch": 0.85, "learning_rate": 6.050943970909101e-05, "loss": 1.4049, "step": 4187 }, { "epoch": 0.85, "learning_rate": 6.035338542902091e-05, "loss": 1.4862, "step": 4188 }, { "epoch": 0.85, "learning_rate": 6.019751971522758e-05, "loss": 1.4421, "step": 4189 }, { "epoch": 0.85, "learning_rate": 6.0041842634562525e-05, "loss": 1.4788, "step": 4190 }, { "epoch": 0.85, "learning_rate": 5.988635425379635e-05, "loss": 1.4433, "step": 4191 }, { "epoch": 0.85, "learning_rate": 5.973105463961864e-05, "loss": 1.3871, "step": 4192 }, { "epoch": 0.85, "learning_rate": 5.957594385863807e-05, "loss": 1.4409, "step": 4193 }, { "epoch": 0.85, "learning_rate": 5.942102197738231e-05, "loss": 1.4481, "step": 4194 }, { "epoch": 0.85, "learning_rate": 5.926628906229803e-05, "loss": 1.424, "step": 4195 }, { "epoch": 0.85, "learning_rate": 5.911174517975077e-05, "loss": 1.4099, "step": 4196 }, { "epoch": 0.85, "learning_rate": 5.895739039602505e-05, "loss": 1.3983, "step": 4197 }, { "epoch": 0.85, "learning_rate": 5.88032247773243e-05, "loss": 1.4146, "step": 4198 }, { "epoch": 0.85, "learning_rate": 5.864924838977087e-05, "loss": 1.4942, "step": 4199 }, { "epoch": 0.85, "learning_rate": 5.849546129940586e-05, "loss": 1.4103, "step": 4200 }, { "epoch": 0.85, "learning_rate": 5.834186357218907e-05, "loss": 1.4477, "step": 4201 }, { "epoch": 0.85, "learning_rate": 5.81884552739993e-05, "loss": 1.4285, "step": 4202 }, { "epoch": 0.85, "learning_rate": 5.8035236470634024e-05, "loss": 1.4301, "step": 4203 }, { "epoch": 0.85, "learning_rate": 5.788220722780935e-05, "loss": 1.4128, "step": 4204 }, { "epoch": 0.85, "learning_rate": 5.7729367611160265e-05, "loss": 1.4339, "step": 4205 }, { "epoch": 0.85, "learning_rate": 5.75767176862404e-05, "loss": 1.4593, "step": 4206 }, { "epoch": 0.85, "learning_rate": 5.74242575185217e-05, "loss": 1.4074, "step": 4207 }, { "epoch": 0.85, "learning_rate": 5.72719871733951e-05, "loss": 1.3807, "step": 4208 }, { "epoch": 0.85, "learning_rate": 5.711990671616995e-05, "loss": 1.4795, "step": 4209 }, { "epoch": 0.85, "learning_rate": 5.6968016212074313e-05, "loss": 1.4508, "step": 4210 }, { "epoch": 0.85, "learning_rate": 5.681631572625451e-05, "loss": 1.4269, "step": 4211 }, { "epoch": 0.85, "learning_rate": 5.6664805323775715e-05, "loss": 1.4353, "step": 4212 }, { "epoch": 0.85, "learning_rate": 5.6513485069621174e-05, "loss": 1.4758, "step": 4213 }, { "epoch": 0.85, "learning_rate": 5.636235502869286e-05, "loss": 1.4338, "step": 4214 }, { "epoch": 0.85, "learning_rate": 5.621141526581103e-05, "loss": 1.3977, "step": 4215 }, { "epoch": 0.85, "learning_rate": 5.606066584571446e-05, "loss": 1.4438, "step": 4216 }, { "epoch": 0.85, "learning_rate": 5.591010683306014e-05, "loss": 1.4052, "step": 4217 }, { "epoch": 0.85, "learning_rate": 5.575973829242364e-05, "loss": 1.4153, "step": 4218 }, { "epoch": 0.85, "learning_rate": 5.560956028829839e-05, "loss": 1.4354, "step": 4219 }, { "epoch": 0.85, "learning_rate": 5.545957288509651e-05, "loss": 1.45, "step": 4220 }, { "epoch": 0.85, "learning_rate": 5.530977614714816e-05, "loss": 1.3694, "step": 4221 }, { "epoch": 0.85, "learning_rate": 5.5160170138701895e-05, "loss": 1.4587, "step": 4222 }, { "epoch": 0.85, "learning_rate": 5.5010754923924267e-05, "loss": 1.4294, "step": 4223 }, { "epoch": 0.85, "learning_rate": 5.486153056690013e-05, "loss": 1.42, "step": 4224 }, { "epoch": 0.85, "learning_rate": 5.4712497131632396e-05, "loss": 1.4631, "step": 4225 }, { "epoch": 0.85, "learning_rate": 5.456365468204222e-05, "loss": 1.4358, "step": 4226 }, { "epoch": 0.85, "learning_rate": 5.441500328196869e-05, "loss": 1.403, "step": 4227 }, { "epoch": 0.85, "learning_rate": 5.4266542995168975e-05, "loss": 1.4032, "step": 4228 }, { "epoch": 0.85, "learning_rate": 5.411827388531837e-05, "loss": 1.4692, "step": 4229 }, { "epoch": 0.86, "learning_rate": 5.3970196016010274e-05, "loss": 1.472, "step": 4230 }, { "epoch": 0.86, "learning_rate": 5.3822309450755555e-05, "loss": 1.4856, "step": 4231 }, { "epoch": 0.86, "learning_rate": 5.367461425298359e-05, "loss": 1.445, "step": 4232 }, { "epoch": 0.86, "learning_rate": 5.3527110486041476e-05, "loss": 1.4685, "step": 4233 }, { "epoch": 0.86, "learning_rate": 5.337979821319416e-05, "loss": 1.4105, "step": 4234 }, { "epoch": 0.86, "learning_rate": 5.3232677497624504e-05, "loss": 1.3658, "step": 4235 }, { "epoch": 0.86, "learning_rate": 5.3085748402433244e-05, "loss": 1.4188, "step": 4236 }, { "epoch": 0.86, "learning_rate": 5.29390109906388e-05, "loss": 1.428, "step": 4237 }, { "epoch": 0.86, "learning_rate": 5.279246532517745e-05, "loss": 1.4565, "step": 4238 }, { "epoch": 0.86, "learning_rate": 5.264611146890336e-05, "loss": 1.4047, "step": 4239 }, { "epoch": 0.86, "learning_rate": 5.2499949484588196e-05, "loss": 1.4379, "step": 4240 }, { "epoch": 0.86, "learning_rate": 5.235397943492154e-05, "loss": 1.4063, "step": 4241 }, { "epoch": 0.86, "learning_rate": 5.220820138251053e-05, "loss": 1.4067, "step": 4242 }, { "epoch": 0.86, "learning_rate": 5.206261538987994e-05, "loss": 1.4418, "step": 4243 }, { "epoch": 0.86, "learning_rate": 5.191722151947226e-05, "loss": 1.4284, "step": 4244 }, { "epoch": 0.86, "learning_rate": 5.1772019833647486e-05, "loss": 1.4804, "step": 4245 }, { "epoch": 0.86, "learning_rate": 5.162701039468326e-05, "loss": 1.4453, "step": 4246 }, { "epoch": 0.86, "learning_rate": 5.1482193264774714e-05, "loss": 1.4568, "step": 4247 }, { "epoch": 0.86, "learning_rate": 5.133756850603461e-05, "loss": 1.431, "step": 4248 }, { "epoch": 0.86, "learning_rate": 5.119313618049309e-05, "loss": 1.4443, "step": 4249 }, { "epoch": 0.86, "learning_rate": 5.104889635009763e-05, "loss": 1.4206, "step": 4250 }, { "epoch": 0.86, "learning_rate": 5.0904849076713445e-05, "loss": 1.4903, "step": 4251 }, { "epoch": 0.86, "learning_rate": 5.076099442212301e-05, "loss": 1.4774, "step": 4252 }, { "epoch": 0.86, "learning_rate": 5.061733244802608e-05, "loss": 1.4589, "step": 4253 }, { "epoch": 0.86, "learning_rate": 5.0473863216040026e-05, "loss": 1.4303, "step": 4254 }, { "epoch": 0.86, "learning_rate": 5.03305867876993e-05, "loss": 1.4458, "step": 4255 }, { "epoch": 0.86, "learning_rate": 5.018750322445581e-05, "loss": 1.4651, "step": 4256 }, { "epoch": 0.86, "learning_rate": 5.004461258767873e-05, "loss": 1.4585, "step": 4257 }, { "epoch": 0.86, "learning_rate": 4.990191493865437e-05, "loss": 1.3859, "step": 4258 }, { "epoch": 0.86, "learning_rate": 4.975941033858639e-05, "loss": 1.4212, "step": 4259 }, { "epoch": 0.86, "learning_rate": 4.961709884859561e-05, "loss": 1.4265, "step": 4260 }, { "epoch": 0.86, "learning_rate": 4.94749805297201e-05, "loss": 1.3955, "step": 4261 }, { "epoch": 0.86, "learning_rate": 4.933305544291483e-05, "loss": 1.4303, "step": 4262 }, { "epoch": 0.86, "learning_rate": 4.919132364905221e-05, "loss": 1.4169, "step": 4263 }, { "epoch": 0.86, "learning_rate": 4.90497852089215e-05, "loss": 1.3815, "step": 4264 }, { "epoch": 0.86, "learning_rate": 4.890844018322921e-05, "loss": 1.4391, "step": 4265 }, { "epoch": 0.86, "learning_rate": 4.8767288632598774e-05, "loss": 1.4043, "step": 4266 }, { "epoch": 0.86, "learning_rate": 4.862633061757082e-05, "loss": 1.4403, "step": 4267 }, { "epoch": 0.86, "learning_rate": 4.848556619860262e-05, "loss": 1.4077, "step": 4268 }, { "epoch": 0.86, "learning_rate": 4.834499543606869e-05, "loss": 1.4534, "step": 4269 }, { "epoch": 0.86, "learning_rate": 4.820461839026047e-05, "loss": 1.4243, "step": 4270 }, { "epoch": 0.86, "learning_rate": 4.8064435121386194e-05, "loss": 1.4496, "step": 4271 }, { "epoch": 0.86, "learning_rate": 4.79244456895711e-05, "loss": 1.4715, "step": 4272 }, { "epoch": 0.86, "learning_rate": 4.7784650154857335e-05, "loss": 1.4033, "step": 4273 }, { "epoch": 0.86, "learning_rate": 4.764504857720364e-05, "loss": 1.4372, "step": 4274 }, { "epoch": 0.86, "learning_rate": 4.750564101648575e-05, "loss": 1.4532, "step": 4275 }, { "epoch": 0.86, "learning_rate": 4.736642753249615e-05, "loss": 1.4294, "step": 4276 }, { "epoch": 0.86, "learning_rate": 4.7227408184944145e-05, "loss": 1.4516, "step": 4277 }, { "epoch": 0.86, "learning_rate": 4.70885830334557e-05, "loss": 1.4176, "step": 4278 }, { "epoch": 0.87, "learning_rate": 4.6949952137573524e-05, "loss": 1.4381, "step": 4279 }, { "epoch": 0.87, "learning_rate": 4.681151555675689e-05, "loss": 1.4514, "step": 4280 }, { "epoch": 0.87, "learning_rate": 4.6673273350381906e-05, "loss": 1.4307, "step": 4281 }, { "epoch": 0.87, "learning_rate": 4.65352255777412e-05, "loss": 1.4392, "step": 4282 }, { "epoch": 0.87, "learning_rate": 4.639737229804403e-05, "loss": 1.4111, "step": 4283 }, { "epoch": 0.87, "learning_rate": 4.625971357041631e-05, "loss": 1.4147, "step": 4284 }, { "epoch": 0.87, "learning_rate": 4.6122249453900424e-05, "loss": 1.3992, "step": 4285 }, { "epoch": 0.87, "learning_rate": 4.5984980007455265e-05, "loss": 1.4063, "step": 4286 }, { "epoch": 0.87, "learning_rate": 4.5847905289956304e-05, "loss": 1.4473, "step": 4287 }, { "epoch": 0.87, "learning_rate": 4.57110253601955e-05, "loss": 1.4067, "step": 4288 }, { "epoch": 0.87, "learning_rate": 4.557434027688118e-05, "loss": 1.4579, "step": 4289 }, { "epoch": 0.87, "learning_rate": 4.54378500986381e-05, "loss": 1.4519, "step": 4290 }, { "epoch": 0.87, "learning_rate": 4.5301554884007725e-05, "loss": 1.4109, "step": 4291 }, { "epoch": 0.87, "learning_rate": 4.516545469144728e-05, "loss": 1.4052, "step": 4292 }, { "epoch": 0.87, "learning_rate": 4.502954957933092e-05, "loss": 1.4168, "step": 4293 }, { "epoch": 0.87, "learning_rate": 4.489383960594884e-05, "loss": 1.4302, "step": 4294 }, { "epoch": 0.87, "learning_rate": 4.4758324829507646e-05, "loss": 1.4252, "step": 4295 }, { "epoch": 0.87, "learning_rate": 4.462300530813024e-05, "loss": 1.427, "step": 4296 }, { "epoch": 0.87, "learning_rate": 4.44878810998558e-05, "loss": 1.4268, "step": 4297 }, { "epoch": 0.87, "learning_rate": 4.4352952262639414e-05, "loss": 1.4355, "step": 4298 }, { "epoch": 0.87, "learning_rate": 4.421821885435279e-05, "loss": 1.4393, "step": 4299 }, { "epoch": 0.87, "learning_rate": 4.408368093278364e-05, "loss": 1.4366, "step": 4300 }, { "epoch": 0.87, "learning_rate": 4.394933855563582e-05, "loss": 1.4571, "step": 4301 }, { "epoch": 0.87, "learning_rate": 4.38151917805294e-05, "loss": 1.442, "step": 4302 }, { "epoch": 0.87, "learning_rate": 4.368124066500051e-05, "loss": 1.4264, "step": 4303 }, { "epoch": 0.87, "learning_rate": 4.354748526650115e-05, "loss": 1.4271, "step": 4304 }, { "epoch": 0.87, "learning_rate": 4.3413925642399765e-05, "loss": 1.413, "step": 4305 }, { "epoch": 0.87, "learning_rate": 4.328056184998058e-05, "loss": 1.4638, "step": 4306 }, { "epoch": 0.87, "learning_rate": 4.3147393946443845e-05, "loss": 1.4306, "step": 4307 }, { "epoch": 0.87, "learning_rate": 4.3014421988905874e-05, "loss": 1.43, "step": 4308 }, { "epoch": 0.87, "learning_rate": 4.288164603439892e-05, "loss": 1.4142, "step": 4309 }, { "epoch": 0.87, "learning_rate": 4.274906613987123e-05, "loss": 1.3948, "step": 4310 }, { "epoch": 0.87, "learning_rate": 4.261668236218663e-05, "loss": 1.3979, "step": 4311 }, { "epoch": 0.87, "learning_rate": 4.248449475812521e-05, "loss": 1.4106, "step": 4312 }, { "epoch": 0.87, "learning_rate": 4.2352503384382846e-05, "loss": 1.4313, "step": 4313 }, { "epoch": 0.87, "learning_rate": 4.222070829757107e-05, "loss": 1.4281, "step": 4314 }, { "epoch": 0.87, "learning_rate": 4.208910955421747e-05, "loss": 1.3852, "step": 4315 }, { "epoch": 0.87, "learning_rate": 4.195770721076525e-05, "loss": 1.4586, "step": 4316 }, { "epoch": 0.87, "learning_rate": 4.182650132357341e-05, "loss": 1.4189, "step": 4317 }, { "epoch": 0.87, "learning_rate": 4.1695491948916695e-05, "loss": 1.4353, "step": 4318 }, { "epoch": 0.87, "learning_rate": 4.156467914298562e-05, "loss": 1.4551, "step": 4319 }, { "epoch": 0.87, "learning_rate": 4.1434062961886285e-05, "loss": 1.4616, "step": 4320 }, { "epoch": 0.87, "learning_rate": 4.130364346164056e-05, "loss": 1.4239, "step": 4321 }, { "epoch": 0.87, "learning_rate": 4.1173420698186025e-05, "loss": 1.4561, "step": 4322 }, { "epoch": 0.87, "learning_rate": 4.104339472737556e-05, "loss": 1.4426, "step": 4323 }, { "epoch": 0.87, "learning_rate": 4.091356560497794e-05, "loss": 1.4972, "step": 4324 }, { "epoch": 0.87, "learning_rate": 4.0783933386677464e-05, "loss": 1.4447, "step": 4325 }, { "epoch": 0.87, "learning_rate": 4.065449812807387e-05, "loss": 1.4427, "step": 4326 }, { "epoch": 0.87, "learning_rate": 4.0525259884682475e-05, "loss": 1.4502, "step": 4327 }, { "epoch": 0.87, "learning_rate": 4.0396218711934276e-05, "loss": 1.444, "step": 4328 }, { "epoch": 0.88, "learning_rate": 4.026737466517533e-05, "loss": 1.4053, "step": 4329 }, { "epoch": 0.88, "learning_rate": 4.0138727799667484e-05, "loss": 1.3965, "step": 4330 }, { "epoch": 0.88, "learning_rate": 4.001027817058789e-05, "loss": 1.436, "step": 4331 }, { "epoch": 0.88, "learning_rate": 3.988202583302925e-05, "loss": 1.4782, "step": 4332 }, { "epoch": 0.88, "learning_rate": 3.975397084199939e-05, "loss": 1.4381, "step": 4333 }, { "epoch": 0.88, "learning_rate": 3.962611325242177e-05, "loss": 1.4413, "step": 4334 }, { "epoch": 0.88, "learning_rate": 3.949845311913491e-05, "loss": 1.3994, "step": 4335 }, { "epoch": 0.88, "learning_rate": 3.9370990496892856e-05, "loss": 1.4294, "step": 4336 }, { "epoch": 0.88, "learning_rate": 3.9243725440364806e-05, "loss": 1.4042, "step": 4337 }, { "epoch": 0.88, "learning_rate": 3.911665800413533e-05, "loss": 1.415, "step": 4338 }, { "epoch": 0.88, "learning_rate": 3.898978824270416e-05, "loss": 1.4347, "step": 4339 }, { "epoch": 0.88, "learning_rate": 3.886311621048638e-05, "loss": 1.4532, "step": 4340 }, { "epoch": 0.88, "learning_rate": 3.8736641961811925e-05, "loss": 1.3926, "step": 4341 }, { "epoch": 0.88, "learning_rate": 3.8610365550926285e-05, "loss": 1.4512, "step": 4342 }, { "epoch": 0.88, "learning_rate": 3.8484287031989864e-05, "loss": 1.4433, "step": 4343 }, { "epoch": 0.88, "learning_rate": 3.83584064590784e-05, "loss": 1.4103, "step": 4344 }, { "epoch": 0.88, "learning_rate": 3.823272388618243e-05, "loss": 1.4546, "step": 4345 }, { "epoch": 0.88, "learning_rate": 3.810723936720789e-05, "loss": 1.4334, "step": 4346 }, { "epoch": 0.88, "learning_rate": 3.7981952955975505e-05, "loss": 1.4381, "step": 4347 }, { "epoch": 0.88, "learning_rate": 3.785686470622118e-05, "loss": 1.4238, "step": 4348 }, { "epoch": 0.88, "learning_rate": 3.7731974671595835e-05, "loss": 1.4107, "step": 4349 }, { "epoch": 0.88, "learning_rate": 3.760728290566523e-05, "loss": 1.4288, "step": 4350 }, { "epoch": 0.88, "learning_rate": 3.7482789461910305e-05, "loss": 1.3985, "step": 4351 }, { "epoch": 0.88, "learning_rate": 3.735849439372685e-05, "loss": 1.4649, "step": 4352 }, { "epoch": 0.88, "learning_rate": 3.723439775442533e-05, "loss": 1.4094, "step": 4353 }, { "epoch": 0.88, "learning_rate": 3.711049959723145e-05, "loss": 1.4202, "step": 4354 }, { "epoch": 0.88, "learning_rate": 3.698679997528559e-05, "loss": 1.435, "step": 4355 }, { "epoch": 0.88, "learning_rate": 3.6863298941643144e-05, "loss": 1.4142, "step": 4356 }, { "epoch": 0.88, "learning_rate": 3.673999654927413e-05, "loss": 1.4255, "step": 4357 }, { "epoch": 0.88, "learning_rate": 3.661689285106362e-05, "loss": 1.4257, "step": 4358 }, { "epoch": 0.88, "learning_rate": 3.6493987899811066e-05, "loss": 1.4358, "step": 4359 }, { "epoch": 0.88, "learning_rate": 3.637128174823101e-05, "loss": 1.4011, "step": 4360 }, { "epoch": 0.88, "learning_rate": 3.624877444895269e-05, "loss": 1.4342, "step": 4361 }, { "epoch": 0.88, "learning_rate": 3.612646605451997e-05, "loss": 1.4372, "step": 4362 }, { "epoch": 0.88, "learning_rate": 3.6004356617391375e-05, "loss": 1.4276, "step": 4363 }, { "epoch": 0.88, "learning_rate": 3.5882446189940355e-05, "loss": 1.4483, "step": 4364 }, { "epoch": 0.88, "learning_rate": 3.576073482445452e-05, "loss": 1.4263, "step": 4365 }, { "epoch": 0.88, "learning_rate": 3.5639222573136566e-05, "loss": 1.4113, "step": 4366 }, { "epoch": 0.88, "learning_rate": 3.5517909488103514e-05, "loss": 1.4949, "step": 4367 }, { "epoch": 0.88, "learning_rate": 3.5396795621387144e-05, "loss": 1.4565, "step": 4368 }, { "epoch": 0.88, "learning_rate": 3.5275881024933675e-05, "loss": 1.4057, "step": 4369 }, { "epoch": 0.88, "learning_rate": 3.51551657506039e-05, "loss": 1.4478, "step": 4370 }, { "epoch": 0.88, "learning_rate": 3.5034649850173074e-05, "loss": 1.4603, "step": 4371 }, { "epoch": 0.88, "learning_rate": 3.49143333753309e-05, "loss": 1.4174, "step": 4372 }, { "epoch": 0.88, "learning_rate": 3.479421637768176e-05, "loss": 1.3977, "step": 4373 }, { "epoch": 0.88, "learning_rate": 3.467429890874424e-05, "loss": 1.394, "step": 4374 }, { "epoch": 0.88, "learning_rate": 3.455458101995146e-05, "loss": 1.4374, "step": 4375 }, { "epoch": 0.88, "learning_rate": 3.443506276265096e-05, "loss": 1.452, "step": 4376 }, { "epoch": 0.88, "learning_rate": 3.4315744188104626e-05, "loss": 1.4342, "step": 4377 }, { "epoch": 0.89, "learning_rate": 3.419662534748863e-05, "loss": 1.4756, "step": 4378 }, { "epoch": 0.89, "learning_rate": 3.407770629189361e-05, "loss": 1.4209, "step": 4379 }, { "epoch": 0.89, "learning_rate": 3.395898707232442e-05, "loss": 1.428, "step": 4380 }, { "epoch": 0.89, "learning_rate": 3.384046773970023e-05, "loss": 1.4041, "step": 4381 }, { "epoch": 0.89, "learning_rate": 3.372214834485449e-05, "loss": 1.4345, "step": 4382 }, { "epoch": 0.89, "learning_rate": 3.360402893853498e-05, "loss": 1.4634, "step": 4383 }, { "epoch": 0.89, "learning_rate": 3.3486109571403414e-05, "loss": 1.4496, "step": 4384 }, { "epoch": 0.89, "learning_rate": 3.336839029403599e-05, "loss": 1.3995, "step": 4385 }, { "epoch": 0.89, "learning_rate": 3.325087115692299e-05, "loss": 1.4381, "step": 4386 }, { "epoch": 0.89, "learning_rate": 3.3133552210468876e-05, "loss": 1.4463, "step": 4387 }, { "epoch": 0.89, "learning_rate": 3.301643350499228e-05, "loss": 1.4154, "step": 4388 }, { "epoch": 0.89, "learning_rate": 3.2899515090725916e-05, "loss": 1.4526, "step": 4389 }, { "epoch": 0.89, "learning_rate": 3.2782797017816504e-05, "loss": 1.3987, "step": 4390 }, { "epoch": 0.89, "learning_rate": 3.266627933632488e-05, "loss": 1.3876, "step": 4391 }, { "epoch": 0.89, "learning_rate": 3.254996209622607e-05, "loss": 1.4174, "step": 4392 }, { "epoch": 0.89, "learning_rate": 3.2433845347408955e-05, "loss": 1.4418, "step": 4393 }, { "epoch": 0.89, "learning_rate": 3.231792913967657e-05, "loss": 1.4329, "step": 4394 }, { "epoch": 0.89, "learning_rate": 3.2202213522745825e-05, "loss": 1.4199, "step": 4395 }, { "epoch": 0.89, "learning_rate": 3.208669854624763e-05, "loss": 1.4002, "step": 4396 }, { "epoch": 0.89, "learning_rate": 3.197138425972679e-05, "loss": 1.4221, "step": 4397 }, { "epoch": 0.89, "learning_rate": 3.1856270712642185e-05, "loss": 1.3894, "step": 4398 }, { "epoch": 0.89, "learning_rate": 3.174135795436639e-05, "loss": 1.4278, "step": 4399 }, { "epoch": 0.89, "learning_rate": 3.162664603418608e-05, "loss": 1.4502, "step": 4400 }, { "epoch": 0.89, "learning_rate": 3.151213500130168e-05, "loss": 1.4273, "step": 4401 }, { "epoch": 0.89, "learning_rate": 3.1397824904827345e-05, "loss": 1.4289, "step": 4402 }, { "epoch": 0.89, "learning_rate": 3.128371579379119e-05, "loss": 1.4351, "step": 4403 }, { "epoch": 0.89, "learning_rate": 3.1169807717135126e-05, "loss": 1.4569, "step": 4404 }, { "epoch": 0.89, "learning_rate": 3.105610072371473e-05, "loss": 1.4412, "step": 4405 }, { "epoch": 0.89, "learning_rate": 3.094259486229956e-05, "loss": 1.4231, "step": 4406 }, { "epoch": 0.89, "learning_rate": 3.08292901815726e-05, "loss": 1.4445, "step": 4407 }, { "epoch": 0.89, "learning_rate": 3.071618673013077e-05, "loss": 1.4285, "step": 4408 }, { "epoch": 0.89, "learning_rate": 3.060328455648464e-05, "loss": 1.4287, "step": 4409 }, { "epoch": 0.89, "learning_rate": 3.0490583709058384e-05, "loss": 1.4029, "step": 4410 }, { "epoch": 0.89, "learning_rate": 3.037808423618993e-05, "loss": 1.4246, "step": 4411 }, { "epoch": 0.89, "learning_rate": 3.0265786186130627e-05, "loss": 1.4112, "step": 4412 }, { "epoch": 0.89, "learning_rate": 3.0153689607045842e-05, "loss": 1.4556, "step": 4413 }, { "epoch": 0.89, "learning_rate": 3.0041794547014e-05, "loss": 1.4332, "step": 4414 }, { "epoch": 0.89, "learning_rate": 2.9930101054027437e-05, "loss": 1.4284, "step": 4415 }, { "epoch": 0.89, "learning_rate": 2.981860917599205e-05, "loss": 1.4217, "step": 4416 }, { "epoch": 0.89, "learning_rate": 2.9707318960727038e-05, "loss": 1.428, "step": 4417 }, { "epoch": 0.89, "learning_rate": 2.959623045596538e-05, "loss": 1.4133, "step": 4418 }, { "epoch": 0.89, "learning_rate": 2.948534370935335e-05, "loss": 1.4541, "step": 4419 }, { "epoch": 0.89, "learning_rate": 2.937465876845069e-05, "loss": 1.4515, "step": 4420 }, { "epoch": 0.89, "learning_rate": 2.9264175680730698e-05, "loss": 1.4563, "step": 4421 }, { "epoch": 0.89, "learning_rate": 2.915389449357997e-05, "loss": 1.4824, "step": 4422 }, { "epoch": 0.89, "learning_rate": 2.9043815254298612e-05, "loss": 1.4509, "step": 4423 }, { "epoch": 0.89, "learning_rate": 2.8933938010100026e-05, "loss": 1.4154, "step": 4424 }, { "epoch": 0.89, "learning_rate": 2.8824262808111177e-05, "loss": 1.4609, "step": 4425 }, { "epoch": 0.89, "learning_rate": 2.8714789695372055e-05, "loss": 1.4009, "step": 4426 }, { "epoch": 0.89, "learning_rate": 2.8605518718836156e-05, "loss": 1.4342, "step": 4427 }, { "epoch": 0.9, "learning_rate": 2.849644992537026e-05, "loss": 1.44, "step": 4428 }, { "epoch": 0.9, "learning_rate": 2.838758336175451e-05, "loss": 1.4196, "step": 4429 }, { "epoch": 0.9, "learning_rate": 2.8278919074682163e-05, "loss": 1.433, "step": 4430 }, { "epoch": 0.9, "learning_rate": 2.8170457110759885e-05, "loss": 1.4495, "step": 4431 }, { "epoch": 0.9, "learning_rate": 2.8062197516507348e-05, "loss": 1.4258, "step": 4432 }, { "epoch": 0.9, "learning_rate": 2.7954140338357526e-05, "loss": 1.4115, "step": 4433 }, { "epoch": 0.9, "learning_rate": 2.7846285622656732e-05, "loss": 1.3877, "step": 4434 }, { "epoch": 0.9, "learning_rate": 2.7738633415664183e-05, "loss": 1.4368, "step": 4435 }, { "epoch": 0.9, "learning_rate": 2.7631183763552393e-05, "loss": 1.3988, "step": 4436 }, { "epoch": 0.9, "learning_rate": 2.7523936712406995e-05, "loss": 1.4177, "step": 4437 }, { "epoch": 0.9, "learning_rate": 2.7416892308226694e-05, "loss": 1.4197, "step": 4438 }, { "epoch": 0.9, "learning_rate": 2.731005059692332e-05, "loss": 1.4028, "step": 4439 }, { "epoch": 0.9, "learning_rate": 2.720341162432166e-05, "loss": 1.4147, "step": 4440 }, { "epoch": 0.9, "learning_rate": 2.7096975436159633e-05, "loss": 1.4172, "step": 4441 }, { "epoch": 0.9, "learning_rate": 2.699074207808816e-05, "loss": 1.4409, "step": 4442 }, { "epoch": 0.9, "learning_rate": 2.688471159567124e-05, "loss": 1.4301, "step": 4443 }, { "epoch": 0.9, "learning_rate": 2.677888403438583e-05, "loss": 1.4659, "step": 4444 }, { "epoch": 0.9, "learning_rate": 2.667325943962162e-05, "loss": 1.4557, "step": 4445 }, { "epoch": 0.9, "learning_rate": 2.656783785668154e-05, "loss": 1.4314, "step": 4446 }, { "epoch": 0.9, "learning_rate": 2.6462619330781424e-05, "loss": 1.459, "step": 4447 }, { "epoch": 0.9, "learning_rate": 2.635760390704983e-05, "loss": 1.4366, "step": 4448 }, { "epoch": 0.9, "learning_rate": 2.625279163052835e-05, "loss": 1.4191, "step": 4449 }, { "epoch": 0.9, "learning_rate": 2.614818254617152e-05, "loss": 1.4164, "step": 4450 }, { "epoch": 0.9, "learning_rate": 2.6043776698846445e-05, "loss": 1.4007, "step": 4451 }, { "epoch": 0.9, "learning_rate": 2.593957413333331e-05, "loss": 1.4725, "step": 4452 }, { "epoch": 0.9, "learning_rate": 2.5835574894324975e-05, "loss": 1.4025, "step": 4453 }, { "epoch": 0.9, "learning_rate": 2.573177902642726e-05, "loss": 1.414, "step": 4454 }, { "epoch": 0.9, "learning_rate": 2.562818657415855e-05, "loss": 1.4174, "step": 4455 }, { "epoch": 0.9, "learning_rate": 2.5524797581950143e-05, "loss": 1.4341, "step": 4456 }, { "epoch": 0.9, "learning_rate": 2.5421612094146008e-05, "loss": 1.4019, "step": 4457 }, { "epoch": 0.9, "learning_rate": 2.531863015500274e-05, "loss": 1.4126, "step": 4458 }, { "epoch": 0.9, "learning_rate": 2.5215851808689795e-05, "loss": 1.4257, "step": 4459 }, { "epoch": 0.9, "learning_rate": 2.511327709928929e-05, "loss": 1.4147, "step": 4460 }, { "epoch": 0.9, "learning_rate": 2.501090607079587e-05, "loss": 1.4224, "step": 4461 }, { "epoch": 0.9, "learning_rate": 2.490873876711697e-05, "loss": 1.4167, "step": 4462 }, { "epoch": 0.9, "learning_rate": 2.4806775232072532e-05, "loss": 1.3974, "step": 4463 }, { "epoch": 0.9, "learning_rate": 2.470501550939508e-05, "loss": 1.4425, "step": 4464 }, { "epoch": 0.9, "learning_rate": 2.4603459642729864e-05, "loss": 1.4074, "step": 4465 }, { "epoch": 0.9, "learning_rate": 2.45021076756346e-05, "loss": 1.4096, "step": 4466 }, { "epoch": 0.9, "learning_rate": 2.440095965157957e-05, "loss": 1.4436, "step": 4467 }, { "epoch": 0.9, "learning_rate": 2.4300015613947568e-05, "loss": 1.3753, "step": 4468 }, { "epoch": 0.9, "learning_rate": 2.4199275606033966e-05, "loss": 1.4018, "step": 4469 }, { "epoch": 0.9, "learning_rate": 2.409873967104659e-05, "loss": 1.4055, "step": 4470 }, { "epoch": 0.9, "learning_rate": 2.3998407852105607e-05, "loss": 1.3872, "step": 4471 }, { "epoch": 0.9, "learning_rate": 2.389828019224388e-05, "loss": 1.4131, "step": 4472 }, { "epoch": 0.9, "learning_rate": 2.3798356734406502e-05, "loss": 1.4614, "step": 4473 }, { "epoch": 0.9, "learning_rate": 2.3698637521451184e-05, "loss": 1.3908, "step": 4474 }, { "epoch": 0.9, "learning_rate": 2.359912259614777e-05, "loss": 1.4069, "step": 4475 }, { "epoch": 0.9, "learning_rate": 2.3499812001178678e-05, "loss": 1.4529, "step": 4476 }, { "epoch": 0.91, "learning_rate": 2.340070577913861e-05, "loss": 1.4355, "step": 4477 }, { "epoch": 0.91, "learning_rate": 2.330180397253473e-05, "loss": 1.4595, "step": 4478 }, { "epoch": 0.91, "learning_rate": 2.320310662378633e-05, "loss": 1.4071, "step": 4479 }, { "epoch": 0.91, "learning_rate": 2.310461377522527e-05, "loss": 1.412, "step": 4480 }, { "epoch": 0.91, "learning_rate": 2.300632546909537e-05, "loss": 1.4457, "step": 4481 }, { "epoch": 0.91, "learning_rate": 2.290824174755296e-05, "loss": 1.4364, "step": 4482 }, { "epoch": 0.91, "learning_rate": 2.281036265266656e-05, "loss": 1.4042, "step": 4483 }, { "epoch": 0.91, "learning_rate": 2.2712688226416976e-05, "loss": 1.384, "step": 4484 }, { "epoch": 0.91, "learning_rate": 2.2615218510697088e-05, "loss": 1.4303, "step": 4485 }, { "epoch": 0.91, "learning_rate": 2.251795354731223e-05, "loss": 1.4563, "step": 4486 }, { "epoch": 0.91, "learning_rate": 2.2420893377979533e-05, "loss": 1.5022, "step": 4487 }, { "epoch": 0.91, "learning_rate": 2.232403804432864e-05, "loss": 1.4051, "step": 4488 }, { "epoch": 0.91, "learning_rate": 2.2227387587901216e-05, "loss": 1.4082, "step": 4489 }, { "epoch": 0.91, "learning_rate": 2.213094205015098e-05, "loss": 1.4317, "step": 4490 }, { "epoch": 0.91, "learning_rate": 2.203470147244385e-05, "loss": 1.4312, "step": 4491 }, { "epoch": 0.91, "learning_rate": 2.1938665896057918e-05, "loss": 1.4507, "step": 4492 }, { "epoch": 0.91, "learning_rate": 2.1842835362183068e-05, "loss": 1.409, "step": 4493 }, { "epoch": 0.91, "learning_rate": 2.1747209911921528e-05, "loss": 1.4384, "step": 4494 }, { "epoch": 0.91, "learning_rate": 2.165178958628744e-05, "loss": 1.4372, "step": 4495 }, { "epoch": 0.91, "learning_rate": 2.1556574426206954e-05, "loss": 1.4338, "step": 4496 }, { "epoch": 0.91, "learning_rate": 2.146156447251829e-05, "loss": 1.4098, "step": 4497 }, { "epoch": 0.91, "learning_rate": 2.1366759765971577e-05, "loss": 1.3828, "step": 4498 }, { "epoch": 0.91, "learning_rate": 2.1272160347229007e-05, "loss": 1.4455, "step": 4499 }, { "epoch": 0.91, "learning_rate": 2.1177766256864628e-05, "loss": 1.4202, "step": 4500 }, { "epoch": 0.91, "learning_rate": 2.108357753536444e-05, "loss": 1.4457, "step": 4501 }, { "epoch": 0.91, "learning_rate": 2.098959422312641e-05, "loss": 1.4251, "step": 4502 }, { "epoch": 0.91, "learning_rate": 2.0895816360460395e-05, "loss": 1.4138, "step": 4503 }, { "epoch": 0.91, "learning_rate": 2.0802243987588066e-05, "loss": 1.4663, "step": 4504 }, { "epoch": 0.91, "learning_rate": 2.070887714464309e-05, "loss": 1.4336, "step": 4505 }, { "epoch": 0.91, "learning_rate": 2.061571587167077e-05, "loss": 1.4241, "step": 4506 }, { "epoch": 0.91, "learning_rate": 2.0522760208628423e-05, "loss": 1.416, "step": 4507 }, { "epoch": 0.91, "learning_rate": 2.043001019538515e-05, "loss": 1.405, "step": 4508 }, { "epoch": 0.91, "learning_rate": 2.033746587172186e-05, "loss": 1.3909, "step": 4509 }, { "epoch": 0.91, "learning_rate": 2.0245127277331076e-05, "loss": 1.4535, "step": 4510 }, { "epoch": 0.91, "learning_rate": 2.015299445181734e-05, "loss": 1.4721, "step": 4511 }, { "epoch": 0.91, "learning_rate": 2.0061067434696712e-05, "loss": 1.4123, "step": 4512 }, { "epoch": 0.91, "learning_rate": 1.9969346265397094e-05, "loss": 1.4163, "step": 4513 }, { "epoch": 0.91, "learning_rate": 1.9877830983258126e-05, "loss": 1.4716, "step": 4514 }, { "epoch": 0.91, "learning_rate": 1.978652162753103e-05, "loss": 1.4407, "step": 4515 }, { "epoch": 0.91, "learning_rate": 1.969541823737886e-05, "loss": 1.442, "step": 4516 }, { "epoch": 0.91, "learning_rate": 1.9604520851876197e-05, "loss": 1.4459, "step": 4517 }, { "epoch": 0.91, "learning_rate": 1.951382951000924e-05, "loss": 1.4537, "step": 4518 }, { "epoch": 0.91, "learning_rate": 1.942334425067599e-05, "loss": 1.4084, "step": 4519 }, { "epoch": 0.91, "learning_rate": 1.9333065112685845e-05, "loss": 1.43, "step": 4520 }, { "epoch": 0.91, "learning_rate": 1.9242992134760052e-05, "loss": 1.4296, "step": 4521 }, { "epoch": 0.91, "learning_rate": 1.9153125355531153e-05, "loss": 1.4317, "step": 4522 }, { "epoch": 0.91, "learning_rate": 1.906346481354354e-05, "loss": 1.4368, "step": 4523 }, { "epoch": 0.91, "learning_rate": 1.897401054725284e-05, "loss": 1.4023, "step": 4524 }, { "epoch": 0.91, "learning_rate": 1.888476259502636e-05, "loss": 1.4083, "step": 4525 }, { "epoch": 0.91, "learning_rate": 1.879572099514304e-05, "loss": 1.4871, "step": 4526 }, { "epoch": 0.92, "learning_rate": 1.870688578579316e-05, "loss": 1.4368, "step": 4527 }, { "epoch": 0.92, "learning_rate": 1.8618257005078464e-05, "loss": 1.4271, "step": 4528 }, { "epoch": 0.92, "learning_rate": 1.8529834691012216e-05, "loss": 1.421, "step": 4529 }, { "epoch": 0.92, "learning_rate": 1.8441618881519185e-05, "loss": 1.4173, "step": 4530 }, { "epoch": 0.92, "learning_rate": 1.835360961443544e-05, "loss": 1.4558, "step": 4531 }, { "epoch": 0.92, "learning_rate": 1.826580692750851e-05, "loss": 1.4294, "step": 4532 }, { "epoch": 0.92, "learning_rate": 1.817821085839738e-05, "loss": 1.4542, "step": 4533 }, { "epoch": 0.92, "learning_rate": 1.809082144467239e-05, "loss": 1.4172, "step": 4534 }, { "epoch": 0.92, "learning_rate": 1.800363872381522e-05, "loss": 1.4495, "step": 4535 }, { "epoch": 0.92, "learning_rate": 1.7916662733218848e-05, "loss": 1.4506, "step": 4536 }, { "epoch": 0.92, "learning_rate": 1.78298935101876e-05, "loss": 1.4172, "step": 4537 }, { "epoch": 0.92, "learning_rate": 1.774333109193732e-05, "loss": 1.4351, "step": 4538 }, { "epoch": 0.92, "learning_rate": 1.7656975515594863e-05, "loss": 1.442, "step": 4539 }, { "epoch": 0.92, "learning_rate": 1.7570826818198495e-05, "loss": 1.457, "step": 4540 }, { "epoch": 0.92, "learning_rate": 1.7484885036697928e-05, "loss": 1.4435, "step": 4541 }, { "epoch": 0.92, "learning_rate": 1.739915020795374e-05, "loss": 1.3879, "step": 4542 }, { "epoch": 0.92, "learning_rate": 1.7313622368738013e-05, "loss": 1.4396, "step": 4543 }, { "epoch": 0.92, "learning_rate": 1.7228301555734016e-05, "loss": 1.414, "step": 4544 }, { "epoch": 0.92, "learning_rate": 1.7143187805536254e-05, "loss": 1.4542, "step": 4545 }, { "epoch": 0.92, "learning_rate": 1.70582811546503e-05, "loss": 1.4001, "step": 4546 }, { "epoch": 0.92, "learning_rate": 1.6973581639493085e-05, "loss": 1.4422, "step": 4547 }, { "epoch": 0.92, "learning_rate": 1.6889089296392435e-05, "loss": 1.4456, "step": 4548 }, { "epoch": 0.92, "learning_rate": 1.6804804161587526e-05, "loss": 1.4334, "step": 4549 }, { "epoch": 0.92, "learning_rate": 1.6720726271228615e-05, "loss": 1.436, "step": 4550 }, { "epoch": 0.92, "learning_rate": 1.6636855661377013e-05, "loss": 1.3965, "step": 4551 }, { "epoch": 0.92, "learning_rate": 1.6553192368005286e-05, "loss": 1.4377, "step": 4552 }, { "epoch": 0.92, "learning_rate": 1.6469736426997008e-05, "loss": 1.4164, "step": 4553 }, { "epoch": 0.92, "learning_rate": 1.6386487874146537e-05, "loss": 1.4713, "step": 4554 }, { "epoch": 0.92, "learning_rate": 1.630344674515971e-05, "loss": 1.4471, "step": 4555 }, { "epoch": 0.92, "learning_rate": 1.6220613075653202e-05, "loss": 1.4224, "step": 4556 }, { "epoch": 0.92, "learning_rate": 1.61379869011547e-05, "loss": 1.4537, "step": 4557 }, { "epoch": 0.92, "learning_rate": 1.6055568257102914e-05, "loss": 1.485, "step": 4558 }, { "epoch": 0.92, "learning_rate": 1.5973357178847515e-05, "loss": 1.4253, "step": 4559 }, { "epoch": 0.92, "learning_rate": 1.5891353701649235e-05, "loss": 1.4349, "step": 4560 }, { "epoch": 0.92, "learning_rate": 1.5809557860679723e-05, "loss": 1.3967, "step": 4561 }, { "epoch": 0.92, "learning_rate": 1.572796969102147e-05, "loss": 1.4008, "step": 4562 }, { "epoch": 0.92, "learning_rate": 1.564658922766804e-05, "loss": 1.4228, "step": 4563 }, { "epoch": 0.92, "learning_rate": 1.55654165055239e-05, "loss": 1.4165, "step": 4564 }, { "epoch": 0.92, "learning_rate": 1.5484451559404312e-05, "loss": 1.4109, "step": 4565 }, { "epoch": 0.92, "learning_rate": 1.5403694424035498e-05, "loss": 1.4863, "step": 4566 }, { "epoch": 0.92, "learning_rate": 1.5323145134054526e-05, "loss": 1.4297, "step": 4567 }, { "epoch": 0.92, "learning_rate": 1.524280372400927e-05, "loss": 1.4324, "step": 4568 }, { "epoch": 0.92, "learning_rate": 1.51626702283586e-05, "loss": 1.4561, "step": 4569 }, { "epoch": 0.92, "learning_rate": 1.5082744681472027e-05, "loss": 1.3898, "step": 4570 }, { "epoch": 0.92, "learning_rate": 1.5003027117630019e-05, "loss": 1.4379, "step": 4571 }, { "epoch": 0.92, "learning_rate": 1.4923517571023782e-05, "loss": 1.4216, "step": 4572 }, { "epoch": 0.92, "learning_rate": 1.4844216075755201e-05, "loss": 1.3956, "step": 4573 }, { "epoch": 0.92, "learning_rate": 1.4765122665837073e-05, "loss": 1.3857, "step": 4574 }, { "epoch": 0.92, "learning_rate": 1.4686237375192868e-05, "loss": 1.4588, "step": 4575 }, { "epoch": 0.93, "learning_rate": 1.4607560237656858e-05, "loss": 1.4143, "step": 4576 }, { "epoch": 0.93, "learning_rate": 1.4529091286973995e-05, "loss": 1.4263, "step": 4577 }, { "epoch": 0.93, "learning_rate": 1.4450830556799965e-05, "loss": 1.4413, "step": 4578 }, { "epoch": 0.93, "learning_rate": 1.4372778080701032e-05, "loss": 1.4021, "step": 4579 }, { "epoch": 0.93, "learning_rate": 1.429493389215425e-05, "loss": 1.3752, "step": 4580 }, { "epoch": 0.93, "learning_rate": 1.421729802454741e-05, "loss": 1.4441, "step": 4581 }, { "epoch": 0.93, "learning_rate": 1.4139870511178766e-05, "loss": 1.4418, "step": 4582 }, { "epoch": 0.93, "learning_rate": 1.4062651385257364e-05, "loss": 1.4282, "step": 4583 }, { "epoch": 0.93, "learning_rate": 1.3985640679902877e-05, "loss": 1.3885, "step": 4584 }, { "epoch": 0.93, "learning_rate": 1.3908838428145331e-05, "loss": 1.4437, "step": 4585 }, { "epoch": 0.93, "learning_rate": 1.3832244662925598e-05, "loss": 1.3809, "step": 4586 }, { "epoch": 0.93, "learning_rate": 1.3755859417095174e-05, "loss": 1.4228, "step": 4587 }, { "epoch": 0.93, "learning_rate": 1.367968272341591e-05, "loss": 1.3936, "step": 4588 }, { "epoch": 0.93, "learning_rate": 1.3603714614560337e-05, "loss": 1.402, "step": 4589 }, { "epoch": 0.93, "learning_rate": 1.3527955123111447e-05, "loss": 1.4276, "step": 4590 }, { "epoch": 0.93, "learning_rate": 1.3452404281562913e-05, "loss": 1.4804, "step": 4591 }, { "epoch": 0.93, "learning_rate": 1.3377062122318705e-05, "loss": 1.4032, "step": 4592 }, { "epoch": 0.93, "learning_rate": 1.3301928677693475e-05, "loss": 1.442, "step": 4593 }, { "epoch": 0.93, "learning_rate": 1.3227003979912278e-05, "loss": 1.3879, "step": 4594 }, { "epoch": 0.93, "learning_rate": 1.3152288061110518e-05, "loss": 1.4129, "step": 4595 }, { "epoch": 0.93, "learning_rate": 1.307778095333423e-05, "loss": 1.4077, "step": 4596 }, { "epoch": 0.93, "learning_rate": 1.3003482688539792e-05, "loss": 1.4464, "step": 4597 }, { "epoch": 0.93, "learning_rate": 1.2929393298594106e-05, "loss": 1.41, "step": 4598 }, { "epoch": 0.93, "learning_rate": 1.2855512815274418e-05, "loss": 1.4033, "step": 4599 }, { "epoch": 0.93, "learning_rate": 1.2781841270268324e-05, "loss": 1.3863, "step": 4600 }, { "epoch": 0.93, "learning_rate": 1.2708378695173884e-05, "loss": 1.4384, "step": 4601 }, { "epoch": 0.93, "learning_rate": 1.2635125121499558e-05, "loss": 1.4181, "step": 4602 }, { "epoch": 0.93, "learning_rate": 1.2562080580664048e-05, "loss": 1.4051, "step": 4603 }, { "epoch": 0.93, "learning_rate": 1.2489245103996405e-05, "loss": 1.4455, "step": 4604 }, { "epoch": 0.93, "learning_rate": 1.2416618722736195e-05, "loss": 1.4474, "step": 4605 }, { "epoch": 0.93, "learning_rate": 1.2344201468033112e-05, "loss": 1.4238, "step": 4606 }, { "epoch": 0.93, "learning_rate": 1.22719933709472e-05, "loss": 1.4304, "step": 4607 }, { "epoch": 0.93, "learning_rate": 1.2199994462448905e-05, "loss": 1.4208, "step": 4608 }, { "epoch": 0.93, "learning_rate": 1.2128204773418806e-05, "loss": 1.416, "step": 4609 }, { "epoch": 0.93, "learning_rate": 1.2056624334647715e-05, "loss": 1.4112, "step": 4610 }, { "epoch": 0.93, "learning_rate": 1.1985253176836908e-05, "loss": 1.4386, "step": 4611 }, { "epoch": 0.93, "learning_rate": 1.1914091330597732e-05, "loss": 1.4208, "step": 4612 }, { "epoch": 0.93, "learning_rate": 1.1843138826451826e-05, "loss": 1.4597, "step": 4613 }, { "epoch": 0.93, "learning_rate": 1.1772395694831017e-05, "loss": 1.4481, "step": 4614 }, { "epoch": 0.93, "learning_rate": 1.1701861966077254e-05, "loss": 1.4237, "step": 4615 }, { "epoch": 0.93, "learning_rate": 1.1631537670442781e-05, "loss": 1.4158, "step": 4616 }, { "epoch": 0.93, "learning_rate": 1.156142283809003e-05, "loss": 1.3714, "step": 4617 }, { "epoch": 0.93, "learning_rate": 1.1491517499091497e-05, "loss": 1.4081, "step": 4618 }, { "epoch": 0.93, "learning_rate": 1.1421821683429923e-05, "loss": 1.4165, "step": 4619 }, { "epoch": 0.93, "learning_rate": 1.135233542099806e-05, "loss": 1.4339, "step": 4620 }, { "epoch": 0.93, "learning_rate": 1.1283058741598962e-05, "loss": 1.442, "step": 4621 }, { "epoch": 0.93, "learning_rate": 1.1213991674945634e-05, "loss": 1.4003, "step": 4622 }, { "epoch": 0.93, "learning_rate": 1.1145134250661216e-05, "loss": 1.4048, "step": 4623 }, { "epoch": 0.93, "learning_rate": 1.1076486498278915e-05, "loss": 1.3874, "step": 4624 }, { "epoch": 0.94, "learning_rate": 1.1008048447242126e-05, "loss": 1.4318, "step": 4625 }, { "epoch": 0.94, "learning_rate": 1.0939820126904143e-05, "loss": 1.4166, "step": 4626 }, { "epoch": 0.94, "learning_rate": 1.087180156652845e-05, "loss": 1.4011, "step": 4627 }, { "epoch": 0.94, "learning_rate": 1.0803992795288431e-05, "loss": 1.4144, "step": 4628 }, { "epoch": 0.94, "learning_rate": 1.0736393842267545e-05, "loss": 1.4013, "step": 4629 }, { "epoch": 0.94, "learning_rate": 1.0669004736459376e-05, "loss": 1.4039, "step": 4630 }, { "epoch": 0.94, "learning_rate": 1.0601825506767248e-05, "loss": 1.4197, "step": 4631 }, { "epoch": 0.94, "learning_rate": 1.053485618200467e-05, "loss": 1.4383, "step": 4632 }, { "epoch": 0.94, "learning_rate": 1.0468096790895166e-05, "loss": 1.4411, "step": 4633 }, { "epoch": 0.94, "learning_rate": 1.0401547362071938e-05, "loss": 1.4564, "step": 4634 }, { "epoch": 0.94, "learning_rate": 1.0335207924078439e-05, "loss": 1.4204, "step": 4635 }, { "epoch": 0.94, "learning_rate": 1.0269078505367901e-05, "loss": 1.4138, "step": 4636 }, { "epoch": 0.94, "learning_rate": 1.0203159134303474e-05, "loss": 1.3986, "step": 4637 }, { "epoch": 0.94, "learning_rate": 1.0137449839158319e-05, "loss": 1.4551, "step": 4638 }, { "epoch": 0.94, "learning_rate": 1.007195064811539e-05, "loss": 1.4141, "step": 4639 }, { "epoch": 0.94, "learning_rate": 1.0006661589267552e-05, "loss": 1.4192, "step": 4640 }, { "epoch": 0.94, "learning_rate": 9.941582690617623e-06, "loss": 1.4127, "step": 4641 }, { "epoch": 0.94, "learning_rate": 9.876713980078112e-06, "loss": 1.4302, "step": 4642 }, { "epoch": 0.94, "learning_rate": 9.812055485471539e-06, "loss": 1.4461, "step": 4643 }, { "epoch": 0.94, "learning_rate": 9.74760723453022e-06, "loss": 1.4678, "step": 4644 }, { "epoch": 0.94, "learning_rate": 9.683369254896268e-06, "loss": 1.4133, "step": 4645 }, { "epoch": 0.94, "learning_rate": 9.619341574121642e-06, "loss": 1.3979, "step": 4646 }, { "epoch": 0.94, "learning_rate": 9.555524219667989e-06, "loss": 1.4394, "step": 4647 }, { "epoch": 0.94, "learning_rate": 9.49191721890691e-06, "loss": 1.4733, "step": 4648 }, { "epoch": 0.94, "learning_rate": 9.42852059911975e-06, "loss": 1.4259, "step": 4649 }, { "epoch": 0.94, "learning_rate": 9.365334387497481e-06, "loss": 1.4266, "step": 4650 }, { "epoch": 0.94, "learning_rate": 9.302358611141093e-06, "loss": 1.3942, "step": 4651 }, { "epoch": 0.94, "learning_rate": 9.239593297061032e-06, "loss": 1.4112, "step": 4652 }, { "epoch": 0.94, "learning_rate": 9.177038472177601e-06, "loss": 1.4146, "step": 4653 }, { "epoch": 0.94, "learning_rate": 9.114694163320891e-06, "loss": 1.3924, "step": 4654 }, { "epoch": 0.94, "learning_rate": 9.052560397230625e-06, "loss": 1.4145, "step": 4655 }, { "epoch": 0.94, "learning_rate": 8.990637200556151e-06, "loss": 1.4058, "step": 4656 }, { "epoch": 0.94, "learning_rate": 8.928924599856725e-06, "loss": 1.4519, "step": 4657 }, { "epoch": 0.94, "learning_rate": 8.867422621601062e-06, "loss": 1.4193, "step": 4658 }, { "epoch": 0.94, "learning_rate": 8.806131292167618e-06, "loss": 1.3996, "step": 4659 }, { "epoch": 0.94, "learning_rate": 8.745050637844532e-06, "loss": 1.4233, "step": 4660 }, { "epoch": 0.94, "learning_rate": 8.684180684829512e-06, "loss": 1.4021, "step": 4661 }, { "epoch": 0.94, "learning_rate": 8.623521459229955e-06, "loss": 1.4364, "step": 4662 }, { "epoch": 0.94, "learning_rate": 8.563072987062882e-06, "loss": 1.4584, "step": 4663 }, { "epoch": 0.94, "learning_rate": 8.502835294254885e-06, "loss": 1.4293, "step": 4664 }, { "epoch": 0.94, "learning_rate": 8.442808406642132e-06, "loss": 1.4364, "step": 4665 }, { "epoch": 0.94, "learning_rate": 8.38299234997042e-06, "loss": 1.3754, "step": 4666 }, { "epoch": 0.94, "learning_rate": 8.323387149895113e-06, "loss": 1.4094, "step": 4667 }, { "epoch": 0.94, "learning_rate": 8.263992831981149e-06, "loss": 1.4066, "step": 4668 }, { "epoch": 0.94, "learning_rate": 8.204809421702986e-06, "loss": 1.3941, "step": 4669 }, { "epoch": 0.94, "learning_rate": 8.145836944444651e-06, "loss": 1.4594, "step": 4670 }, { "epoch": 0.94, "learning_rate": 8.087075425499634e-06, "loss": 1.4135, "step": 4671 }, { "epoch": 0.94, "learning_rate": 8.028524890071054e-06, "loss": 1.4525, "step": 4672 }, { "epoch": 0.94, "learning_rate": 7.970185363271432e-06, "loss": 1.3848, "step": 4673 }, { "epoch": 0.94, "learning_rate": 7.912056870122863e-06, "loss": 1.4221, "step": 4674 }, { "epoch": 0.95, "learning_rate": 7.854139435556962e-06, "loss": 1.4439, "step": 4675 }, { "epoch": 0.95, "learning_rate": 7.79643308441469e-06, "loss": 1.4423, "step": 4676 }, { "epoch": 0.95, "learning_rate": 7.73893784144647e-06, "loss": 1.4314, "step": 4677 }, { "epoch": 0.95, "learning_rate": 7.68165373131241e-06, "loss": 1.449, "step": 4678 }, { "epoch": 0.95, "learning_rate": 7.6245807785818025e-06, "loss": 1.4097, "step": 4679 }, { "epoch": 0.95, "learning_rate": 7.567719007733509e-06, "loss": 1.3803, "step": 4680 }, { "epoch": 0.95, "learning_rate": 7.5110684431558e-06, "loss": 1.4098, "step": 4681 }, { "epoch": 0.95, "learning_rate": 7.454629109146294e-06, "loss": 1.4222, "step": 4682 }, { "epoch": 0.95, "learning_rate": 7.398401029912127e-06, "loss": 1.3881, "step": 4683 }, { "epoch": 0.95, "learning_rate": 7.342384229569621e-06, "loss": 1.4146, "step": 4684 }, { "epoch": 0.95, "learning_rate": 7.286578732144778e-06, "loss": 1.4858, "step": 4685 }, { "epoch": 0.95, "learning_rate": 7.230984561572729e-06, "loss": 1.4162, "step": 4686 }, { "epoch": 0.95, "learning_rate": 7.1756017416980126e-06, "loss": 1.4183, "step": 4687 }, { "epoch": 0.95, "learning_rate": 7.120430296274683e-06, "loss": 1.3945, "step": 4688 }, { "epoch": 0.95, "learning_rate": 7.065470248965867e-06, "loss": 1.4165, "step": 4689 }, { "epoch": 0.95, "learning_rate": 7.010721623344207e-06, "loss": 1.4428, "step": 4690 }, { "epoch": 0.95, "learning_rate": 6.956184442891589e-06, "loss": 1.3848, "step": 4691 }, { "epoch": 0.95, "learning_rate": 6.901858730999355e-06, "loss": 1.4074, "step": 4692 }, { "epoch": 0.95, "learning_rate": 6.8477445109678685e-06, "loss": 1.4276, "step": 4693 }, { "epoch": 0.95, "learning_rate": 6.793841806007117e-06, "loss": 1.4017, "step": 4694 }, { "epoch": 0.95, "learning_rate": 6.740150639236053e-06, "loss": 1.4318, "step": 4695 }, { "epoch": 0.95, "learning_rate": 6.686671033683089e-06, "loss": 1.4546, "step": 4696 }, { "epoch": 0.95, "learning_rate": 6.633403012285877e-06, "loss": 1.4467, "step": 4697 }, { "epoch": 0.95, "learning_rate": 6.580346597891251e-06, "loss": 1.4594, "step": 4698 }, { "epoch": 0.95, "learning_rate": 6.5275018132553435e-06, "loss": 1.4376, "step": 4699 }, { "epoch": 0.95, "learning_rate": 6.474868681043577e-06, "loss": 1.4309, "step": 4700 }, { "epoch": 0.95, "learning_rate": 6.422447223830452e-06, "loss": 1.4203, "step": 4701 }, { "epoch": 0.95, "learning_rate": 6.370237464099704e-06, "loss": 1.4347, "step": 4702 }, { "epoch": 0.95, "learning_rate": 6.3182394242444205e-06, "loss": 1.4744, "step": 4703 }, { "epoch": 0.95, "learning_rate": 6.266453126566707e-06, "loss": 1.4244, "step": 4704 }, { "epoch": 0.95, "learning_rate": 6.214878593277962e-06, "loss": 1.4029, "step": 4705 }, { "epoch": 0.95, "learning_rate": 6.163515846498713e-06, "loss": 1.4706, "step": 4706 }, { "epoch": 0.95, "learning_rate": 6.112364908258616e-06, "loss": 1.4409, "step": 4707 }, { "epoch": 0.95, "learning_rate": 6.061425800496567e-06, "loss": 1.4233, "step": 4708 }, { "epoch": 0.95, "learning_rate": 6.010698545060589e-06, "loss": 1.3977, "step": 4709 }, { "epoch": 0.95, "learning_rate": 5.9601831637077775e-06, "loss": 1.4179, "step": 4710 }, { "epoch": 0.95, "learning_rate": 5.909879678104357e-06, "loss": 1.4033, "step": 4711 }, { "epoch": 0.95, "learning_rate": 5.859788109825792e-06, "loss": 1.4206, "step": 4712 }, { "epoch": 0.95, "learning_rate": 5.809908480356452e-06, "loss": 1.4125, "step": 4713 }, { "epoch": 0.95, "learning_rate": 5.760240811090001e-06, "loss": 1.4136, "step": 4714 }, { "epoch": 0.95, "learning_rate": 5.7107851233291205e-06, "loss": 1.4038, "step": 4715 }, { "epoch": 0.95, "learning_rate": 5.661541438285512e-06, "loss": 1.4677, "step": 4716 }, { "epoch": 0.95, "learning_rate": 5.612509777079999e-06, "loss": 1.4142, "step": 4717 }, { "epoch": 0.95, "learning_rate": 5.563690160742485e-06, "loss": 1.413, "step": 4718 }, { "epoch": 0.95, "learning_rate": 5.515082610211941e-06, "loss": 1.4414, "step": 4719 }, { "epoch": 0.95, "learning_rate": 5.466687146336302e-06, "loss": 1.4191, "step": 4720 }, { "epoch": 0.95, "learning_rate": 5.418503789872575e-06, "loss": 1.4442, "step": 4721 }, { "epoch": 0.95, "learning_rate": 5.370532561486896e-06, "loss": 1.4204, "step": 4722 }, { "epoch": 0.95, "learning_rate": 5.322773481754195e-06, "loss": 1.4215, "step": 4723 }, { "epoch": 0.96, "learning_rate": 5.275226571158697e-06, "loss": 1.4592, "step": 4724 }, { "epoch": 0.96, "learning_rate": 5.227891850093314e-06, "loss": 1.421, "step": 4725 }, { "epoch": 0.96, "learning_rate": 5.1807693388601916e-06, "loss": 1.4586, "step": 4726 }, { "epoch": 0.96, "learning_rate": 5.133859057670332e-06, "loss": 1.4429, "step": 4727 }, { "epoch": 0.96, "learning_rate": 5.087161026643749e-06, "loss": 1.3782, "step": 4728 }, { "epoch": 0.96, "learning_rate": 5.040675265809536e-06, "loss": 1.4179, "step": 4729 }, { "epoch": 0.96, "learning_rate": 4.994401795105519e-06, "loss": 1.4117, "step": 4730 }, { "epoch": 0.96, "learning_rate": 4.948340634378545e-06, "loss": 1.423, "step": 4731 }, { "epoch": 0.96, "learning_rate": 4.90249180338459e-06, "loss": 1.399, "step": 4732 }, { "epoch": 0.96, "learning_rate": 4.856855321788256e-06, "loss": 1.4139, "step": 4733 }, { "epoch": 0.96, "learning_rate": 4.81143120916333e-06, "loss": 1.4542, "step": 4734 }, { "epoch": 0.96, "learning_rate": 4.76621948499234e-06, "loss": 1.3971, "step": 4735 }, { "epoch": 0.96, "learning_rate": 4.7212201686668285e-06, "loss": 1.4284, "step": 4736 }, { "epoch": 0.96, "learning_rate": 4.676433279487135e-06, "loss": 1.4357, "step": 4737 }, { "epoch": 0.96, "learning_rate": 4.631858836662562e-06, "loss": 1.4296, "step": 4738 }, { "epoch": 0.96, "learning_rate": 4.587496859311313e-06, "loss": 1.3867, "step": 4739 }, { "epoch": 0.96, "learning_rate": 4.543347366460448e-06, "loss": 1.447, "step": 4740 }, { "epoch": 0.96, "learning_rate": 4.499410377045765e-06, "loss": 1.4055, "step": 4741 }, { "epoch": 0.96, "learning_rate": 4.4556859099121325e-06, "loss": 1.4141, "step": 4742 }, { "epoch": 0.96, "learning_rate": 4.412173983813106e-06, "loss": 1.3907, "step": 4743 }, { "epoch": 0.96, "learning_rate": 4.368874617411089e-06, "loss": 1.4037, "step": 4744 }, { "epoch": 0.96, "learning_rate": 4.32578782927745e-06, "loss": 1.4151, "step": 4745 }, { "epoch": 0.96, "learning_rate": 4.282913637892239e-06, "loss": 1.4009, "step": 4746 }, { "epoch": 0.96, "learning_rate": 4.240252061644412e-06, "loss": 1.4361, "step": 4747 }, { "epoch": 0.96, "learning_rate": 4.197803118831611e-06, "loss": 1.4722, "step": 4748 }, { "epoch": 0.96, "learning_rate": 4.155566827660495e-06, "loss": 1.3899, "step": 4749 }, { "epoch": 0.96, "learning_rate": 4.113543206246295e-06, "loss": 1.4396, "step": 4750 }, { "epoch": 0.96, "learning_rate": 4.071732272613149e-06, "loss": 1.4371, "step": 4751 }, { "epoch": 0.96, "learning_rate": 4.0301340446939334e-06, "loss": 1.4099, "step": 4752 }, { "epoch": 0.96, "learning_rate": 3.988748540330267e-06, "loss": 1.4071, "step": 4753 }, { "epoch": 0.96, "learning_rate": 3.9475757772726716e-06, "loss": 1.4126, "step": 4754 }, { "epoch": 0.96, "learning_rate": 3.906615773180244e-06, "loss": 1.4182, "step": 4755 }, { "epoch": 0.96, "learning_rate": 3.865868545620876e-06, "loss": 1.4293, "step": 4756 }, { "epoch": 0.96, "learning_rate": 3.8253341120713105e-06, "loss": 1.4018, "step": 4757 }, { "epoch": 0.96, "learning_rate": 3.7850124899168616e-06, "loss": 1.4424, "step": 4758 }, { "epoch": 0.96, "learning_rate": 3.7449036964516403e-06, "loss": 1.4204, "step": 4759 }, { "epoch": 0.96, "learning_rate": 3.705007748878497e-06, "loss": 1.4271, "step": 4760 }, { "epoch": 0.96, "learning_rate": 3.665324664309022e-06, "loss": 1.4496, "step": 4761 }, { "epoch": 0.96, "learning_rate": 3.625854459763378e-06, "loss": 1.4221, "step": 4762 }, { "epoch": 0.96, "learning_rate": 3.5865971521705233e-06, "loss": 1.4489, "step": 4763 }, { "epoch": 0.96, "learning_rate": 3.5475527583681e-06, "loss": 1.4106, "step": 4764 }, { "epoch": 0.96, "learning_rate": 3.508721295102435e-06, "loss": 1.4006, "step": 4765 }, { "epoch": 0.96, "learning_rate": 3.4701027790284277e-06, "loss": 1.4292, "step": 4766 }, { "epoch": 0.96, "learning_rate": 3.4316972267097734e-06, "loss": 1.4506, "step": 4767 }, { "epoch": 0.96, "learning_rate": 3.393504654618795e-06, "loss": 1.3996, "step": 4768 }, { "epoch": 0.96, "learning_rate": 3.35552507913639e-06, "loss": 1.4103, "step": 4769 }, { "epoch": 0.96, "learning_rate": 3.3177585165522495e-06, "loss": 1.426, "step": 4770 }, { "epoch": 0.96, "learning_rate": 3.280204983064472e-06, "loss": 1.441, "step": 4771 }, { "epoch": 0.96, "learning_rate": 3.2428644947800624e-06, "loss": 1.4474, "step": 4772 }, { "epoch": 0.96, "learning_rate": 3.2057370677144316e-06, "loss": 1.4317, "step": 4773 }, { "epoch": 0.97, "learning_rate": 3.168822717791675e-06, "loss": 1.4338, "step": 4774 }, { "epoch": 0.97, "learning_rate": 3.132121460844628e-06, "loss": 1.4132, "step": 4775 }, { "epoch": 0.97, "learning_rate": 3.0956333126144764e-06, "loss": 1.4222, "step": 4776 }, { "epoch": 0.97, "learning_rate": 3.0593582887512015e-06, "loss": 1.4524, "step": 4777 }, { "epoch": 0.97, "learning_rate": 3.023296404813303e-06, "loss": 1.4418, "step": 4778 }, { "epoch": 0.97, "learning_rate": 2.987447676267907e-06, "loss": 1.429, "step": 4779 }, { "epoch": 0.97, "learning_rate": 2.9518121184906044e-06, "loss": 1.4383, "step": 4780 }, { "epoch": 0.97, "learning_rate": 2.916389746765724e-06, "loss": 1.4384, "step": 4781 }, { "epoch": 0.97, "learning_rate": 2.8811805762860577e-06, "loss": 1.4148, "step": 4782 }, { "epoch": 0.97, "learning_rate": 2.8461846221529143e-06, "loss": 1.3984, "step": 4783 }, { "epoch": 0.97, "learning_rate": 2.8114018993762314e-06, "loss": 1.4404, "step": 4784 }, { "epoch": 0.97, "learning_rate": 2.7768324228744644e-06, "loss": 1.424, "step": 4785 }, { "epoch": 0.97, "learning_rate": 2.742476207474587e-06, "loss": 1.3933, "step": 4786 }, { "epoch": 0.97, "learning_rate": 2.7083332679122e-06, "loss": 1.3999, "step": 4787 }, { "epoch": 0.97, "learning_rate": 2.674403618831256e-06, "loss": 1.4507, "step": 4788 }, { "epoch": 0.97, "learning_rate": 2.6406872747843925e-06, "loss": 1.4329, "step": 4789 }, { "epoch": 0.97, "learning_rate": 2.6071842502326525e-06, "loss": 1.4244, "step": 4790 }, { "epoch": 0.97, "learning_rate": 2.573894559545653e-06, "loss": 1.4498, "step": 4791 }, { "epoch": 0.97, "learning_rate": 2.5408182170014173e-06, "loss": 1.4088, "step": 4792 }, { "epoch": 0.97, "learning_rate": 2.5079552367865965e-06, "loss": 1.3958, "step": 4793 }, { "epoch": 0.97, "learning_rate": 2.4753056329962496e-06, "loss": 1.4222, "step": 4794 }, { "epoch": 0.97, "learning_rate": 2.442869419633953e-06, "loss": 1.4127, "step": 4795 }, { "epoch": 0.97, "learning_rate": 2.4106466106116333e-06, "loss": 1.4496, "step": 4796 }, { "epoch": 0.97, "learning_rate": 2.378637219749902e-06, "loss": 1.4104, "step": 4797 }, { "epoch": 0.97, "learning_rate": 2.3468412607776655e-06, "loss": 1.4013, "step": 4798 }, { "epoch": 0.97, "learning_rate": 2.3152587473323495e-06, "loss": 1.4101, "step": 4799 }, { "epoch": 0.97, "learning_rate": 2.2838896929598397e-06, "loss": 1.4389, "step": 4800 }, { "epoch": 0.97, "learning_rate": 2.2527341111144293e-06, "loss": 1.457, "step": 4801 }, { "epoch": 0.97, "learning_rate": 2.2217920151588746e-06, "loss": 1.4498, "step": 4802 }, { "epoch": 0.97, "learning_rate": 2.1910634183644475e-06, "loss": 1.4156, "step": 4803 }, { "epoch": 0.97, "learning_rate": 2.1605483339106614e-06, "loss": 1.4477, "step": 4804 }, { "epoch": 0.97, "learning_rate": 2.130246774885658e-06, "loss": 1.429, "step": 4805 }, { "epoch": 0.97, "learning_rate": 2.1001587542858193e-06, "loss": 1.4392, "step": 4806 }, { "epoch": 0.97, "learning_rate": 2.0702842850160995e-06, "loss": 1.4584, "step": 4807 }, { "epoch": 0.97, "learning_rate": 2.0406233798896944e-06, "loss": 1.4189, "step": 4808 }, { "epoch": 0.97, "learning_rate": 2.0111760516284273e-06, "loss": 1.4305, "step": 4809 }, { "epoch": 0.97, "learning_rate": 1.9819423128622505e-06, "loss": 1.4104, "step": 4810 }, { "epoch": 0.97, "learning_rate": 1.95292217612969e-06, "loss": 1.429, "step": 4811 }, { "epoch": 0.97, "learning_rate": 1.924115653877567e-06, "loss": 1.4146, "step": 4812 }, { "epoch": 0.97, "learning_rate": 1.8955227584612212e-06, "loss": 1.4255, "step": 4813 }, { "epoch": 0.97, "learning_rate": 1.867143502144175e-06, "loss": 1.4132, "step": 4814 }, { "epoch": 0.97, "learning_rate": 1.838977897098415e-06, "loss": 1.4603, "step": 4815 }, { "epoch": 0.97, "learning_rate": 1.811025955404333e-06, "loss": 1.4067, "step": 4816 }, { "epoch": 0.97, "learning_rate": 1.7832876890505612e-06, "loss": 1.4497, "step": 4817 }, { "epoch": 0.97, "learning_rate": 1.7557631099342497e-06, "loss": 1.4497, "step": 4818 }, { "epoch": 0.97, "learning_rate": 1.728452229860733e-06, "loss": 1.4452, "step": 4819 }, { "epoch": 0.97, "learning_rate": 1.7013550605438078e-06, "loss": 1.4582, "step": 4820 }, { "epoch": 0.97, "learning_rate": 1.6744716136055105e-06, "loss": 1.4264, "step": 4821 }, { "epoch": 0.97, "learning_rate": 1.64780190057634e-06, "loss": 1.4283, "step": 4822 }, { "epoch": 0.98, "learning_rate": 1.6213459328950354e-06, "loss": 1.4317, "step": 4823 }, { "epoch": 0.98, "learning_rate": 1.595103721908575e-06, "loss": 1.4069, "step": 4824 }, { "epoch": 0.98, "learning_rate": 1.569075278872456e-06, "loss": 1.4253, "step": 4825 }, { "epoch": 0.98, "learning_rate": 1.5432606149503036e-06, "loss": 1.4268, "step": 4826 }, { "epoch": 0.98, "learning_rate": 1.5176597412142613e-06, "loss": 1.4187, "step": 4827 }, { "epoch": 0.98, "learning_rate": 1.4922726686445453e-06, "loss": 1.4291, "step": 4828 }, { "epoch": 0.98, "learning_rate": 1.4670994081297795e-06, "loss": 1.4383, "step": 4829 }, { "epoch": 0.98, "learning_rate": 1.442139970466938e-06, "loss": 1.4354, "step": 4830 }, { "epoch": 0.98, "learning_rate": 1.41739436636118e-06, "loss": 1.3987, "step": 4831 }, { "epoch": 0.98, "learning_rate": 1.3928626064260153e-06, "loss": 1.4314, "step": 4832 }, { "epoch": 0.98, "learning_rate": 1.3685447011831941e-06, "loss": 1.425, "step": 4833 }, { "epoch": 0.98, "learning_rate": 1.3444406610628734e-06, "loss": 1.4327, "step": 4834 }, { "epoch": 0.98, "learning_rate": 1.3205504964032278e-06, "loss": 1.3955, "step": 4835 }, { "epoch": 0.98, "learning_rate": 1.2968742174509495e-06, "loss": 1.4128, "step": 4836 }, { "epoch": 0.98, "learning_rate": 1.2734118343608603e-06, "loss": 1.4417, "step": 4837 }, { "epoch": 0.98, "learning_rate": 1.250163357196077e-06, "loss": 1.4428, "step": 4838 }, { "epoch": 0.98, "learning_rate": 1.2271287959279564e-06, "loss": 1.4417, "step": 4839 }, { "epoch": 0.98, "learning_rate": 1.2043081604360962e-06, "loss": 1.4473, "step": 4840 }, { "epoch": 0.98, "learning_rate": 1.1817014605084443e-06, "loss": 1.4113, "step": 4841 }, { "epoch": 0.98, "learning_rate": 1.1593087058410779e-06, "loss": 1.4667, "step": 4842 }, { "epoch": 0.98, "learning_rate": 1.1371299060383144e-06, "loss": 1.4111, "step": 4843 }, { "epoch": 0.98, "learning_rate": 1.1151650706127115e-06, "loss": 1.4194, "step": 4844 }, { "epoch": 0.98, "learning_rate": 1.0934142089851773e-06, "loss": 1.4085, "step": 4845 }, { "epoch": 0.98, "learning_rate": 1.071877330484694e-06, "loss": 1.3991, "step": 4846 }, { "epoch": 0.98, "learning_rate": 1.0505544443484283e-06, "loss": 1.4581, "step": 4847 }, { "epoch": 0.98, "learning_rate": 1.0294455597220088e-06, "loss": 1.4218, "step": 4848 }, { "epoch": 0.98, "learning_rate": 1.0085506856590265e-06, "loss": 1.4329, "step": 4849 }, { "epoch": 0.98, "learning_rate": 9.878698311214241e-07, "loss": 1.4216, "step": 4850 }, { "epoch": 0.98, "learning_rate": 9.674030049792725e-07, "loss": 1.4112, "step": 4851 }, { "epoch": 0.98, "learning_rate": 9.471502160108836e-07, "loss": 1.4426, "step": 4852 }, { "epoch": 0.98, "learning_rate": 9.271114729028085e-07, "loss": 1.444, "step": 4853 }, { "epoch": 0.98, "learning_rate": 9.072867842497279e-07, "loss": 1.4593, "step": 4854 }, { "epoch": 0.98, "learning_rate": 8.876761585545067e-07, "loss": 1.418, "step": 4855 }, { "epoch": 0.98, "learning_rate": 8.682796042282503e-07, "loss": 1.3876, "step": 4856 }, { "epoch": 0.98, "learning_rate": 8.490971295901928e-07, "loss": 1.425, "step": 4857 }, { "epoch": 0.98, "learning_rate": 8.301287428678084e-07, "loss": 1.4074, "step": 4858 }, { "epoch": 0.98, "learning_rate": 8.113744521967004e-07, "loss": 1.4237, "step": 4859 }, { "epoch": 0.98, "learning_rate": 7.928342656206567e-07, "loss": 1.407, "step": 4860 }, { "epoch": 0.98, "learning_rate": 7.745081910917051e-07, "loss": 1.4381, "step": 4861 }, { "epoch": 0.98, "learning_rate": 7.563962364698917e-07, "loss": 1.3935, "step": 4862 }, { "epoch": 0.98, "learning_rate": 7.384984095235025e-07, "loss": 1.3979, "step": 4863 }, { "epoch": 0.98, "learning_rate": 7.208147179291191e-07, "loss": 1.4364, "step": 4864 }, { "epoch": 0.98, "learning_rate": 7.033451692711745e-07, "loss": 1.4019, "step": 4865 }, { "epoch": 0.98, "learning_rate": 6.86089771042564e-07, "loss": 1.4361, "step": 4866 }, { "epoch": 0.98, "learning_rate": 6.690485306442007e-07, "loss": 1.4009, "step": 4867 }, { "epoch": 0.98, "learning_rate": 6.522214553850158e-07, "loss": 1.4205, "step": 4868 }, { "epoch": 0.98, "learning_rate": 6.356085524823474e-07, "loss": 1.4079, "step": 4869 }, { "epoch": 0.98, "learning_rate": 6.192098290614956e-07, "loss": 1.4277, "step": 4870 }, { "epoch": 0.98, "learning_rate": 6.030252921558899e-07, "loss": 1.4454, "step": 4871 }, { "epoch": 0.98, "learning_rate": 5.870549487071997e-07, "loss": 1.3917, "step": 4872 }, { "epoch": 0.99, "learning_rate": 5.71298805565168e-07, "loss": 1.384, "step": 4873 }, { "epoch": 0.99, "learning_rate": 5.557568694877224e-07, "loss": 1.4088, "step": 4874 }, { "epoch": 0.99, "learning_rate": 5.40429147140753e-07, "loss": 1.4354, "step": 4875 }, { "epoch": 0.99, "learning_rate": 5.253156450984453e-07, "loss": 1.4256, "step": 4876 }, { "epoch": 0.99, "learning_rate": 5.104163698430587e-07, "loss": 1.4613, "step": 4877 }, { "epoch": 0.99, "learning_rate": 4.957313277648701e-07, "loss": 1.4483, "step": 4878 }, { "epoch": 0.99, "learning_rate": 4.812605251624525e-07, "loss": 1.4387, "step": 4879 }, { "epoch": 0.99, "learning_rate": 4.6700396824239656e-07, "loss": 1.3917, "step": 4880 }, { "epoch": 0.99, "learning_rate": 4.529616631193112e-07, "loss": 1.3988, "step": 4881 }, { "epoch": 0.99, "learning_rate": 4.3913361581610076e-07, "loss": 1.3693, "step": 4882 }, { "epoch": 0.99, "learning_rate": 4.255198322636322e-07, "loss": 1.424, "step": 4883 }, { "epoch": 0.99, "learning_rate": 4.1212031830084594e-07, "loss": 1.3911, "step": 4884 }, { "epoch": 0.99, "learning_rate": 3.989350796749225e-07, "loss": 1.4248, "step": 4885 }, { "epoch": 0.99, "learning_rate": 3.8596412204106037e-07, "loss": 1.4373, "step": 4886 }, { "epoch": 0.99, "learning_rate": 3.7320745096258715e-07, "loss": 1.4286, "step": 4887 }, { "epoch": 0.99, "learning_rate": 3.606650719108484e-07, "loss": 1.4817, "step": 4888 }, { "epoch": 0.99, "learning_rate": 3.483369902653188e-07, "loss": 1.4315, "step": 4889 }, { "epoch": 0.99, "learning_rate": 3.362232113135466e-07, "loss": 1.4588, "step": 4890 }, { "epoch": 0.99, "learning_rate": 3.243237402512089e-07, "loss": 1.4318, "step": 4891 }, { "epoch": 0.99, "learning_rate": 3.1263858218205657e-07, "loss": 1.4693, "step": 4892 }, { "epoch": 0.99, "learning_rate": 3.0116774211791377e-07, "loss": 1.4334, "step": 4893 }, { "epoch": 0.99, "learning_rate": 2.899112249786229e-07, "loss": 1.4569, "step": 4894 }, { "epoch": 0.99, "learning_rate": 2.7886903559221076e-07, "loss": 1.4263, "step": 4895 }, { "epoch": 0.99, "learning_rate": 2.680411786946668e-07, "loss": 1.4504, "step": 4896 }, { "epoch": 0.99, "learning_rate": 2.5742765893010943e-07, "loss": 1.4167, "step": 4897 }, { "epoch": 0.99, "learning_rate": 2.470284808507306e-07, "loss": 1.4107, "step": 4898 }, { "epoch": 0.99, "learning_rate": 2.3684364891674026e-07, "loss": 1.4076, "step": 4899 }, { "epoch": 0.99, "learning_rate": 2.268731674965885e-07, "loss": 1.4557, "step": 4900 }, { "epoch": 0.99, "learning_rate": 2.1711704086646577e-07, "loss": 1.4022, "step": 4901 }, { "epoch": 0.99, "learning_rate": 2.0757527321096924e-07, "loss": 1.4192, "step": 4902 }, { "epoch": 0.99, "learning_rate": 1.9824786862260303e-07, "loss": 1.4049, "step": 4903 }, { "epoch": 0.99, "learning_rate": 1.891348311017782e-07, "loss": 1.417, "step": 4904 }, { "epoch": 0.99, "learning_rate": 1.8023616455731251e-07, "loss": 1.4104, "step": 4905 }, { "epoch": 0.99, "learning_rate": 1.715518728057086e-07, "loss": 1.4285, "step": 4906 }, { "epoch": 0.99, "learning_rate": 1.6308195957182026e-07, "loss": 1.4434, "step": 4907 }, { "epoch": 0.99, "learning_rate": 1.5482642848835272e-07, "loss": 1.4368, "step": 4908 }, { "epoch": 0.99, "learning_rate": 1.4678528309614025e-07, "loss": 1.4111, "step": 4909 }, { "epoch": 0.99, "learning_rate": 1.3895852684409072e-07, "loss": 1.4393, "step": 4910 }, { "epoch": 0.99, "learning_rate": 1.3134616308918545e-07, "loss": 1.4638, "step": 4911 }, { "epoch": 0.99, "learning_rate": 1.2394819509631282e-07, "loss": 1.4174, "step": 4912 }, { "epoch": 0.99, "learning_rate": 1.1676462603849026e-07, "loss": 1.4329, "step": 4913 }, { "epoch": 0.99, "learning_rate": 1.0979545899686416e-07, "loss": 1.4226, "step": 4914 }, { "epoch": 0.99, "learning_rate": 1.030406969604325e-07, "loss": 1.4092, "step": 4915 }, { "epoch": 0.99, "learning_rate": 9.650034282643327e-08, "loss": 1.4086, "step": 4916 }, { "epoch": 0.99, "learning_rate": 9.017439940001148e-08, "loss": 1.4212, "step": 4917 }, { "epoch": 0.99, "learning_rate": 8.406286939438568e-08, "loss": 1.4316, "step": 4918 }, { "epoch": 0.99, "learning_rate": 7.816575543084791e-08, "loss": 1.4248, "step": 4919 }, { "epoch": 0.99, "learning_rate": 7.24830600386528e-08, "loss": 1.4279, "step": 4920 }, { "epoch": 0.99, "learning_rate": 6.701478565518392e-08, "loss": 1.4242, "step": 4921 }, { "epoch": 1.0, "learning_rate": 6.176093462573196e-08, "loss": 1.4229, "step": 4922 }, { "epoch": 1.0, "learning_rate": 5.672150920377206e-08, "loss": 1.433, "step": 4923 }, { "epoch": 1.0, "learning_rate": 5.189651155068642e-08, "loss": 1.4382, "step": 4924 }, { "epoch": 1.0, "learning_rate": 4.728594373593076e-08, "loss": 1.4153, "step": 4925 }, { "epoch": 1.0, "learning_rate": 4.288980773703433e-08, "loss": 1.4213, "step": 4926 }, { "epoch": 1.0, "learning_rate": 3.870810543948888e-08, "loss": 1.4217, "step": 4927 }, { "epoch": 1.0, "learning_rate": 3.474083863691524e-08, "loss": 1.4396, "step": 4928 }, { "epoch": 1.0, "learning_rate": 3.098800903078569e-08, "loss": 1.4367, "step": 4929 }, { "epoch": 1.0, "learning_rate": 2.7449618230757088e-08, "loss": 1.4426, "step": 4930 }, { "epoch": 1.0, "learning_rate": 2.41256677544488e-08, "loss": 1.431, "step": 4931 }, { "epoch": 1.0, "learning_rate": 2.1016159027553716e-08, "loss": 1.4204, "step": 4932 }, { "epoch": 1.0, "learning_rate": 1.812109338367174e-08, "loss": 1.3903, "step": 4933 }, { "epoch": 1.0, "learning_rate": 1.5440472064587317e-08, "loss": 1.4237, "step": 4934 }, { "epoch": 1.0, "learning_rate": 1.2974296220047421e-08, "loss": 1.3888, "step": 4935 }, { "epoch": 1.0, "learning_rate": 1.072256690770601e-08, "loss": 1.4361, "step": 4936 }, { "epoch": 1.0, "learning_rate": 8.685285093401606e-09, "loss": 1.4189, "step": 4937 }, { "epoch": 1.0, "learning_rate": 6.8624516509352415e-09, "loss": 1.3926, "step": 4938 }, { "epoch": 1.0, "learning_rate": 5.254067362125969e-09, "loss": 1.4115, "step": 4939 }, { "epoch": 1.0, "learning_rate": 3.8601329167553544e-09, "loss": 1.4031, "step": 4940 }, { "epoch": 1.0, "learning_rate": 2.6806489127895183e-09, "loss": 1.4785, "step": 4941 }, { "epoch": 1.0, "learning_rate": 1.715615856046071e-09, "loss": 1.4038, "step": 4942 }, { "epoch": 1.0, "learning_rate": 9.650341604716673e-10, "loss": 1.4029, "step": 4943 }, { "epoch": 1.0, "learning_rate": 4.2890414797547294e-10, "loss": 1.4361, "step": 4944 }, { "epoch": 1.0, "learning_rate": 1.0722604848467655e-10, "loss": 1.4318, "step": 4945 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.4198, "step": 4946 }, { "epoch": 1.0, "step": 4946, "total_flos": 1.5823970447172567e+19, "train_loss": 1.513081687112786, "train_runtime": 54263.8475, "train_samples_per_second": 23.335, "train_steps_per_second": 0.091 } ], "logging_steps": 1.0, "max_steps": 4946, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 24000, "total_flos": 1.5823970447172567e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }