diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,60825 @@ +{ + "best_metric": 0.8935389133627019, + "best_model_checkpoint": "xtreme_s_xlsr_t5lephone-small_minds14.en-all/checkpoint-9400", + "epoch": 149.99628252788105, + "global_step": 10050, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.9999999999999996e-07, + "loss": 2.6343, + "step": 1 + }, + { + "epoch": 0.03, + "learning_rate": 3.9999999999999993e-07, + "loss": 2.6511, + "step": 2 + }, + { + "epoch": 0.04, + "learning_rate": 6e-07, + "loss": 2.637, + "step": 3 + }, + { + "epoch": 0.06, + "learning_rate": 7.999999999999999e-07, + "loss": 2.6419, + "step": 4 + }, + { + "epoch": 0.07, + "learning_rate": 1e-06, + "loss": 2.6541, + "step": 5 + }, + { + "epoch": 0.09, + "learning_rate": 1.2e-06, + "loss": 2.6411, + "step": 6 + }, + { + "epoch": 0.1, + "learning_rate": 1.4e-06, + "loss": 2.6381, + "step": 7 + }, + { + "epoch": 0.12, + "learning_rate": 1.5999999999999997e-06, + "loss": 2.6385, + "step": 8 + }, + { + "epoch": 0.13, + "learning_rate": 1.8e-06, + "loss": 2.6384, + "step": 9 + }, + { + "epoch": 0.15, + "learning_rate": 2e-06, + "loss": 2.6368, + "step": 10 + }, + { + "epoch": 0.16, + "learning_rate": 2.1999999999999997e-06, + "loss": 2.6385, + "step": 11 + }, + { + "epoch": 0.18, + "learning_rate": 2.4e-06, + "loss": 2.6431, + "step": 12 + }, + { + "epoch": 0.19, + "learning_rate": 2.5999999999999997e-06, + "loss": 2.6428, + "step": 13 + }, + { + "epoch": 0.21, + "learning_rate": 2.8e-06, + "loss": 2.6378, + "step": 14 + }, + { + "epoch": 0.22, + "learning_rate": 2.9999999999999997e-06, + "loss": 2.6362, + "step": 15 + }, + { + "epoch": 0.24, + "learning_rate": 3.1999999999999994e-06, + "loss": 2.6277, + "step": 16 + }, + { + "epoch": 0.25, + "learning_rate": 3.4e-06, + "loss": 2.6533, + "step": 17 + }, + { + "epoch": 0.27, + "learning_rate": 3.6e-06, + "loss": 2.645, + "step": 18 + }, + { + "epoch": 0.28, + "learning_rate": 3.7999999999999996e-06, + "loss": 2.644, + "step": 19 + }, + { + "epoch": 0.3, + "learning_rate": 4e-06, + "loss": 2.6494, + "step": 20 + }, + { + "epoch": 0.31, + "learning_rate": 4.2e-06, + "loss": 2.6442, + "step": 21 + }, + { + "epoch": 0.33, + "learning_rate": 4.399999999999999e-06, + "loss": 2.638, + "step": 22 + }, + { + "epoch": 0.34, + "learning_rate": 4.599999999999999e-06, + "loss": 2.6362, + "step": 23 + }, + { + "epoch": 0.36, + "learning_rate": 4.8e-06, + "loss": 2.6443, + "step": 24 + }, + { + "epoch": 0.37, + "learning_rate": 4.9999999999999996e-06, + "loss": 2.6295, + "step": 25 + }, + { + "epoch": 0.39, + "learning_rate": 5.199999999999999e-06, + "loss": 2.6204, + "step": 26 + }, + { + "epoch": 0.4, + "learning_rate": 5.399999999999999e-06, + "loss": 2.6425, + "step": 27 + }, + { + "epoch": 0.42, + "learning_rate": 5.6e-06, + "loss": 2.6329, + "step": 28 + }, + { + "epoch": 0.43, + "learning_rate": 5.7999999999999995e-06, + "loss": 2.6295, + "step": 29 + }, + { + "epoch": 0.45, + "learning_rate": 5.999999999999999e-06, + "loss": 2.6281, + "step": 30 + }, + { + "epoch": 0.46, + "learning_rate": 6.199999999999999e-06, + "loss": 2.6481, + "step": 31 + }, + { + "epoch": 0.48, + "learning_rate": 6.399999999999999e-06, + "loss": 2.6351, + "step": 32 + }, + { + "epoch": 0.49, + "learning_rate": 6.599999999999999e-06, + "loss": 2.6362, + "step": 33 + }, + { + "epoch": 0.51, + "learning_rate": 6.8e-06, + "loss": 2.6711, + "step": 34 + }, + { + "epoch": 0.52, + "learning_rate": 7e-06, + "loss": 2.6709, + "step": 35 + }, + { + "epoch": 0.54, + "learning_rate": 7.2e-06, + "loss": 2.6567, + "step": 36 + }, + { + "epoch": 0.55, + "learning_rate": 7.3999999999999995e-06, + "loss": 2.6572, + "step": 37 + }, + { + "epoch": 0.57, + "learning_rate": 7.599999999999999e-06, + "loss": 2.6388, + "step": 38 + }, + { + "epoch": 0.58, + "learning_rate": 7.799999999999998e-06, + "loss": 2.6439, + "step": 39 + }, + { + "epoch": 0.59, + "learning_rate": 8e-06, + "loss": 2.6409, + "step": 40 + }, + { + "epoch": 0.61, + "learning_rate": 8.2e-06, + "loss": 2.6417, + "step": 41 + }, + { + "epoch": 0.62, + "learning_rate": 8.4e-06, + "loss": 2.6226, + "step": 42 + }, + { + "epoch": 0.64, + "learning_rate": 8.599999999999999e-06, + "loss": 2.6615, + "step": 43 + }, + { + "epoch": 0.65, + "learning_rate": 8.799999999999999e-06, + "loss": 2.6323, + "step": 44 + }, + { + "epoch": 0.67, + "learning_rate": 8.999999999999999e-06, + "loss": 2.6342, + "step": 45 + }, + { + "epoch": 0.68, + "learning_rate": 9.199999999999998e-06, + "loss": 2.6494, + "step": 46 + }, + { + "epoch": 0.7, + "learning_rate": 9.399999999999998e-06, + "loss": 2.6318, + "step": 47 + }, + { + "epoch": 0.71, + "learning_rate": 9.6e-06, + "loss": 2.6485, + "step": 48 + }, + { + "epoch": 0.73, + "learning_rate": 9.799999999999998e-06, + "loss": 2.6277, + "step": 49 + }, + { + "epoch": 0.74, + "learning_rate": 9.999999999999999e-06, + "loss": 2.644, + "step": 50 + }, + { + "epoch": 0.76, + "learning_rate": 1.02e-05, + "loss": 2.6556, + "step": 51 + }, + { + "epoch": 0.77, + "learning_rate": 1.0399999999999999e-05, + "loss": 2.6486, + "step": 52 + }, + { + "epoch": 0.79, + "learning_rate": 1.06e-05, + "loss": 2.632, + "step": 53 + }, + { + "epoch": 0.8, + "learning_rate": 1.0799999999999998e-05, + "loss": 2.6526, + "step": 54 + }, + { + "epoch": 0.82, + "learning_rate": 1.1e-05, + "loss": 2.6667, + "step": 55 + }, + { + "epoch": 0.83, + "learning_rate": 1.12e-05, + "loss": 2.6432, + "step": 56 + }, + { + "epoch": 0.85, + "learning_rate": 1.14e-05, + "loss": 2.6647, + "step": 57 + }, + { + "epoch": 0.86, + "learning_rate": 1.1599999999999999e-05, + "loss": 2.6317, + "step": 58 + }, + { + "epoch": 0.88, + "learning_rate": 1.1799999999999999e-05, + "loss": 2.6349, + "step": 59 + }, + { + "epoch": 0.89, + "learning_rate": 1.1999999999999999e-05, + "loss": 2.6423, + "step": 60 + }, + { + "epoch": 0.91, + "learning_rate": 1.2199999999999998e-05, + "loss": 2.6268, + "step": 61 + }, + { + "epoch": 0.92, + "learning_rate": 1.2399999999999998e-05, + "loss": 2.6271, + "step": 62 + }, + { + "epoch": 0.94, + "learning_rate": 1.26e-05, + "loss": 2.603, + "step": 63 + }, + { + "epoch": 0.95, + "learning_rate": 1.2799999999999998e-05, + "loss": 2.5958, + "step": 64 + }, + { + "epoch": 0.97, + "learning_rate": 1.3e-05, + "loss": 2.6169, + "step": 65 + }, + { + "epoch": 0.98, + "learning_rate": 1.3199999999999997e-05, + "loss": 2.5735, + "step": 66 + }, + { + "epoch": 1.0, + "learning_rate": 1.3399999999999999e-05, + "loss": 2.6467, + "step": 67 + }, + { + "epoch": 1.01, + "learning_rate": 1.36e-05, + "loss": 3.3148, + "step": 68 + }, + { + "epoch": 1.03, + "learning_rate": 1.3799999999999998e-05, + "loss": 2.6297, + "step": 69 + }, + { + "epoch": 1.04, + "learning_rate": 1.4e-05, + "loss": 2.6577, + "step": 70 + }, + { + "epoch": 1.06, + "learning_rate": 1.4199999999999998e-05, + "loss": 2.6311, + "step": 71 + }, + { + "epoch": 1.07, + "learning_rate": 1.44e-05, + "loss": 2.616, + "step": 72 + }, + { + "epoch": 1.09, + "learning_rate": 1.4599999999999997e-05, + "loss": 2.6987, + "step": 73 + }, + { + "epoch": 1.1, + "learning_rate": 1.4799999999999999e-05, + "loss": 2.6268, + "step": 74 + }, + { + "epoch": 1.12, + "learning_rate": 1.4999999999999999e-05, + "loss": 2.565, + "step": 75 + }, + { + "epoch": 1.13, + "learning_rate": 1.5199999999999998e-05, + "loss": 2.6689, + "step": 76 + }, + { + "epoch": 1.15, + "learning_rate": 1.5399999999999998e-05, + "loss": 2.6792, + "step": 77 + }, + { + "epoch": 1.16, + "learning_rate": 1.5599999999999996e-05, + "loss": 2.5829, + "step": 78 + }, + { + "epoch": 1.18, + "learning_rate": 1.5799999999999998e-05, + "loss": 2.6033, + "step": 79 + }, + { + "epoch": 1.19, + "learning_rate": 1.6e-05, + "loss": 2.5901, + "step": 80 + }, + { + "epoch": 1.21, + "learning_rate": 1.6199999999999997e-05, + "loss": 2.5608, + "step": 81 + }, + { + "epoch": 1.22, + "learning_rate": 1.64e-05, + "loss": 2.5485, + "step": 82 + }, + { + "epoch": 1.24, + "learning_rate": 1.6599999999999997e-05, + "loss": 2.5484, + "step": 83 + }, + { + "epoch": 1.25, + "learning_rate": 1.68e-05, + "loss": 2.602, + "step": 84 + }, + { + "epoch": 1.27, + "learning_rate": 1.6999999999999996e-05, + "loss": 2.6658, + "step": 85 + }, + { + "epoch": 1.28, + "learning_rate": 1.7199999999999998e-05, + "loss": 2.6671, + "step": 86 + }, + { + "epoch": 1.3, + "learning_rate": 1.74e-05, + "loss": 2.676, + "step": 87 + }, + { + "epoch": 1.31, + "learning_rate": 1.7599999999999998e-05, + "loss": 2.6539, + "step": 88 + }, + { + "epoch": 1.33, + "learning_rate": 1.78e-05, + "loss": 2.6765, + "step": 89 + }, + { + "epoch": 1.34, + "learning_rate": 1.7999999999999997e-05, + "loss": 2.6133, + "step": 90 + }, + { + "epoch": 1.36, + "learning_rate": 1.82e-05, + "loss": 2.639, + "step": 91 + }, + { + "epoch": 1.37, + "learning_rate": 1.8399999999999997e-05, + "loss": 2.6448, + "step": 92 + }, + { + "epoch": 1.39, + "learning_rate": 1.8599999999999998e-05, + "loss": 2.6431, + "step": 93 + }, + { + "epoch": 1.4, + "learning_rate": 1.8799999999999996e-05, + "loss": 2.6401, + "step": 94 + }, + { + "epoch": 1.42, + "learning_rate": 1.9e-05, + "loss": 2.6407, + "step": 95 + }, + { + "epoch": 1.43, + "learning_rate": 1.92e-05, + "loss": 2.617, + "step": 96 + }, + { + "epoch": 1.45, + "learning_rate": 1.9399999999999997e-05, + "loss": 2.6529, + "step": 97 + }, + { + "epoch": 1.46, + "learning_rate": 1.9599999999999995e-05, + "loss": 2.6185, + "step": 98 + }, + { + "epoch": 1.48, + "learning_rate": 1.98e-05, + "loss": 2.6339, + "step": 99 + }, + { + "epoch": 1.49, + "learning_rate": 1.9999999999999998e-05, + "loss": 2.6058, + "step": 100 + }, + { + "epoch": 1.51, + "learning_rate": 2.0199999999999996e-05, + "loss": 2.6031, + "step": 101 + }, + { + "epoch": 1.52, + "learning_rate": 2.04e-05, + "loss": 2.6588, + "step": 102 + }, + { + "epoch": 1.54, + "learning_rate": 2.06e-05, + "loss": 2.6377, + "step": 103 + }, + { + "epoch": 1.55, + "learning_rate": 2.0799999999999997e-05, + "loss": 2.6863, + "step": 104 + }, + { + "epoch": 1.57, + "learning_rate": 2.1e-05, + "loss": 2.6617, + "step": 105 + }, + { + "epoch": 1.58, + "learning_rate": 2.12e-05, + "loss": 2.6473, + "step": 106 + }, + { + "epoch": 1.59, + "learning_rate": 2.14e-05, + "loss": 2.6711, + "step": 107 + }, + { + "epoch": 1.61, + "learning_rate": 2.1599999999999996e-05, + "loss": 2.6132, + "step": 108 + }, + { + "epoch": 1.62, + "learning_rate": 2.1799999999999998e-05, + "loss": 2.6237, + "step": 109 + }, + { + "epoch": 1.64, + "learning_rate": 2.2e-05, + "loss": 2.645, + "step": 110 + }, + { + "epoch": 1.65, + "learning_rate": 2.2199999999999998e-05, + "loss": 2.6069, + "step": 111 + }, + { + "epoch": 1.67, + "learning_rate": 2.24e-05, + "loss": 2.642, + "step": 112 + }, + { + "epoch": 1.68, + "learning_rate": 2.2599999999999997e-05, + "loss": 2.6126, + "step": 113 + }, + { + "epoch": 1.7, + "learning_rate": 2.28e-05, + "loss": 2.6048, + "step": 114 + }, + { + "epoch": 1.71, + "learning_rate": 2.2999999999999997e-05, + "loss": 2.6229, + "step": 115 + }, + { + "epoch": 1.73, + "learning_rate": 2.3199999999999998e-05, + "loss": 2.5935, + "step": 116 + }, + { + "epoch": 1.74, + "learning_rate": 2.34e-05, + "loss": 2.5616, + "step": 117 + }, + { + "epoch": 1.76, + "learning_rate": 2.3599999999999998e-05, + "loss": 2.6095, + "step": 118 + }, + { + "epoch": 1.77, + "learning_rate": 2.38e-05, + "loss": 2.5821, + "step": 119 + }, + { + "epoch": 1.79, + "learning_rate": 2.3999999999999997e-05, + "loss": 2.6188, + "step": 120 + }, + { + "epoch": 1.8, + "learning_rate": 2.42e-05, + "loss": 2.6443, + "step": 121 + }, + { + "epoch": 1.82, + "learning_rate": 2.4399999999999997e-05, + "loss": 2.6284, + "step": 122 + }, + { + "epoch": 1.83, + "learning_rate": 2.4599999999999998e-05, + "loss": 2.6273, + "step": 123 + }, + { + "epoch": 1.85, + "learning_rate": 2.4799999999999996e-05, + "loss": 2.6608, + "step": 124 + }, + { + "epoch": 1.86, + "learning_rate": 2.4999999999999998e-05, + "loss": 2.6192, + "step": 125 + }, + { + "epoch": 1.88, + "learning_rate": 2.52e-05, + "loss": 2.6312, + "step": 126 + }, + { + "epoch": 1.89, + "learning_rate": 2.5399999999999997e-05, + "loss": 2.6786, + "step": 127 + }, + { + "epoch": 1.91, + "learning_rate": 2.5599999999999995e-05, + "loss": 2.6266, + "step": 128 + }, + { + "epoch": 1.92, + "learning_rate": 2.5799999999999997e-05, + "loss": 2.6833, + "step": 129 + }, + { + "epoch": 1.94, + "learning_rate": 2.6e-05, + "loss": 2.6481, + "step": 130 + }, + { + "epoch": 1.95, + "learning_rate": 2.6199999999999996e-05, + "loss": 2.5106, + "step": 131 + }, + { + "epoch": 1.97, + "learning_rate": 2.6399999999999995e-05, + "loss": 2.5399, + "step": 132 + }, + { + "epoch": 1.98, + "learning_rate": 2.66e-05, + "loss": 2.5729, + "step": 133 + }, + { + "epoch": 2.0, + "learning_rate": 2.6799999999999998e-05, + "loss": 2.6409, + "step": 134 + }, + { + "epoch": 2.01, + "learning_rate": 2.6999999999999996e-05, + "loss": 3.1924, + "step": 135 + }, + { + "epoch": 2.03, + "learning_rate": 2.72e-05, + "loss": 2.5826, + "step": 136 + }, + { + "epoch": 2.04, + "learning_rate": 2.74e-05, + "loss": 2.6108, + "step": 137 + }, + { + "epoch": 2.06, + "learning_rate": 2.7599999999999997e-05, + "loss": 2.5618, + "step": 138 + }, + { + "epoch": 2.07, + "learning_rate": 2.7799999999999995e-05, + "loss": 2.6187, + "step": 139 + }, + { + "epoch": 2.09, + "learning_rate": 2.8e-05, + "loss": 2.5749, + "step": 140 + }, + { + "epoch": 2.1, + "learning_rate": 2.8199999999999998e-05, + "loss": 2.5772, + "step": 141 + }, + { + "epoch": 2.12, + "learning_rate": 2.8399999999999996e-05, + "loss": 2.6725, + "step": 142 + }, + { + "epoch": 2.13, + "learning_rate": 2.86e-05, + "loss": 2.5364, + "step": 143 + }, + { + "epoch": 2.15, + "learning_rate": 2.88e-05, + "loss": 2.6227, + "step": 144 + }, + { + "epoch": 2.16, + "learning_rate": 2.8999999999999997e-05, + "loss": 2.6355, + "step": 145 + }, + { + "epoch": 2.18, + "learning_rate": 2.9199999999999995e-05, + "loss": 2.4874, + "step": 146 + }, + { + "epoch": 2.19, + "learning_rate": 2.94e-05, + "loss": 2.5832, + "step": 147 + }, + { + "epoch": 2.21, + "learning_rate": 2.9599999999999998e-05, + "loss": 2.5628, + "step": 148 + }, + { + "epoch": 2.22, + "learning_rate": 2.9799999999999996e-05, + "loss": 2.5375, + "step": 149 + }, + { + "epoch": 2.24, + "learning_rate": 2.9999999999999997e-05, + "loss": 2.5401, + "step": 150 + }, + { + "epoch": 2.25, + "learning_rate": 3.02e-05, + "loss": 2.6058, + "step": 151 + }, + { + "epoch": 2.27, + "learning_rate": 3.0399999999999997e-05, + "loss": 2.6267, + "step": 152 + }, + { + "epoch": 2.28, + "learning_rate": 3.06e-05, + "loss": 2.5839, + "step": 153 + }, + { + "epoch": 2.3, + "learning_rate": 3.0799999999999996e-05, + "loss": 2.6529, + "step": 154 + }, + { + "epoch": 2.31, + "learning_rate": 3.0999999999999995e-05, + "loss": 2.6135, + "step": 155 + }, + { + "epoch": 2.33, + "learning_rate": 3.119999999999999e-05, + "loss": 2.6257, + "step": 156 + }, + { + "epoch": 2.34, + "learning_rate": 3.14e-05, + "loss": 2.7242, + "step": 157 + }, + { + "epoch": 2.36, + "learning_rate": 3.1599999999999996e-05, + "loss": 2.6371, + "step": 158 + }, + { + "epoch": 2.37, + "learning_rate": 3.1799999999999994e-05, + "loss": 2.5878, + "step": 159 + }, + { + "epoch": 2.39, + "learning_rate": 3.2e-05, + "loss": 2.6008, + "step": 160 + }, + { + "epoch": 2.4, + "learning_rate": 3.22e-05, + "loss": 2.5713, + "step": 161 + }, + { + "epoch": 2.42, + "learning_rate": 3.2399999999999995e-05, + "loss": 2.6273, + "step": 162 + }, + { + "epoch": 2.43, + "learning_rate": 3.259999999999999e-05, + "loss": 2.5838, + "step": 163 + }, + { + "epoch": 2.45, + "learning_rate": 3.28e-05, + "loss": 2.5846, + "step": 164 + }, + { + "epoch": 2.46, + "learning_rate": 3.2999999999999996e-05, + "loss": 2.562, + "step": 165 + }, + { + "epoch": 2.48, + "learning_rate": 3.3199999999999994e-05, + "loss": 2.5598, + "step": 166 + }, + { + "epoch": 2.49, + "learning_rate": 3.34e-05, + "loss": 2.4896, + "step": 167 + }, + { + "epoch": 2.51, + "learning_rate": 3.36e-05, + "loss": 2.5917, + "step": 168 + }, + { + "epoch": 2.52, + "learning_rate": 3.3799999999999995e-05, + "loss": 2.6609, + "step": 169 + }, + { + "epoch": 2.54, + "learning_rate": 3.399999999999999e-05, + "loss": 2.653, + "step": 170 + }, + { + "epoch": 2.55, + "learning_rate": 3.42e-05, + "loss": 2.6373, + "step": 171 + }, + { + "epoch": 2.57, + "learning_rate": 3.4399999999999996e-05, + "loss": 2.6079, + "step": 172 + }, + { + "epoch": 2.58, + "learning_rate": 3.4599999999999994e-05, + "loss": 2.616, + "step": 173 + }, + { + "epoch": 2.59, + "learning_rate": 3.48e-05, + "loss": 2.6555, + "step": 174 + }, + { + "epoch": 2.61, + "learning_rate": 3.5e-05, + "loss": 2.6044, + "step": 175 + }, + { + "epoch": 2.62, + "learning_rate": 3.5199999999999995e-05, + "loss": 2.6301, + "step": 176 + }, + { + "epoch": 2.64, + "learning_rate": 3.539999999999999e-05, + "loss": 2.6324, + "step": 177 + }, + { + "epoch": 2.65, + "learning_rate": 3.56e-05, + "loss": 2.5571, + "step": 178 + }, + { + "epoch": 2.67, + "learning_rate": 3.5799999999999996e-05, + "loss": 2.5637, + "step": 179 + }, + { + "epoch": 2.68, + "learning_rate": 3.5999999999999994e-05, + "loss": 2.4853, + "step": 180 + }, + { + "epoch": 2.7, + "learning_rate": 3.62e-05, + "loss": 2.5275, + "step": 181 + }, + { + "epoch": 2.71, + "learning_rate": 3.64e-05, + "loss": 2.4241, + "step": 182 + }, + { + "epoch": 2.73, + "learning_rate": 3.6599999999999995e-05, + "loss": 2.4874, + "step": 183 + }, + { + "epoch": 2.74, + "learning_rate": 3.679999999999999e-05, + "loss": 2.6265, + "step": 184 + }, + { + "epoch": 2.76, + "learning_rate": 3.7e-05, + "loss": 2.5432, + "step": 185 + }, + { + "epoch": 2.77, + "learning_rate": 3.7199999999999996e-05, + "loss": 2.6158, + "step": 186 + }, + { + "epoch": 2.79, + "learning_rate": 3.7399999999999994e-05, + "loss": 2.5841, + "step": 187 + }, + { + "epoch": 2.8, + "learning_rate": 3.759999999999999e-05, + "loss": 2.5802, + "step": 188 + }, + { + "epoch": 2.82, + "learning_rate": 3.78e-05, + "loss": 2.5986, + "step": 189 + }, + { + "epoch": 2.83, + "learning_rate": 3.8e-05, + "loss": 2.499, + "step": 190 + }, + { + "epoch": 2.85, + "learning_rate": 3.8199999999999993e-05, + "loss": 2.6219, + "step": 191 + }, + { + "epoch": 2.86, + "learning_rate": 3.84e-05, + "loss": 2.6017, + "step": 192 + }, + { + "epoch": 2.88, + "learning_rate": 3.86e-05, + "loss": 2.5413, + "step": 193 + }, + { + "epoch": 2.89, + "learning_rate": 3.8799999999999994e-05, + "loss": 2.5604, + "step": 194 + }, + { + "epoch": 2.91, + "learning_rate": 3.9e-05, + "loss": 2.5409, + "step": 195 + }, + { + "epoch": 2.92, + "learning_rate": 3.919999999999999e-05, + "loss": 2.5019, + "step": 196 + }, + { + "epoch": 2.94, + "learning_rate": 3.9399999999999995e-05, + "loss": 2.5966, + "step": 197 + }, + { + "epoch": 2.95, + "learning_rate": 3.96e-05, + "loss": 2.5088, + "step": 198 + }, + { + "epoch": 2.97, + "learning_rate": 3.979999999999999e-05, + "loss": 2.5385, + "step": 199 + }, + { + "epoch": 2.98, + "learning_rate": 3.9999999999999996e-05, + "loss": 2.3561, + "step": 200 + }, + { + "epoch": 2.98, + "eval_accuracy": 0.13338228095937346, + "eval_f1": 0.06813158475964522, + "eval_loss": 2.5464296340942383, + "eval_runtime": 349.0, + "eval_samples_per_second": 11.708, + "eval_steps_per_second": 0.734, + "step": 200 + }, + { + "epoch": 3.0, + "learning_rate": 4.02e-05, + "loss": 2.4665, + "step": 201 + }, + { + "epoch": 3.01, + "learning_rate": 4.039999999999999e-05, + "loss": 3.2467, + "step": 202 + }, + { + "epoch": 3.03, + "learning_rate": 4.06e-05, + "loss": 2.5004, + "step": 203 + }, + { + "epoch": 3.04, + "learning_rate": 4.08e-05, + "loss": 2.4464, + "step": 204 + }, + { + "epoch": 3.06, + "learning_rate": 4.0999999999999994e-05, + "loss": 2.5131, + "step": 205 + }, + { + "epoch": 3.07, + "learning_rate": 4.12e-05, + "loss": 2.6149, + "step": 206 + }, + { + "epoch": 3.09, + "learning_rate": 4.14e-05, + "loss": 2.4677, + "step": 207 + }, + { + "epoch": 3.1, + "learning_rate": 4.1599999999999995e-05, + "loss": 2.5008, + "step": 208 + }, + { + "epoch": 3.12, + "learning_rate": 4.18e-05, + "loss": 2.5037, + "step": 209 + }, + { + "epoch": 3.13, + "learning_rate": 4.2e-05, + "loss": 2.4629, + "step": 210 + }, + { + "epoch": 3.15, + "learning_rate": 4.2199999999999996e-05, + "loss": 2.4151, + "step": 211 + }, + { + "epoch": 3.16, + "learning_rate": 4.24e-05, + "loss": 2.3735, + "step": 212 + }, + { + "epoch": 3.18, + "learning_rate": 4.259999999999999e-05, + "loss": 2.4441, + "step": 213 + }, + { + "epoch": 3.19, + "learning_rate": 4.28e-05, + "loss": 2.432, + "step": 214 + }, + { + "epoch": 3.21, + "learning_rate": 4.3e-05, + "loss": 2.4883, + "step": 215 + }, + { + "epoch": 3.22, + "learning_rate": 4.319999999999999e-05, + "loss": 2.4319, + "step": 216 + }, + { + "epoch": 3.24, + "learning_rate": 4.34e-05, + "loss": 2.4026, + "step": 217 + }, + { + "epoch": 3.25, + "learning_rate": 4.3599999999999996e-05, + "loss": 2.357, + "step": 218 + }, + { + "epoch": 3.27, + "learning_rate": 4.3799999999999994e-05, + "loss": 2.5772, + "step": 219 + }, + { + "epoch": 3.28, + "learning_rate": 4.4e-05, + "loss": 2.3947, + "step": 220 + }, + { + "epoch": 3.3, + "learning_rate": 4.42e-05, + "loss": 2.4569, + "step": 221 + }, + { + "epoch": 3.31, + "learning_rate": 4.4399999999999995e-05, + "loss": 2.3699, + "step": 222 + }, + { + "epoch": 3.33, + "learning_rate": 4.46e-05, + "loss": 2.7355, + "step": 223 + }, + { + "epoch": 3.34, + "learning_rate": 4.48e-05, + "loss": 2.7759, + "step": 224 + }, + { + "epoch": 3.36, + "learning_rate": 4.4999999999999996e-05, + "loss": 2.6568, + "step": 225 + }, + { + "epoch": 3.37, + "learning_rate": 4.5199999999999994e-05, + "loss": 2.5626, + "step": 226 + }, + { + "epoch": 3.39, + "learning_rate": 4.539999999999999e-05, + "loss": 2.282, + "step": 227 + }, + { + "epoch": 3.4, + "learning_rate": 4.56e-05, + "loss": 2.3982, + "step": 228 + }, + { + "epoch": 3.42, + "learning_rate": 4.5799999999999995e-05, + "loss": 2.4393, + "step": 229 + }, + { + "epoch": 3.43, + "learning_rate": 4.599999999999999e-05, + "loss": 2.5938, + "step": 230 + }, + { + "epoch": 3.45, + "learning_rate": 4.62e-05, + "loss": 2.6461, + "step": 231 + }, + { + "epoch": 3.46, + "learning_rate": 4.6399999999999996e-05, + "loss": 2.327, + "step": 232 + }, + { + "epoch": 3.48, + "learning_rate": 4.6599999999999994e-05, + "loss": 2.3971, + "step": 233 + }, + { + "epoch": 3.49, + "learning_rate": 4.68e-05, + "loss": 2.515, + "step": 234 + }, + { + "epoch": 3.51, + "learning_rate": 4.7e-05, + "loss": 2.505, + "step": 235 + }, + { + "epoch": 3.52, + "learning_rate": 4.7199999999999995e-05, + "loss": 2.5023, + "step": 236 + }, + { + "epoch": 3.54, + "learning_rate": 4.7399999999999993e-05, + "loss": 2.4241, + "step": 237 + }, + { + "epoch": 3.55, + "learning_rate": 4.76e-05, + "loss": 2.4735, + "step": 238 + }, + { + "epoch": 3.57, + "learning_rate": 4.7799999999999996e-05, + "loss": 2.503, + "step": 239 + }, + { + "epoch": 3.58, + "learning_rate": 4.7999999999999994e-05, + "loss": 2.468, + "step": 240 + }, + { + "epoch": 3.59, + "learning_rate": 4.82e-05, + "loss": 2.4701, + "step": 241 + }, + { + "epoch": 3.61, + "learning_rate": 4.84e-05, + "loss": 2.4992, + "step": 242 + }, + { + "epoch": 3.62, + "learning_rate": 4.8599999999999995e-05, + "loss": 2.557, + "step": 243 + }, + { + "epoch": 3.64, + "learning_rate": 4.8799999999999994e-05, + "loss": 2.4359, + "step": 244 + }, + { + "epoch": 3.65, + "learning_rate": 4.899999999999999e-05, + "loss": 2.508, + "step": 245 + }, + { + "epoch": 3.67, + "learning_rate": 4.9199999999999997e-05, + "loss": 2.4404, + "step": 246 + }, + { + "epoch": 3.68, + "learning_rate": 4.9399999999999995e-05, + "loss": 2.2592, + "step": 247 + }, + { + "epoch": 3.7, + "learning_rate": 4.959999999999999e-05, + "loss": 2.4594, + "step": 248 + }, + { + "epoch": 3.71, + "learning_rate": 4.98e-05, + "loss": 2.3305, + "step": 249 + }, + { + "epoch": 3.73, + "learning_rate": 4.9999999999999996e-05, + "loss": 2.3883, + "step": 250 + }, + { + "epoch": 3.74, + "learning_rate": 5.0199999999999994e-05, + "loss": 2.2801, + "step": 251 + }, + { + "epoch": 3.76, + "learning_rate": 5.04e-05, + "loss": 2.4757, + "step": 252 + }, + { + "epoch": 3.77, + "learning_rate": 5.06e-05, + "loss": 2.4579, + "step": 253 + }, + { + "epoch": 3.79, + "learning_rate": 5.0799999999999995e-05, + "loss": 2.3734, + "step": 254 + }, + { + "epoch": 3.8, + "learning_rate": 5.1e-05, + "loss": 2.3411, + "step": 255 + }, + { + "epoch": 3.82, + "learning_rate": 5.119999999999999e-05, + "loss": 2.3687, + "step": 256 + }, + { + "epoch": 3.83, + "learning_rate": 5.1399999999999996e-05, + "loss": 2.4481, + "step": 257 + }, + { + "epoch": 3.85, + "learning_rate": 5.1599999999999994e-05, + "loss": 2.471, + "step": 258 + }, + { + "epoch": 3.86, + "learning_rate": 5.179999999999999e-05, + "loss": 2.4718, + "step": 259 + }, + { + "epoch": 3.88, + "learning_rate": 5.2e-05, + "loss": 2.4932, + "step": 260 + }, + { + "epoch": 3.89, + "learning_rate": 5.2199999999999995e-05, + "loss": 2.3567, + "step": 261 + }, + { + "epoch": 3.91, + "learning_rate": 5.239999999999999e-05, + "loss": 2.3381, + "step": 262 + }, + { + "epoch": 3.92, + "learning_rate": 5.26e-05, + "loss": 2.3708, + "step": 263 + }, + { + "epoch": 3.94, + "learning_rate": 5.279999999999999e-05, + "loss": 2.4944, + "step": 264 + }, + { + "epoch": 3.95, + "learning_rate": 5.2999999999999994e-05, + "loss": 2.3488, + "step": 265 + }, + { + "epoch": 3.97, + "learning_rate": 5.32e-05, + "loss": 2.4171, + "step": 266 + }, + { + "epoch": 3.98, + "learning_rate": 5.339999999999999e-05, + "loss": 2.3411, + "step": 267 + }, + { + "epoch": 4.0, + "learning_rate": 5.3599999999999995e-05, + "loss": 2.3492, + "step": 268 + }, + { + "epoch": 4.01, + "learning_rate": 5.38e-05, + "loss": 3.0346, + "step": 269 + }, + { + "epoch": 4.03, + "learning_rate": 5.399999999999999e-05, + "loss": 2.2454, + "step": 270 + }, + { + "epoch": 4.04, + "learning_rate": 5.4199999999999996e-05, + "loss": 2.2054, + "step": 271 + }, + { + "epoch": 4.06, + "learning_rate": 5.44e-05, + "loss": 2.2441, + "step": 272 + }, + { + "epoch": 4.07, + "learning_rate": 5.459999999999999e-05, + "loss": 2.3293, + "step": 273 + }, + { + "epoch": 4.09, + "learning_rate": 5.48e-05, + "loss": 2.2064, + "step": 274 + }, + { + "epoch": 4.1, + "learning_rate": 5.499999999999999e-05, + "loss": 2.3777, + "step": 275 + }, + { + "epoch": 4.12, + "learning_rate": 5.519999999999999e-05, + "loss": 2.1567, + "step": 276 + }, + { + "epoch": 4.13, + "learning_rate": 5.54e-05, + "loss": 2.1665, + "step": 277 + }, + { + "epoch": 4.15, + "learning_rate": 5.559999999999999e-05, + "loss": 2.249, + "step": 278 + }, + { + "epoch": 4.16, + "learning_rate": 5.5799999999999994e-05, + "loss": 2.1989, + "step": 279 + }, + { + "epoch": 4.18, + "learning_rate": 5.6e-05, + "loss": 2.2401, + "step": 280 + }, + { + "epoch": 4.19, + "learning_rate": 5.619999999999999e-05, + "loss": 2.1298, + "step": 281 + }, + { + "epoch": 4.21, + "learning_rate": 5.6399999999999995e-05, + "loss": 2.2022, + "step": 282 + }, + { + "epoch": 4.22, + "learning_rate": 5.66e-05, + "loss": 2.0053, + "step": 283 + }, + { + "epoch": 4.24, + "learning_rate": 5.679999999999999e-05, + "loss": 2.1011, + "step": 284 + }, + { + "epoch": 4.25, + "learning_rate": 5.6999999999999996e-05, + "loss": 2.0889, + "step": 285 + }, + { + "epoch": 4.27, + "learning_rate": 5.72e-05, + "loss": 2.2418, + "step": 286 + }, + { + "epoch": 4.28, + "learning_rate": 5.739999999999999e-05, + "loss": 2.1132, + "step": 287 + }, + { + "epoch": 4.3, + "learning_rate": 5.76e-05, + "loss": 2.2312, + "step": 288 + }, + { + "epoch": 4.31, + "learning_rate": 5.78e-05, + "loss": 1.9977, + "step": 289 + }, + { + "epoch": 4.33, + "learning_rate": 5.7999999999999994e-05, + "loss": 2.1724, + "step": 290 + }, + { + "epoch": 4.34, + "learning_rate": 5.82e-05, + "loss": 2.1606, + "step": 291 + }, + { + "epoch": 4.36, + "learning_rate": 5.839999999999999e-05, + "loss": 2.1853, + "step": 292 + }, + { + "epoch": 4.37, + "learning_rate": 5.8599999999999995e-05, + "loss": 2.1057, + "step": 293 + }, + { + "epoch": 4.39, + "learning_rate": 5.88e-05, + "loss": 2.0778, + "step": 294 + }, + { + "epoch": 4.4, + "learning_rate": 5.899999999999999e-05, + "loss": 1.967, + "step": 295 + }, + { + "epoch": 4.42, + "learning_rate": 5.9199999999999996e-05, + "loss": 2.3167, + "step": 296 + }, + { + "epoch": 4.43, + "learning_rate": 5.94e-05, + "loss": 2.0744, + "step": 297 + }, + { + "epoch": 4.45, + "learning_rate": 5.959999999999999e-05, + "loss": 2.1545, + "step": 298 + }, + { + "epoch": 4.46, + "learning_rate": 5.98e-05, + "loss": 2.2421, + "step": 299 + }, + { + "epoch": 4.48, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.9392, + "step": 300 + }, + { + "epoch": 4.49, + "learning_rate": 6.019999999999999e-05, + "loss": 1.8914, + "step": 301 + }, + { + "epoch": 4.51, + "learning_rate": 6.04e-05, + "loss": 2.2063, + "step": 302 + }, + { + "epoch": 4.52, + "learning_rate": 6.0599999999999996e-05, + "loss": 2.0361, + "step": 303 + }, + { + "epoch": 4.54, + "learning_rate": 6.0799999999999994e-05, + "loss": 2.1857, + "step": 304 + }, + { + "epoch": 4.55, + "learning_rate": 6.1e-05, + "loss": 1.8373, + "step": 305 + }, + { + "epoch": 4.57, + "learning_rate": 6.12e-05, + "loss": 1.9555, + "step": 306 + }, + { + "epoch": 4.58, + "learning_rate": 6.139999999999999e-05, + "loss": 2.0524, + "step": 307 + }, + { + "epoch": 4.59, + "learning_rate": 6.159999999999999e-05, + "loss": 1.9854, + "step": 308 + }, + { + "epoch": 4.61, + "learning_rate": 6.18e-05, + "loss": 1.9668, + "step": 309 + }, + { + "epoch": 4.62, + "learning_rate": 6.199999999999999e-05, + "loss": 1.8757, + "step": 310 + }, + { + "epoch": 4.64, + "learning_rate": 6.22e-05, + "loss": 1.7739, + "step": 311 + }, + { + "epoch": 4.65, + "learning_rate": 6.239999999999999e-05, + "loss": 1.9759, + "step": 312 + }, + { + "epoch": 4.67, + "learning_rate": 6.259999999999999e-05, + "loss": 1.8669, + "step": 313 + }, + { + "epoch": 4.68, + "learning_rate": 6.28e-05, + "loss": 1.9185, + "step": 314 + }, + { + "epoch": 4.7, + "learning_rate": 6.299999999999999e-05, + "loss": 1.8514, + "step": 315 + }, + { + "epoch": 4.71, + "learning_rate": 6.319999999999999e-05, + "loss": 1.5974, + "step": 316 + }, + { + "epoch": 4.73, + "learning_rate": 6.34e-05, + "loss": 1.6711, + "step": 317 + }, + { + "epoch": 4.74, + "learning_rate": 6.359999999999999e-05, + "loss": 1.8531, + "step": 318 + }, + { + "epoch": 4.76, + "learning_rate": 6.379999999999999e-05, + "loss": 2.0521, + "step": 319 + }, + { + "epoch": 4.77, + "learning_rate": 6.4e-05, + "loss": 1.7659, + "step": 320 + }, + { + "epoch": 4.79, + "learning_rate": 6.419999999999999e-05, + "loss": 1.9653, + "step": 321 + }, + { + "epoch": 4.8, + "learning_rate": 6.44e-05, + "loss": 1.9988, + "step": 322 + }, + { + "epoch": 4.82, + "learning_rate": 6.459999999999998e-05, + "loss": 2.0249, + "step": 323 + }, + { + "epoch": 4.83, + "learning_rate": 6.479999999999999e-05, + "loss": 1.783, + "step": 324 + }, + { + "epoch": 4.85, + "learning_rate": 6.5e-05, + "loss": 2.1492, + "step": 325 + }, + { + "epoch": 4.86, + "learning_rate": 6.519999999999999e-05, + "loss": 1.7947, + "step": 326 + }, + { + "epoch": 4.88, + "learning_rate": 6.539999999999999e-05, + "loss": 1.9124, + "step": 327 + }, + { + "epoch": 4.89, + "learning_rate": 6.56e-05, + "loss": 1.7927, + "step": 328 + }, + { + "epoch": 4.91, + "learning_rate": 6.579999999999999e-05, + "loss": 1.7357, + "step": 329 + }, + { + "epoch": 4.92, + "learning_rate": 6.599999999999999e-05, + "loss": 1.733, + "step": 330 + }, + { + "epoch": 4.94, + "learning_rate": 6.62e-05, + "loss": 2.0315, + "step": 331 + }, + { + "epoch": 4.95, + "learning_rate": 6.639999999999999e-05, + "loss": 1.9838, + "step": 332 + }, + { + "epoch": 4.97, + "learning_rate": 6.659999999999999e-05, + "loss": 1.9308, + "step": 333 + }, + { + "epoch": 4.98, + "learning_rate": 6.68e-05, + "loss": 1.8901, + "step": 334 + }, + { + "epoch": 5.0, + "learning_rate": 6.699999999999999e-05, + "loss": 1.9154, + "step": 335 + }, + { + "epoch": 5.01, + "learning_rate": 6.72e-05, + "loss": 2.1272, + "step": 336 + }, + { + "epoch": 5.03, + "learning_rate": 6.739999999999998e-05, + "loss": 1.7548, + "step": 337 + }, + { + "epoch": 5.04, + "learning_rate": 6.759999999999999e-05, + "loss": 1.6528, + "step": 338 + }, + { + "epoch": 5.06, + "learning_rate": 6.78e-05, + "loss": 1.7716, + "step": 339 + }, + { + "epoch": 5.07, + "learning_rate": 6.799999999999999e-05, + "loss": 1.4649, + "step": 340 + }, + { + "epoch": 5.09, + "learning_rate": 6.819999999999999e-05, + "loss": 1.6351, + "step": 341 + }, + { + "epoch": 5.1, + "learning_rate": 6.84e-05, + "loss": 1.534, + "step": 342 + }, + { + "epoch": 5.12, + "learning_rate": 6.859999999999999e-05, + "loss": 1.6599, + "step": 343 + }, + { + "epoch": 5.13, + "learning_rate": 6.879999999999999e-05, + "loss": 1.6582, + "step": 344 + }, + { + "epoch": 5.15, + "learning_rate": 6.9e-05, + "loss": 1.8148, + "step": 345 + }, + { + "epoch": 5.16, + "learning_rate": 6.919999999999999e-05, + "loss": 1.3586, + "step": 346 + }, + { + "epoch": 5.18, + "learning_rate": 6.939999999999999e-05, + "loss": 1.8631, + "step": 347 + }, + { + "epoch": 5.19, + "learning_rate": 6.96e-05, + "loss": 1.785, + "step": 348 + }, + { + "epoch": 5.21, + "learning_rate": 6.979999999999999e-05, + "loss": 1.913, + "step": 349 + }, + { + "epoch": 5.22, + "learning_rate": 6.979999999999999e-05, + "loss": 1.6482, + "step": 350 + }, + { + "epoch": 5.24, + "learning_rate": 7e-05, + "loss": 1.5916, + "step": 351 + }, + { + "epoch": 5.25, + "learning_rate": 7.02e-05, + "loss": 1.6887, + "step": 352 + }, + { + "epoch": 5.27, + "learning_rate": 7.039999999999999e-05, + "loss": 1.6632, + "step": 353 + }, + { + "epoch": 5.28, + "learning_rate": 7.06e-05, + "loss": 1.4964, + "step": 354 + }, + { + "epoch": 5.3, + "learning_rate": 7.079999999999999e-05, + "loss": 1.8404, + "step": 355 + }, + { + "epoch": 5.31, + "learning_rate": 7.099999999999999e-05, + "loss": 1.6802, + "step": 356 + }, + { + "epoch": 5.33, + "learning_rate": 7.12e-05, + "loss": 1.7256, + "step": 357 + }, + { + "epoch": 5.34, + "learning_rate": 7.139999999999999e-05, + "loss": 1.7233, + "step": 358 + }, + { + "epoch": 5.36, + "learning_rate": 7.159999999999999e-05, + "loss": 1.7826, + "step": 359 + }, + { + "epoch": 5.37, + "learning_rate": 7.18e-05, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 5.39, + "learning_rate": 7.199999999999999e-05, + "loss": 1.5327, + "step": 361 + }, + { + "epoch": 5.4, + "learning_rate": 7.219999999999999e-05, + "loss": 1.4375, + "step": 362 + }, + { + "epoch": 5.42, + "learning_rate": 7.24e-05, + "loss": 1.6509, + "step": 363 + }, + { + "epoch": 5.43, + "learning_rate": 7.259999999999999e-05, + "loss": 1.628, + "step": 364 + }, + { + "epoch": 5.45, + "learning_rate": 7.28e-05, + "loss": 1.5856, + "step": 365 + }, + { + "epoch": 5.46, + "learning_rate": 7.3e-05, + "loss": 1.4559, + "step": 366 + }, + { + "epoch": 5.48, + "learning_rate": 7.319999999999999e-05, + "loss": 1.7826, + "step": 367 + }, + { + "epoch": 5.49, + "learning_rate": 7.34e-05, + "loss": 1.5844, + "step": 368 + }, + { + "epoch": 5.51, + "learning_rate": 7.359999999999999e-05, + "loss": 1.7874, + "step": 369 + }, + { + "epoch": 5.52, + "learning_rate": 7.379999999999999e-05, + "loss": 1.4583, + "step": 370 + }, + { + "epoch": 5.54, + "learning_rate": 7.4e-05, + "loss": 1.7617, + "step": 371 + }, + { + "epoch": 5.55, + "learning_rate": 7.419999999999999e-05, + "loss": 1.445, + "step": 372 + }, + { + "epoch": 5.57, + "learning_rate": 7.439999999999999e-05, + "loss": 1.5213, + "step": 373 + }, + { + "epoch": 5.58, + "learning_rate": 7.46e-05, + "loss": 1.6538, + "step": 374 + }, + { + "epoch": 5.59, + "learning_rate": 7.479999999999999e-05, + "loss": 1.642, + "step": 375 + }, + { + "epoch": 5.61, + "learning_rate": 7.5e-05, + "loss": 1.5272, + "step": 376 + }, + { + "epoch": 5.62, + "learning_rate": 7.519999999999998e-05, + "loss": 1.615, + "step": 377 + }, + { + "epoch": 5.64, + "learning_rate": 7.54e-05, + "loss": 1.4302, + "step": 378 + }, + { + "epoch": 5.65, + "learning_rate": 7.56e-05, + "loss": 1.8705, + "step": 379 + }, + { + "epoch": 5.67, + "learning_rate": 7.579999999999999e-05, + "loss": 1.7639, + "step": 380 + }, + { + "epoch": 5.68, + "learning_rate": 7.6e-05, + "loss": 1.3727, + "step": 381 + }, + { + "epoch": 5.7, + "learning_rate": 7.62e-05, + "loss": 1.7963, + "step": 382 + }, + { + "epoch": 5.71, + "learning_rate": 7.639999999999999e-05, + "loss": 1.3527, + "step": 383 + }, + { + "epoch": 5.73, + "learning_rate": 7.66e-05, + "loss": 1.3017, + "step": 384 + }, + { + "epoch": 5.74, + "learning_rate": 7.68e-05, + "loss": 1.5047, + "step": 385 + }, + { + "epoch": 5.76, + "learning_rate": 7.699999999999999e-05, + "loss": 1.6173, + "step": 386 + }, + { + "epoch": 5.77, + "learning_rate": 7.72e-05, + "loss": 1.3575, + "step": 387 + }, + { + "epoch": 5.79, + "learning_rate": 7.74e-05, + "loss": 1.6024, + "step": 388 + }, + { + "epoch": 5.8, + "learning_rate": 7.759999999999999e-05, + "loss": 1.3431, + "step": 389 + }, + { + "epoch": 5.82, + "learning_rate": 7.780000000000001e-05, + "loss": 1.6089, + "step": 390 + }, + { + "epoch": 5.83, + "learning_rate": 7.8e-05, + "loss": 1.6595, + "step": 391 + }, + { + "epoch": 5.85, + "learning_rate": 7.819999999999999e-05, + "loss": 1.4502, + "step": 392 + }, + { + "epoch": 5.86, + "learning_rate": 7.839999999999998e-05, + "loss": 1.7518, + "step": 393 + }, + { + "epoch": 5.88, + "learning_rate": 7.86e-05, + "loss": 1.4151, + "step": 394 + }, + { + "epoch": 5.89, + "learning_rate": 7.879999999999999e-05, + "loss": 1.4656, + "step": 395 + }, + { + "epoch": 5.91, + "learning_rate": 7.899999999999998e-05, + "loss": 1.6396, + "step": 396 + }, + { + "epoch": 5.92, + "learning_rate": 7.92e-05, + "loss": 1.3383, + "step": 397 + }, + { + "epoch": 5.94, + "learning_rate": 7.939999999999999e-05, + "loss": 1.5093, + "step": 398 + }, + { + "epoch": 5.95, + "learning_rate": 7.959999999999998e-05, + "loss": 1.3226, + "step": 399 + }, + { + "epoch": 5.97, + "learning_rate": 7.98e-05, + "loss": 1.1851, + "step": 400 + }, + { + "epoch": 5.97, + "eval_accuracy": 0.5861478218306412, + "eval_f1": 0.5583492858181879, + "eval_loss": 1.5055691003799438, + "eval_runtime": 343.8006, + "eval_samples_per_second": 11.885, + "eval_steps_per_second": 0.745, + "step": 400 + }, + { + "epoch": 5.98, + "learning_rate": 7.999999999999999e-05, + "loss": 1.3192, + "step": 401 + }, + { + "epoch": 6.0, + "learning_rate": 8.019999999999998e-05, + "loss": 1.6715, + "step": 402 + }, + { + "epoch": 6.01, + "learning_rate": 8.04e-05, + "loss": 1.7022, + "step": 403 + }, + { + "epoch": 6.03, + "learning_rate": 8.06e-05, + "loss": 1.2076, + "step": 404 + }, + { + "epoch": 6.04, + "learning_rate": 8.079999999999999e-05, + "loss": 1.451, + "step": 405 + }, + { + "epoch": 6.06, + "learning_rate": 8.1e-05, + "loss": 1.811, + "step": 406 + }, + { + "epoch": 6.07, + "learning_rate": 8.12e-05, + "loss": 1.6629, + "step": 407 + }, + { + "epoch": 6.09, + "learning_rate": 8.139999999999999e-05, + "loss": 1.3388, + "step": 408 + }, + { + "epoch": 6.1, + "learning_rate": 8.16e-05, + "loss": 1.5293, + "step": 409 + }, + { + "epoch": 6.12, + "learning_rate": 8.18e-05, + "loss": 1.4277, + "step": 410 + }, + { + "epoch": 6.13, + "learning_rate": 8.199999999999999e-05, + "loss": 1.195, + "step": 411 + }, + { + "epoch": 6.15, + "learning_rate": 8.22e-05, + "loss": 1.461, + "step": 412 + }, + { + "epoch": 6.16, + "learning_rate": 8.24e-05, + "loss": 1.5956, + "step": 413 + }, + { + "epoch": 6.18, + "learning_rate": 8.259999999999999e-05, + "loss": 1.2268, + "step": 414 + }, + { + "epoch": 6.19, + "learning_rate": 8.28e-05, + "loss": 1.6036, + "step": 415 + }, + { + "epoch": 6.21, + "learning_rate": 8.3e-05, + "loss": 1.4096, + "step": 416 + }, + { + "epoch": 6.22, + "learning_rate": 8.319999999999999e-05, + "loss": 1.3096, + "step": 417 + }, + { + "epoch": 6.24, + "learning_rate": 8.34e-05, + "loss": 1.1763, + "step": 418 + }, + { + "epoch": 6.25, + "learning_rate": 8.36e-05, + "loss": 1.5893, + "step": 419 + }, + { + "epoch": 6.27, + "learning_rate": 8.379999999999999e-05, + "loss": 1.2182, + "step": 420 + }, + { + "epoch": 6.28, + "learning_rate": 8.4e-05, + "loss": 1.243, + "step": 421 + }, + { + "epoch": 6.3, + "learning_rate": 8.42e-05, + "loss": 1.5265, + "step": 422 + }, + { + "epoch": 6.31, + "learning_rate": 8.439999999999999e-05, + "loss": 1.4368, + "step": 423 + }, + { + "epoch": 6.33, + "learning_rate": 8.459999999999998e-05, + "loss": 1.4227, + "step": 424 + }, + { + "epoch": 6.34, + "learning_rate": 8.48e-05, + "loss": 1.4331, + "step": 425 + }, + { + "epoch": 6.36, + "learning_rate": 8.499999999999999e-05, + "loss": 1.3622, + "step": 426 + }, + { + "epoch": 6.37, + "learning_rate": 8.519999999999998e-05, + "loss": 1.5802, + "step": 427 + }, + { + "epoch": 6.39, + "learning_rate": 8.54e-05, + "loss": 1.4871, + "step": 428 + }, + { + "epoch": 6.4, + "learning_rate": 8.56e-05, + "loss": 1.207, + "step": 429 + }, + { + "epoch": 6.42, + "learning_rate": 8.579999999999998e-05, + "loss": 1.2297, + "step": 430 + }, + { + "epoch": 6.43, + "learning_rate": 8.6e-05, + "loss": 1.5138, + "step": 431 + }, + { + "epoch": 6.45, + "learning_rate": 8.62e-05, + "loss": 1.6429, + "step": 432 + }, + { + "epoch": 6.46, + "learning_rate": 8.639999999999999e-05, + "loss": 1.3145, + "step": 433 + }, + { + "epoch": 6.48, + "learning_rate": 8.659999999999999e-05, + "loss": 1.2543, + "step": 434 + }, + { + "epoch": 6.49, + "learning_rate": 8.68e-05, + "loss": 1.0758, + "step": 435 + }, + { + "epoch": 6.51, + "learning_rate": 8.699999999999999e-05, + "loss": 1.6667, + "step": 436 + }, + { + "epoch": 6.52, + "learning_rate": 8.719999999999999e-05, + "loss": 1.3135, + "step": 437 + }, + { + "epoch": 6.54, + "learning_rate": 8.74e-05, + "loss": 1.3914, + "step": 438 + }, + { + "epoch": 6.55, + "learning_rate": 8.759999999999999e-05, + "loss": 1.3286, + "step": 439 + }, + { + "epoch": 6.57, + "learning_rate": 8.779999999999999e-05, + "loss": 1.3077, + "step": 440 + }, + { + "epoch": 6.58, + "learning_rate": 8.8e-05, + "loss": 1.3647, + "step": 441 + }, + { + "epoch": 6.59, + "learning_rate": 8.819999999999999e-05, + "loss": 1.3532, + "step": 442 + }, + { + "epoch": 6.61, + "learning_rate": 8.84e-05, + "loss": 1.5361, + "step": 443 + }, + { + "epoch": 6.62, + "learning_rate": 8.86e-05, + "loss": 1.2559, + "step": 444 + }, + { + "epoch": 6.64, + "learning_rate": 8.879999999999999e-05, + "loss": 1.4815, + "step": 445 + }, + { + "epoch": 6.65, + "learning_rate": 8.9e-05, + "loss": 0.9722, + "step": 446 + }, + { + "epoch": 6.67, + "learning_rate": 8.92e-05, + "loss": 1.6683, + "step": 447 + }, + { + "epoch": 6.68, + "learning_rate": 8.939999999999999e-05, + "loss": 1.5446, + "step": 448 + }, + { + "epoch": 6.7, + "learning_rate": 8.96e-05, + "loss": 1.5083, + "step": 449 + }, + { + "epoch": 6.71, + "learning_rate": 8.98e-05, + "loss": 1.4544, + "step": 450 + }, + { + "epoch": 6.73, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5404, + "step": 451 + }, + { + "epoch": 6.74, + "learning_rate": 9.02e-05, + "loss": 1.3788, + "step": 452 + }, + { + "epoch": 6.76, + "learning_rate": 9.039999999999999e-05, + "loss": 1.7929, + "step": 453 + }, + { + "epoch": 6.77, + "learning_rate": 9.059999999999999e-05, + "loss": 1.6221, + "step": 454 + }, + { + "epoch": 6.79, + "learning_rate": 9.079999999999998e-05, + "loss": 2.0289, + "step": 455 + }, + { + "epoch": 6.8, + "learning_rate": 9.099999999999999e-05, + "loss": 1.3513, + "step": 456 + }, + { + "epoch": 6.82, + "learning_rate": 9.12e-05, + "loss": 1.1809, + "step": 457 + }, + { + "epoch": 6.83, + "learning_rate": 9.139999999999999e-05, + "loss": 1.659, + "step": 458 + }, + { + "epoch": 6.85, + "learning_rate": 9.159999999999999e-05, + "loss": 1.3772, + "step": 459 + }, + { + "epoch": 6.86, + "learning_rate": 9.18e-05, + "loss": 1.0775, + "step": 460 + }, + { + "epoch": 6.88, + "learning_rate": 9.199999999999999e-05, + "loss": 1.6983, + "step": 461 + }, + { + "epoch": 6.89, + "learning_rate": 9.219999999999999e-05, + "loss": 1.1782, + "step": 462 + }, + { + "epoch": 6.91, + "learning_rate": 9.24e-05, + "loss": 1.3642, + "step": 463 + }, + { + "epoch": 6.92, + "learning_rate": 9.259999999999999e-05, + "loss": 1.1298, + "step": 464 + }, + { + "epoch": 6.94, + "learning_rate": 9.279999999999999e-05, + "loss": 1.3433, + "step": 465 + }, + { + "epoch": 6.95, + "learning_rate": 9.3e-05, + "loss": 1.2165, + "step": 466 + }, + { + "epoch": 6.97, + "learning_rate": 9.319999999999999e-05, + "loss": 1.5146, + "step": 467 + }, + { + "epoch": 6.98, + "learning_rate": 9.34e-05, + "loss": 1.4164, + "step": 468 + }, + { + "epoch": 7.0, + "learning_rate": 9.36e-05, + "loss": 1.2011, + "step": 469 + }, + { + "epoch": 7.01, + "learning_rate": 9.379999999999999e-05, + "loss": 1.6677, + "step": 470 + }, + { + "epoch": 7.03, + "learning_rate": 9.4e-05, + "loss": 1.0573, + "step": 471 + }, + { + "epoch": 7.04, + "learning_rate": 9.419999999999999e-05, + "loss": 1.3889, + "step": 472 + }, + { + "epoch": 7.06, + "learning_rate": 9.439999999999999e-05, + "loss": 1.4316, + "step": 473 + }, + { + "epoch": 7.07, + "learning_rate": 9.46e-05, + "loss": 1.0497, + "step": 474 + }, + { + "epoch": 7.09, + "learning_rate": 9.479999999999999e-05, + "loss": 1.203, + "step": 475 + }, + { + "epoch": 7.1, + "learning_rate": 9.499999999999999e-05, + "loss": 1.543, + "step": 476 + }, + { + "epoch": 7.12, + "learning_rate": 9.52e-05, + "loss": 1.3419, + "step": 477 + }, + { + "epoch": 7.13, + "learning_rate": 9.539999999999999e-05, + "loss": 1.3901, + "step": 478 + }, + { + "epoch": 7.15, + "learning_rate": 9.559999999999999e-05, + "loss": 1.152, + "step": 479 + }, + { + "epoch": 7.16, + "learning_rate": 9.58e-05, + "loss": 1.1519, + "step": 480 + }, + { + "epoch": 7.18, + "learning_rate": 9.599999999999999e-05, + "loss": 1.2613, + "step": 481 + }, + { + "epoch": 7.19, + "learning_rate": 9.62e-05, + "loss": 1.5289, + "step": 482 + }, + { + "epoch": 7.21, + "learning_rate": 9.64e-05, + "loss": 1.3045, + "step": 483 + }, + { + "epoch": 7.22, + "learning_rate": 9.659999999999999e-05, + "loss": 1.1174, + "step": 484 + }, + { + "epoch": 7.24, + "learning_rate": 9.68e-05, + "loss": 0.9389, + "step": 485 + }, + { + "epoch": 7.25, + "learning_rate": 9.699999999999999e-05, + "loss": 1.277, + "step": 486 + }, + { + "epoch": 7.27, + "learning_rate": 9.719999999999999e-05, + "loss": 1.4324, + "step": 487 + }, + { + "epoch": 7.28, + "learning_rate": 9.74e-05, + "loss": 1.3783, + "step": 488 + }, + { + "epoch": 7.3, + "learning_rate": 9.759999999999999e-05, + "loss": 0.9759, + "step": 489 + }, + { + "epoch": 7.31, + "learning_rate": 9.779999999999999e-05, + "loss": 0.9928, + "step": 490 + }, + { + "epoch": 7.33, + "learning_rate": 9.799999999999998e-05, + "loss": 1.0354, + "step": 491 + }, + { + "epoch": 7.34, + "learning_rate": 9.819999999999999e-05, + "loss": 1.4557, + "step": 492 + }, + { + "epoch": 7.36, + "learning_rate": 9.839999999999999e-05, + "loss": 1.3679, + "step": 493 + }, + { + "epoch": 7.37, + "learning_rate": 9.859999999999998e-05, + "loss": 1.3464, + "step": 494 + }, + { + "epoch": 7.39, + "learning_rate": 9.879999999999999e-05, + "loss": 1.1559, + "step": 495 + }, + { + "epoch": 7.4, + "learning_rate": 9.9e-05, + "loss": 1.1564, + "step": 496 + }, + { + "epoch": 7.42, + "learning_rate": 9.919999999999999e-05, + "loss": 0.9569, + "step": 497 + }, + { + "epoch": 7.43, + "learning_rate": 9.939999999999999e-05, + "loss": 1.3608, + "step": 498 + }, + { + "epoch": 7.45, + "learning_rate": 9.96e-05, + "loss": 0.9687, + "step": 499 + }, + { + "epoch": 7.46, + "learning_rate": 9.979999999999999e-05, + "loss": 1.1822, + "step": 500 + }, + { + "epoch": 7.48, + "learning_rate": 9.999999999999999e-05, + "loss": 0.9472, + "step": 501 + }, + { + "epoch": 7.49, + "learning_rate": 0.0001002, + "loss": 1.1478, + "step": 502 + }, + { + "epoch": 7.51, + "learning_rate": 0.00010039999999999999, + "loss": 1.7424, + "step": 503 + }, + { + "epoch": 7.52, + "learning_rate": 0.00010059999999999999, + "loss": 1.2375, + "step": 504 + }, + { + "epoch": 7.54, + "learning_rate": 0.0001008, + "loss": 1.6025, + "step": 505 + }, + { + "epoch": 7.55, + "learning_rate": 0.00010099999999999999, + "loss": 1.0611, + "step": 506 + }, + { + "epoch": 7.57, + "learning_rate": 0.0001012, + "loss": 1.1944, + "step": 507 + }, + { + "epoch": 7.58, + "learning_rate": 0.0001014, + "loss": 1.2099, + "step": 508 + }, + { + "epoch": 7.59, + "learning_rate": 0.00010159999999999999, + "loss": 1.0249, + "step": 509 + }, + { + "epoch": 7.61, + "learning_rate": 0.00010179999999999998, + "loss": 1.1262, + "step": 510 + }, + { + "epoch": 7.62, + "learning_rate": 0.000102, + "loss": 1.1328, + "step": 511 + }, + { + "epoch": 7.64, + "learning_rate": 0.00010219999999999999, + "loss": 0.8573, + "step": 512 + }, + { + "epoch": 7.65, + "learning_rate": 0.00010239999999999998, + "loss": 1.3475, + "step": 513 + }, + { + "epoch": 7.67, + "learning_rate": 0.0001026, + "loss": 1.0557, + "step": 514 + }, + { + "epoch": 7.68, + "learning_rate": 0.00010279999999999999, + "loss": 1.043, + "step": 515 + }, + { + "epoch": 7.7, + "learning_rate": 0.00010299999999999998, + "loss": 1.2895, + "step": 516 + }, + { + "epoch": 7.71, + "learning_rate": 0.00010319999999999999, + "loss": 1.2999, + "step": 517 + }, + { + "epoch": 7.73, + "learning_rate": 0.00010339999999999999, + "loss": 0.8422, + "step": 518 + }, + { + "epoch": 7.74, + "learning_rate": 0.00010359999999999998, + "loss": 1.3048, + "step": 519 + }, + { + "epoch": 7.76, + "learning_rate": 0.00010379999999999999, + "loss": 1.054, + "step": 520 + }, + { + "epoch": 7.77, + "learning_rate": 0.000104, + "loss": 1.0085, + "step": 521 + }, + { + "epoch": 7.79, + "learning_rate": 0.00010419999999999998, + "loss": 1.372, + "step": 522 + }, + { + "epoch": 7.8, + "learning_rate": 0.00010439999999999999, + "loss": 1.2399, + "step": 523 + }, + { + "epoch": 7.82, + "learning_rate": 0.0001046, + "loss": 1.1393, + "step": 524 + }, + { + "epoch": 7.83, + "learning_rate": 0.00010479999999999999, + "loss": 1.1062, + "step": 525 + }, + { + "epoch": 7.85, + "learning_rate": 0.00010499999999999999, + "loss": 1.1683, + "step": 526 + }, + { + "epoch": 7.86, + "learning_rate": 0.0001052, + "loss": 1.4802, + "step": 527 + }, + { + "epoch": 7.88, + "learning_rate": 0.00010539999999999999, + "loss": 0.9131, + "step": 528 + }, + { + "epoch": 7.89, + "learning_rate": 0.00010559999999999998, + "loss": 0.8833, + "step": 529 + }, + { + "epoch": 7.91, + "learning_rate": 0.0001058, + "loss": 0.8876, + "step": 530 + }, + { + "epoch": 7.92, + "learning_rate": 0.00010599999999999999, + "loss": 1.2304, + "step": 531 + }, + { + "epoch": 7.94, + "learning_rate": 0.00010619999999999998, + "loss": 1.0087, + "step": 532 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001064, + "loss": 1.0249, + "step": 533 + }, + { + "epoch": 7.97, + "learning_rate": 0.00010659999999999999, + "loss": 1.4578, + "step": 534 + }, + { + "epoch": 7.98, + "learning_rate": 0.00010679999999999998, + "loss": 0.8898, + "step": 535 + }, + { + "epoch": 8.0, + "learning_rate": 0.000107, + "loss": 1.1781, + "step": 536 + }, + { + "epoch": 8.01, + "learning_rate": 0.00010719999999999999, + "loss": 1.749, + "step": 537 + }, + { + "epoch": 8.03, + "learning_rate": 0.00010739999999999998, + "loss": 0.9843, + "step": 538 + }, + { + "epoch": 8.04, + "learning_rate": 0.0001076, + "loss": 1.1008, + "step": 539 + }, + { + "epoch": 8.06, + "learning_rate": 0.00010779999999999999, + "loss": 1.1893, + "step": 540 + }, + { + "epoch": 8.07, + "learning_rate": 0.00010799999999999998, + "loss": 0.9748, + "step": 541 + }, + { + "epoch": 8.09, + "learning_rate": 0.0001082, + "loss": 1.2305, + "step": 542 + }, + { + "epoch": 8.1, + "learning_rate": 0.00010839999999999999, + "loss": 1.154, + "step": 543 + }, + { + "epoch": 8.12, + "learning_rate": 0.00010859999999999998, + "loss": 1.2386, + "step": 544 + }, + { + "epoch": 8.13, + "learning_rate": 0.0001088, + "loss": 1.2909, + "step": 545 + }, + { + "epoch": 8.15, + "learning_rate": 0.00010899999999999999, + "loss": 1.1747, + "step": 546 + }, + { + "epoch": 8.16, + "learning_rate": 0.00010919999999999998, + "loss": 1.2138, + "step": 547 + }, + { + "epoch": 8.18, + "learning_rate": 0.00010939999999999998, + "loss": 1.421, + "step": 548 + }, + { + "epoch": 8.19, + "learning_rate": 0.0001096, + "loss": 1.2327, + "step": 549 + }, + { + "epoch": 8.21, + "learning_rate": 0.00010979999999999999, + "loss": 1.1603, + "step": 550 + }, + { + "epoch": 8.22, + "learning_rate": 0.00010999999999999998, + "loss": 1.0756, + "step": 551 + }, + { + "epoch": 8.24, + "learning_rate": 0.0001102, + "loss": 0.8484, + "step": 552 + }, + { + "epoch": 8.25, + "learning_rate": 0.00011039999999999999, + "loss": 0.8909, + "step": 553 + }, + { + "epoch": 8.27, + "learning_rate": 0.00011059999999999998, + "loss": 1.1358, + "step": 554 + }, + { + "epoch": 8.28, + "learning_rate": 0.0001108, + "loss": 1.3485, + "step": 555 + }, + { + "epoch": 8.3, + "learning_rate": 0.00011099999999999999, + "loss": 0.9957, + "step": 556 + }, + { + "epoch": 8.31, + "learning_rate": 0.00011119999999999998, + "loss": 1.0312, + "step": 557 + }, + { + "epoch": 8.33, + "learning_rate": 0.0001114, + "loss": 1.3304, + "step": 558 + }, + { + "epoch": 8.34, + "learning_rate": 0.0001114, + "loss": 1.3952, + "step": 559 + }, + { + "epoch": 8.36, + "learning_rate": 0.00011159999999999999, + "loss": 1.0264, + "step": 560 + }, + { + "epoch": 8.37, + "learning_rate": 0.00011179999999999998, + "loss": 1.1699, + "step": 561 + }, + { + "epoch": 8.39, + "learning_rate": 0.000112, + "loss": 1.2597, + "step": 562 + }, + { + "epoch": 8.4, + "learning_rate": 0.00011219999999999999, + "loss": 0.9144, + "step": 563 + }, + { + "epoch": 8.42, + "learning_rate": 0.00011239999999999998, + "loss": 1.4595, + "step": 564 + }, + { + "epoch": 8.43, + "learning_rate": 0.0001126, + "loss": 0.9706, + "step": 565 + }, + { + "epoch": 8.45, + "learning_rate": 0.00011279999999999999, + "loss": 1.5886, + "step": 566 + }, + { + "epoch": 8.46, + "learning_rate": 0.00011299999999999998, + "loss": 0.974, + "step": 567 + }, + { + "epoch": 8.48, + "learning_rate": 0.0001132, + "loss": 1.135, + "step": 568 + }, + { + "epoch": 8.49, + "learning_rate": 0.00011339999999999999, + "loss": 1.0261, + "step": 569 + }, + { + "epoch": 8.51, + "learning_rate": 0.00011359999999999998, + "loss": 1.3876, + "step": 570 + }, + { + "epoch": 8.52, + "learning_rate": 0.0001138, + "loss": 1.1007, + "step": 571 + }, + { + "epoch": 8.54, + "learning_rate": 0.00011399999999999999, + "loss": 1.0671, + "step": 572 + }, + { + "epoch": 8.55, + "learning_rate": 0.00011419999999999998, + "loss": 1.2932, + "step": 573 + }, + { + "epoch": 8.57, + "learning_rate": 0.0001144, + "loss": 1.4211, + "step": 574 + }, + { + "epoch": 8.58, + "learning_rate": 0.0001146, + "loss": 0.9986, + "step": 575 + }, + { + "epoch": 8.59, + "learning_rate": 0.00011479999999999999, + "loss": 1.0899, + "step": 576 + }, + { + "epoch": 8.61, + "learning_rate": 0.000115, + "loss": 0.8065, + "step": 577 + }, + { + "epoch": 8.62, + "learning_rate": 0.0001152, + "loss": 0.6506, + "step": 578 + }, + { + "epoch": 8.64, + "learning_rate": 0.00011539999999999999, + "loss": 0.8245, + "step": 579 + }, + { + "epoch": 8.65, + "learning_rate": 0.0001156, + "loss": 0.948, + "step": 580 + }, + { + "epoch": 8.67, + "learning_rate": 0.0001158, + "loss": 0.8349, + "step": 581 + }, + { + "epoch": 8.68, + "learning_rate": 0.00011599999999999999, + "loss": 0.8581, + "step": 582 + }, + { + "epoch": 8.7, + "learning_rate": 0.00011619999999999998, + "loss": 0.9276, + "step": 583 + }, + { + "epoch": 8.71, + "learning_rate": 0.0001164, + "loss": 1.0779, + "step": 584 + }, + { + "epoch": 8.73, + "learning_rate": 0.00011659999999999999, + "loss": 1.0055, + "step": 585 + }, + { + "epoch": 8.74, + "learning_rate": 0.00011679999999999998, + "loss": 1.0188, + "step": 586 + }, + { + "epoch": 8.76, + "learning_rate": 0.000117, + "loss": 0.9448, + "step": 587 + }, + { + "epoch": 8.77, + "learning_rate": 0.00011719999999999999, + "loss": 0.9065, + "step": 588 + }, + { + "epoch": 8.79, + "learning_rate": 0.00011739999999999998, + "loss": 1.3076, + "step": 589 + }, + { + "epoch": 8.8, + "learning_rate": 0.0001176, + "loss": 1.124, + "step": 590 + }, + { + "epoch": 8.82, + "learning_rate": 0.00011779999999999999, + "loss": 1.0797, + "step": 591 + }, + { + "epoch": 8.83, + "learning_rate": 0.00011799999999999998, + "loss": 0.9384, + "step": 592 + }, + { + "epoch": 8.85, + "learning_rate": 0.0001182, + "loss": 0.8262, + "step": 593 + }, + { + "epoch": 8.86, + "learning_rate": 0.00011839999999999999, + "loss": 1.4642, + "step": 594 + }, + { + "epoch": 8.88, + "learning_rate": 0.00011859999999999998, + "loss": 1.0777, + "step": 595 + }, + { + "epoch": 8.89, + "learning_rate": 0.0001188, + "loss": 1.0487, + "step": 596 + }, + { + "epoch": 8.91, + "learning_rate": 0.00011899999999999999, + "loss": 1.0663, + "step": 597 + }, + { + "epoch": 8.92, + "learning_rate": 0.00011919999999999998, + "loss": 1.3841, + "step": 598 + }, + { + "epoch": 8.94, + "learning_rate": 0.0001194, + "loss": 1.095, + "step": 599 + }, + { + "epoch": 8.95, + "learning_rate": 0.0001196, + "loss": 1.2805, + "step": 600 + }, + { + "epoch": 8.95, + "eval_accuracy": 0.7043563387175722, + "eval_f1": 0.7105830295720407, + "eval_loss": 1.139738917350769, + "eval_runtime": 343.0859, + "eval_samples_per_second": 11.91, + "eval_steps_per_second": 0.746, + "step": 600 + }, + { + "epoch": 8.97, + "learning_rate": 0.00011979999999999998, + "loss": 0.944, + "step": 601 + }, + { + "epoch": 8.98, + "learning_rate": 0.00011999999999999999, + "loss": 0.8265, + "step": 602 + }, + { + "epoch": 9.0, + "learning_rate": 0.0001202, + "loss": 0.9694, + "step": 603 + }, + { + "epoch": 9.01, + "learning_rate": 0.00012039999999999999, + "loss": 1.4551, + "step": 604 + }, + { + "epoch": 9.03, + "learning_rate": 0.00012059999999999999, + "loss": 1.2523, + "step": 605 + }, + { + "epoch": 9.04, + "learning_rate": 0.0001208, + "loss": 0.7349, + "step": 606 + }, + { + "epoch": 9.06, + "learning_rate": 0.00012099999999999999, + "loss": 1.3356, + "step": 607 + }, + { + "epoch": 9.07, + "learning_rate": 0.00012119999999999999, + "loss": 1.3264, + "step": 608 + }, + { + "epoch": 9.09, + "learning_rate": 0.0001214, + "loss": 0.9967, + "step": 609 + }, + { + "epoch": 9.1, + "learning_rate": 0.00012159999999999999, + "loss": 1.0618, + "step": 610 + }, + { + "epoch": 9.12, + "learning_rate": 0.00012179999999999999, + "loss": 1.1899, + "step": 611 + }, + { + "epoch": 9.13, + "learning_rate": 0.000122, + "loss": 0.8158, + "step": 612 + }, + { + "epoch": 9.15, + "learning_rate": 0.0001222, + "loss": 1.4054, + "step": 613 + }, + { + "epoch": 9.16, + "learning_rate": 0.0001224, + "loss": 0.9819, + "step": 614 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001226, + "loss": 0.8747, + "step": 615 + }, + { + "epoch": 9.19, + "learning_rate": 0.00012279999999999998, + "loss": 0.989, + "step": 616 + }, + { + "epoch": 9.21, + "learning_rate": 0.00012299999999999998, + "loss": 1.0601, + "step": 617 + }, + { + "epoch": 9.22, + "learning_rate": 0.00012319999999999999, + "loss": 1.0786, + "step": 618 + }, + { + "epoch": 9.24, + "learning_rate": 0.0001234, + "loss": 0.8078, + "step": 619 + }, + { + "epoch": 9.25, + "learning_rate": 0.0001236, + "loss": 1.2362, + "step": 620 + }, + { + "epoch": 9.27, + "learning_rate": 0.0001238, + "loss": 0.7693, + "step": 621 + }, + { + "epoch": 9.28, + "learning_rate": 0.00012399999999999998, + "loss": 0.8017, + "step": 622 + }, + { + "epoch": 9.3, + "learning_rate": 0.00012419999999999998, + "loss": 1.3265, + "step": 623 + }, + { + "epoch": 9.31, + "learning_rate": 0.0001244, + "loss": 0.7762, + "step": 624 + }, + { + "epoch": 9.33, + "learning_rate": 0.0001246, + "loss": 1.02, + "step": 625 + }, + { + "epoch": 9.34, + "learning_rate": 0.00012479999999999997, + "loss": 1.1217, + "step": 626 + }, + { + "epoch": 9.36, + "learning_rate": 0.000125, + "loss": 1.0048, + "step": 627 + }, + { + "epoch": 9.37, + "learning_rate": 0.00012519999999999998, + "loss": 0.9923, + "step": 628 + }, + { + "epoch": 9.39, + "learning_rate": 0.00012539999999999999, + "loss": 0.7986, + "step": 629 + }, + { + "epoch": 9.4, + "learning_rate": 0.0001256, + "loss": 0.8572, + "step": 630 + }, + { + "epoch": 9.42, + "learning_rate": 0.0001258, + "loss": 0.7296, + "step": 631 + }, + { + "epoch": 9.43, + "learning_rate": 0.00012599999999999997, + "loss": 1.1171, + "step": 632 + }, + { + "epoch": 9.45, + "learning_rate": 0.0001262, + "loss": 0.9852, + "step": 633 + }, + { + "epoch": 9.46, + "learning_rate": 0.00012639999999999998, + "loss": 0.9377, + "step": 634 + }, + { + "epoch": 9.48, + "learning_rate": 0.0001266, + "loss": 0.8699, + "step": 635 + }, + { + "epoch": 9.49, + "learning_rate": 0.0001268, + "loss": 0.9284, + "step": 636 + }, + { + "epoch": 9.51, + "learning_rate": 0.000127, + "loss": 0.9991, + "step": 637 + }, + { + "epoch": 9.52, + "learning_rate": 0.00012719999999999997, + "loss": 1.0121, + "step": 638 + }, + { + "epoch": 9.54, + "learning_rate": 0.0001274, + "loss": 1.0221, + "step": 639 + }, + { + "epoch": 9.55, + "learning_rate": 0.00012759999999999998, + "loss": 1.0897, + "step": 640 + }, + { + "epoch": 9.57, + "learning_rate": 0.0001278, + "loss": 1.2973, + "step": 641 + }, + { + "epoch": 9.58, + "learning_rate": 0.000128, + "loss": 1.0074, + "step": 642 + }, + { + "epoch": 9.59, + "learning_rate": 0.0001282, + "loss": 1.2608, + "step": 643 + }, + { + "epoch": 9.61, + "learning_rate": 0.00012839999999999998, + "loss": 1.2781, + "step": 644 + }, + { + "epoch": 9.62, + "learning_rate": 0.00012859999999999998, + "loss": 1.029, + "step": 645 + }, + { + "epoch": 9.64, + "learning_rate": 0.0001288, + "loss": 0.8011, + "step": 646 + }, + { + "epoch": 9.65, + "learning_rate": 0.000129, + "loss": 0.7844, + "step": 647 + }, + { + "epoch": 9.67, + "learning_rate": 0.00012919999999999997, + "loss": 1.3425, + "step": 648 + }, + { + "epoch": 9.68, + "learning_rate": 0.0001294, + "loss": 1.4064, + "step": 649 + }, + { + "epoch": 9.7, + "learning_rate": 0.00012959999999999998, + "loss": 1.0535, + "step": 650 + }, + { + "epoch": 9.71, + "learning_rate": 0.00012979999999999998, + "loss": 1.2251, + "step": 651 + }, + { + "epoch": 9.73, + "learning_rate": 0.00013, + "loss": 1.0629, + "step": 652 + }, + { + "epoch": 9.74, + "learning_rate": 0.0001302, + "loss": 0.9377, + "step": 653 + }, + { + "epoch": 9.76, + "learning_rate": 0.00013039999999999997, + "loss": 1.1107, + "step": 654 + }, + { + "epoch": 9.77, + "learning_rate": 0.0001306, + "loss": 1.4752, + "step": 655 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013079999999999998, + "loss": 1.0769, + "step": 656 + }, + { + "epoch": 9.8, + "learning_rate": 0.00013099999999999999, + "loss": 0.988, + "step": 657 + }, + { + "epoch": 9.82, + "learning_rate": 0.0001312, + "loss": 1.4048, + "step": 658 + }, + { + "epoch": 9.83, + "learning_rate": 0.0001314, + "loss": 1.0237, + "step": 659 + }, + { + "epoch": 9.85, + "learning_rate": 0.00013159999999999997, + "loss": 0.6261, + "step": 660 + }, + { + "epoch": 9.86, + "learning_rate": 0.0001318, + "loss": 0.7488, + "step": 661 + }, + { + "epoch": 9.88, + "learning_rate": 0.00013199999999999998, + "loss": 1.1096, + "step": 662 + }, + { + "epoch": 9.89, + "learning_rate": 0.0001322, + "loss": 0.8872, + "step": 663 + }, + { + "epoch": 9.91, + "learning_rate": 0.0001324, + "loss": 0.9701, + "step": 664 + }, + { + "epoch": 9.92, + "learning_rate": 0.0001326, + "loss": 0.7629, + "step": 665 + }, + { + "epoch": 9.94, + "learning_rate": 0.00013279999999999998, + "loss": 0.8341, + "step": 666 + }, + { + "epoch": 9.95, + "learning_rate": 0.000133, + "loss": 1.316, + "step": 667 + }, + { + "epoch": 9.97, + "learning_rate": 0.00013319999999999999, + "loss": 0.8714, + "step": 668 + }, + { + "epoch": 9.98, + "learning_rate": 0.0001334, + "loss": 0.8048, + "step": 669 + }, + { + "epoch": 10.0, + "learning_rate": 0.0001336, + "loss": 1.276, + "step": 670 + }, + { + "epoch": 10.01, + "learning_rate": 0.0001338, + "loss": 1.2213, + "step": 671 + }, + { + "epoch": 10.03, + "learning_rate": 0.00013399999999999998, + "loss": 0.954, + "step": 672 + }, + { + "epoch": 10.04, + "learning_rate": 0.0001342, + "loss": 0.7978, + "step": 673 + }, + { + "epoch": 10.06, + "learning_rate": 0.0001344, + "loss": 0.9366, + "step": 674 + }, + { + "epoch": 10.07, + "learning_rate": 0.0001346, + "loss": 0.9517, + "step": 675 + }, + { + "epoch": 10.09, + "learning_rate": 0.00013479999999999997, + "loss": 0.8653, + "step": 676 + }, + { + "epoch": 10.1, + "learning_rate": 0.000135, + "loss": 0.8692, + "step": 677 + }, + { + "epoch": 10.12, + "learning_rate": 0.00013519999999999998, + "loss": 0.7885, + "step": 678 + }, + { + "epoch": 10.13, + "learning_rate": 0.00013539999999999998, + "loss": 1.0205, + "step": 679 + }, + { + "epoch": 10.15, + "learning_rate": 0.0001356, + "loss": 0.9433, + "step": 680 + }, + { + "epoch": 10.16, + "learning_rate": 0.0001358, + "loss": 0.769, + "step": 681 + }, + { + "epoch": 10.18, + "learning_rate": 0.00013599999999999997, + "loss": 0.7986, + "step": 682 + }, + { + "epoch": 10.19, + "learning_rate": 0.0001362, + "loss": 0.8684, + "step": 683 + }, + { + "epoch": 10.21, + "learning_rate": 0.00013639999999999998, + "loss": 1.1515, + "step": 684 + }, + { + "epoch": 10.22, + "learning_rate": 0.00013659999999999999, + "loss": 0.6979, + "step": 685 + }, + { + "epoch": 10.24, + "learning_rate": 0.0001368, + "loss": 0.6019, + "step": 686 + }, + { + "epoch": 10.25, + "learning_rate": 0.000137, + "loss": 0.9636, + "step": 687 + }, + { + "epoch": 10.27, + "learning_rate": 0.00013719999999999997, + "loss": 0.9187, + "step": 688 + }, + { + "epoch": 10.28, + "learning_rate": 0.0001374, + "loss": 1.0534, + "step": 689 + }, + { + "epoch": 10.3, + "learning_rate": 0.00013759999999999998, + "loss": 0.5844, + "step": 690 + }, + { + "epoch": 10.31, + "learning_rate": 0.0001378, + "loss": 1.2123, + "step": 691 + }, + { + "epoch": 10.33, + "learning_rate": 0.000138, + "loss": 1.0089, + "step": 692 + }, + { + "epoch": 10.34, + "learning_rate": 0.0001382, + "loss": 1.1607, + "step": 693 + }, + { + "epoch": 10.36, + "learning_rate": 0.00013839999999999998, + "loss": 1.3608, + "step": 694 + }, + { + "epoch": 10.37, + "learning_rate": 0.0001386, + "loss": 0.8186, + "step": 695 + }, + { + "epoch": 10.39, + "learning_rate": 0.00013879999999999999, + "loss": 1.2984, + "step": 696 + }, + { + "epoch": 10.4, + "learning_rate": 0.000139, + "loss": 0.8887, + "step": 697 + }, + { + "epoch": 10.42, + "learning_rate": 0.0001392, + "loss": 1.0305, + "step": 698 + }, + { + "epoch": 10.43, + "learning_rate": 0.0001394, + "loss": 0.8821, + "step": 699 + }, + { + "epoch": 10.45, + "learning_rate": 0.00013959999999999998, + "loss": 0.9324, + "step": 700 + }, + { + "epoch": 10.46, + "learning_rate": 0.00013979999999999998, + "loss": 0.7349, + "step": 701 + }, + { + "epoch": 10.48, + "learning_rate": 0.00014, + "loss": 0.9737, + "step": 702 + }, + { + "epoch": 10.49, + "learning_rate": 0.0001402, + "loss": 0.7634, + "step": 703 + }, + { + "epoch": 10.51, + "learning_rate": 0.0001404, + "loss": 1.2076, + "step": 704 + }, + { + "epoch": 10.52, + "learning_rate": 0.0001406, + "loss": 0.6951, + "step": 705 + }, + { + "epoch": 10.54, + "learning_rate": 0.00014079999999999998, + "loss": 0.8113, + "step": 706 + }, + { + "epoch": 10.55, + "learning_rate": 0.00014099999999999998, + "loss": 0.6317, + "step": 707 + }, + { + "epoch": 10.57, + "learning_rate": 0.0001412, + "loss": 1.0482, + "step": 708 + }, + { + "epoch": 10.58, + "learning_rate": 0.0001414, + "loss": 0.8645, + "step": 709 + }, + { + "epoch": 10.59, + "learning_rate": 0.00014159999999999997, + "loss": 1.0108, + "step": 710 + }, + { + "epoch": 10.61, + "learning_rate": 0.0001418, + "loss": 0.9291, + "step": 711 + }, + { + "epoch": 10.62, + "learning_rate": 0.00014199999999999998, + "loss": 0.7584, + "step": 712 + }, + { + "epoch": 10.64, + "learning_rate": 0.0001422, + "loss": 0.9291, + "step": 713 + }, + { + "epoch": 10.65, + "learning_rate": 0.0001424, + "loss": 0.9126, + "step": 714 + }, + { + "epoch": 10.67, + "learning_rate": 0.0001426, + "loss": 0.7771, + "step": 715 + }, + { + "epoch": 10.68, + "learning_rate": 0.00014279999999999997, + "loss": 1.5661, + "step": 716 + }, + { + "epoch": 10.7, + "learning_rate": 0.00014299999999999998, + "loss": 1.2408, + "step": 717 + }, + { + "epoch": 10.71, + "learning_rate": 0.00014319999999999998, + "loss": 0.7724, + "step": 718 + }, + { + "epoch": 10.73, + "learning_rate": 0.0001434, + "loss": 0.8097, + "step": 719 + }, + { + "epoch": 10.74, + "learning_rate": 0.0001436, + "loss": 0.4914, + "step": 720 + }, + { + "epoch": 10.76, + "learning_rate": 0.0001438, + "loss": 0.8946, + "step": 721 + }, + { + "epoch": 10.77, + "learning_rate": 0.00014399999999999998, + "loss": 0.876, + "step": 722 + }, + { + "epoch": 10.79, + "learning_rate": 0.00014419999999999998, + "loss": 0.8784, + "step": 723 + }, + { + "epoch": 10.8, + "learning_rate": 0.00014439999999999999, + "loss": 1.0133, + "step": 724 + }, + { + "epoch": 10.82, + "learning_rate": 0.0001446, + "loss": 1.1433, + "step": 725 + }, + { + "epoch": 10.83, + "learning_rate": 0.0001448, + "loss": 1.361, + "step": 726 + }, + { + "epoch": 10.85, + "learning_rate": 0.000145, + "loss": 1.006, + "step": 727 + }, + { + "epoch": 10.86, + "learning_rate": 0.00014519999999999998, + "loss": 1.2211, + "step": 728 + }, + { + "epoch": 10.88, + "learning_rate": 0.00014539999999999998, + "loss": 0.9271, + "step": 729 + }, + { + "epoch": 10.89, + "learning_rate": 0.0001456, + "loss": 1.0685, + "step": 730 + }, + { + "epoch": 10.91, + "learning_rate": 0.0001458, + "loss": 1.1139, + "step": 731 + }, + { + "epoch": 10.92, + "learning_rate": 0.000146, + "loss": 0.9508, + "step": 732 + }, + { + "epoch": 10.94, + "learning_rate": 0.0001462, + "loss": 0.6874, + "step": 733 + }, + { + "epoch": 10.95, + "learning_rate": 0.00014639999999999998, + "loss": 1.0912, + "step": 734 + }, + { + "epoch": 10.97, + "learning_rate": 0.00014659999999999999, + "loss": 0.5254, + "step": 735 + }, + { + "epoch": 10.98, + "learning_rate": 0.0001468, + "loss": 1.1663, + "step": 736 + }, + { + "epoch": 11.0, + "learning_rate": 0.000147, + "loss": 0.7879, + "step": 737 + }, + { + "epoch": 11.01, + "learning_rate": 0.00014719999999999997, + "loss": 1.0129, + "step": 738 + }, + { + "epoch": 11.03, + "learning_rate": 0.00014739999999999998, + "loss": 0.8334, + "step": 739 + }, + { + "epoch": 11.04, + "learning_rate": 0.00014759999999999998, + "loss": 0.649, + "step": 740 + }, + { + "epoch": 11.06, + "learning_rate": 0.0001478, + "loss": 1.2923, + "step": 741 + }, + { + "epoch": 11.07, + "learning_rate": 0.000148, + "loss": 0.7388, + "step": 742 + }, + { + "epoch": 11.09, + "learning_rate": 0.0001482, + "loss": 1.1052, + "step": 743 + }, + { + "epoch": 11.1, + "learning_rate": 0.00014839999999999998, + "loss": 0.8788, + "step": 744 + }, + { + "epoch": 11.12, + "learning_rate": 0.00014859999999999998, + "loss": 0.8711, + "step": 745 + }, + { + "epoch": 11.13, + "learning_rate": 0.00014879999999999998, + "loss": 1.0916, + "step": 746 + }, + { + "epoch": 11.15, + "learning_rate": 0.000149, + "loss": 0.9535, + "step": 747 + }, + { + "epoch": 11.16, + "learning_rate": 0.0001492, + "loss": 0.8849, + "step": 748 + }, + { + "epoch": 11.18, + "learning_rate": 0.0001494, + "loss": 0.9825, + "step": 749 + }, + { + "epoch": 11.19, + "learning_rate": 0.00014959999999999998, + "loss": 0.9475, + "step": 750 + }, + { + "epoch": 11.21, + "learning_rate": 0.00014979999999999998, + "loss": 1.3699, + "step": 751 + }, + { + "epoch": 11.22, + "learning_rate": 0.00015, + "loss": 0.7989, + "step": 752 + }, + { + "epoch": 11.24, + "learning_rate": 0.0001502, + "loss": 1.1212, + "step": 753 + }, + { + "epoch": 11.25, + "learning_rate": 0.00015039999999999997, + "loss": 0.7149, + "step": 754 + }, + { + "epoch": 11.27, + "learning_rate": 0.00015059999999999997, + "loss": 0.9158, + "step": 755 + }, + { + "epoch": 11.28, + "learning_rate": 0.0001508, + "loss": 0.7415, + "step": 756 + }, + { + "epoch": 11.3, + "learning_rate": 0.00015099999999999998, + "loss": 0.8255, + "step": 757 + }, + { + "epoch": 11.31, + "learning_rate": 0.0001512, + "loss": 1.0209, + "step": 758 + }, + { + "epoch": 11.33, + "learning_rate": 0.0001514, + "loss": 0.8702, + "step": 759 + }, + { + "epoch": 11.34, + "learning_rate": 0.00015159999999999997, + "loss": 1.0192, + "step": 760 + }, + { + "epoch": 11.36, + "learning_rate": 0.00015179999999999998, + "loss": 0.6197, + "step": 761 + }, + { + "epoch": 11.37, + "learning_rate": 0.000152, + "loss": 0.8326, + "step": 762 + }, + { + "epoch": 11.39, + "learning_rate": 0.00015219999999999999, + "loss": 0.8132, + "step": 763 + }, + { + "epoch": 11.4, + "learning_rate": 0.0001524, + "loss": 0.7399, + "step": 764 + }, + { + "epoch": 11.42, + "learning_rate": 0.0001526, + "loss": 1.057, + "step": 765 + }, + { + "epoch": 11.43, + "learning_rate": 0.00015279999999999997, + "loss": 0.7861, + "step": 766 + }, + { + "epoch": 11.45, + "learning_rate": 0.00015299999999999998, + "loss": 0.9496, + "step": 767 + }, + { + "epoch": 11.46, + "learning_rate": 0.0001532, + "loss": 1.1022, + "step": 768 + }, + { + "epoch": 11.48, + "learning_rate": 0.0001534, + "loss": 0.7708, + "step": 769 + }, + { + "epoch": 11.49, + "learning_rate": 0.0001536, + "loss": 0.5718, + "step": 770 + }, + { + "epoch": 11.51, + "learning_rate": 0.0001538, + "loss": 1.4677, + "step": 771 + }, + { + "epoch": 11.52, + "learning_rate": 0.00015399999999999998, + "loss": 0.9794, + "step": 772 + }, + { + "epoch": 11.54, + "learning_rate": 0.00015419999999999998, + "loss": 1.1667, + "step": 773 + }, + { + "epoch": 11.55, + "learning_rate": 0.0001544, + "loss": 1.3804, + "step": 774 + }, + { + "epoch": 11.57, + "learning_rate": 0.0001546, + "loss": 1.3366, + "step": 775 + }, + { + "epoch": 11.58, + "learning_rate": 0.0001548, + "loss": 0.749, + "step": 776 + }, + { + "epoch": 11.59, + "learning_rate": 0.000155, + "loss": 0.6442, + "step": 777 + }, + { + "epoch": 11.61, + "learning_rate": 0.00015519999999999998, + "loss": 1.1415, + "step": 778 + }, + { + "epoch": 11.62, + "learning_rate": 0.00015539999999999998, + "loss": 1.4601, + "step": 779 + }, + { + "epoch": 11.64, + "learning_rate": 0.00015560000000000001, + "loss": 1.3154, + "step": 780 + }, + { + "epoch": 11.65, + "learning_rate": 0.0001558, + "loss": 0.5728, + "step": 781 + }, + { + "epoch": 11.67, + "learning_rate": 0.000156, + "loss": 0.6963, + "step": 782 + }, + { + "epoch": 11.68, + "learning_rate": 0.0001562, + "loss": 0.8572, + "step": 783 + }, + { + "epoch": 11.7, + "learning_rate": 0.00015639999999999998, + "loss": 0.8292, + "step": 784 + }, + { + "epoch": 11.71, + "learning_rate": 0.00015659999999999998, + "loss": 1.1899, + "step": 785 + }, + { + "epoch": 11.73, + "learning_rate": 0.00015679999999999996, + "loss": 0.6802, + "step": 786 + }, + { + "epoch": 11.74, + "learning_rate": 0.000157, + "loss": 1.5924, + "step": 787 + }, + { + "epoch": 11.76, + "learning_rate": 0.0001572, + "loss": 1.3299, + "step": 788 + }, + { + "epoch": 11.77, + "learning_rate": 0.00015739999999999998, + "loss": 1.2905, + "step": 789 + }, + { + "epoch": 11.79, + "learning_rate": 0.00015759999999999998, + "loss": 0.7761, + "step": 790 + }, + { + "epoch": 11.8, + "learning_rate": 0.0001578, + "loss": 0.7079, + "step": 791 + }, + { + "epoch": 11.82, + "learning_rate": 0.00015799999999999996, + "loss": 1.2024, + "step": 792 + }, + { + "epoch": 11.83, + "learning_rate": 0.00015819999999999997, + "loss": 1.0789, + "step": 793 + }, + { + "epoch": 11.85, + "learning_rate": 0.0001584, + "loss": 0.8125, + "step": 794 + }, + { + "epoch": 11.86, + "learning_rate": 0.00015859999999999998, + "loss": 0.7292, + "step": 795 + }, + { + "epoch": 11.88, + "learning_rate": 0.00015879999999999998, + "loss": 0.7865, + "step": 796 + }, + { + "epoch": 11.89, + "learning_rate": 0.000159, + "loss": 1.002, + "step": 797 + }, + { + "epoch": 11.91, + "learning_rate": 0.00015919999999999997, + "loss": 0.8947, + "step": 798 + }, + { + "epoch": 11.92, + "learning_rate": 0.00015939999999999997, + "loss": 1.1309, + "step": 799 + }, + { + "epoch": 11.94, + "learning_rate": 0.0001596, + "loss": 1.0801, + "step": 800 + }, + { + "epoch": 11.94, + "eval_accuracy": 0.7197748409202154, + "eval_f1": 0.7132262117406672, + "eval_loss": 0.9862720370292664, + "eval_runtime": 343.503, + "eval_samples_per_second": 11.895, + "eval_steps_per_second": 0.745, + "step": 800 + }, + { + "epoch": 11.95, + "learning_rate": 0.00015979999999999998, + "loss": 0.7474, + "step": 801 + }, + { + "epoch": 11.97, + "learning_rate": 0.00015999999999999999, + "loss": 0.843, + "step": 802 + }, + { + "epoch": 11.98, + "learning_rate": 0.0001602, + "loss": 0.7314, + "step": 803 + }, + { + "epoch": 12.0, + "learning_rate": 0.00016039999999999997, + "loss": 0.9725, + "step": 804 + }, + { + "epoch": 12.01, + "learning_rate": 0.00016059999999999997, + "loss": 1.4201, + "step": 805 + }, + { + "epoch": 12.03, + "learning_rate": 0.0001608, + "loss": 0.5819, + "step": 806 + }, + { + "epoch": 12.04, + "learning_rate": 0.00016099999999999998, + "loss": 1.0351, + "step": 807 + }, + { + "epoch": 12.06, + "learning_rate": 0.0001612, + "loss": 0.8731, + "step": 808 + }, + { + "epoch": 12.07, + "learning_rate": 0.0001614, + "loss": 0.8561, + "step": 809 + }, + { + "epoch": 12.09, + "learning_rate": 0.00016159999999999997, + "loss": 0.9219, + "step": 810 + }, + { + "epoch": 12.1, + "learning_rate": 0.00016179999999999998, + "loss": 0.7039, + "step": 811 + }, + { + "epoch": 12.12, + "learning_rate": 0.000162, + "loss": 1.0735, + "step": 812 + }, + { + "epoch": 12.13, + "learning_rate": 0.00016219999999999999, + "loss": 1.1316, + "step": 813 + }, + { + "epoch": 12.15, + "learning_rate": 0.0001624, + "loss": 0.83, + "step": 814 + }, + { + "epoch": 12.16, + "learning_rate": 0.0001626, + "loss": 0.6262, + "step": 815 + }, + { + "epoch": 12.18, + "learning_rate": 0.00016279999999999997, + "loss": 0.6751, + "step": 816 + }, + { + "epoch": 12.19, + "learning_rate": 0.00016299999999999998, + "loss": 0.7606, + "step": 817 + }, + { + "epoch": 12.21, + "learning_rate": 0.0001632, + "loss": 0.5516, + "step": 818 + }, + { + "epoch": 12.22, + "learning_rate": 0.0001634, + "loss": 0.8138, + "step": 819 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001636, + "loss": 0.3187, + "step": 820 + }, + { + "epoch": 12.25, + "learning_rate": 0.0001638, + "loss": 0.5061, + "step": 821 + }, + { + "epoch": 12.27, + "learning_rate": 0.00016399999999999997, + "loss": 0.9732, + "step": 822 + }, + { + "epoch": 12.28, + "learning_rate": 0.00016419999999999998, + "loss": 0.8939, + "step": 823 + }, + { + "epoch": 12.3, + "learning_rate": 0.0001644, + "loss": 0.5243, + "step": 824 + }, + { + "epoch": 12.31, + "learning_rate": 0.0001646, + "loss": 0.7636, + "step": 825 + }, + { + "epoch": 12.33, + "learning_rate": 0.0001648, + "loss": 0.8959, + "step": 826 + }, + { + "epoch": 12.34, + "learning_rate": 0.000165, + "loss": 1.1794, + "step": 827 + }, + { + "epoch": 12.36, + "learning_rate": 0.00016519999999999998, + "loss": 0.9718, + "step": 828 + }, + { + "epoch": 12.37, + "learning_rate": 0.00016539999999999998, + "loss": 0.8994, + "step": 829 + }, + { + "epoch": 12.39, + "learning_rate": 0.0001656, + "loss": 0.6492, + "step": 830 + }, + { + "epoch": 12.4, + "learning_rate": 0.00016579999999999996, + "loss": 0.5952, + "step": 831 + }, + { + "epoch": 12.42, + "learning_rate": 0.000166, + "loss": 0.7995, + "step": 832 + }, + { + "epoch": 12.43, + "learning_rate": 0.0001662, + "loss": 1.0975, + "step": 833 + }, + { + "epoch": 12.45, + "learning_rate": 0.00016639999999999998, + "loss": 1.332, + "step": 834 + }, + { + "epoch": 12.46, + "learning_rate": 0.00016659999999999998, + "loss": 1.1362, + "step": 835 + }, + { + "epoch": 12.48, + "learning_rate": 0.0001668, + "loss": 0.7806, + "step": 836 + }, + { + "epoch": 12.49, + "learning_rate": 0.00016699999999999997, + "loss": 0.8691, + "step": 837 + }, + { + "epoch": 12.51, + "learning_rate": 0.0001672, + "loss": 0.8649, + "step": 838 + }, + { + "epoch": 12.52, + "learning_rate": 0.0001674, + "loss": 0.7305, + "step": 839 + }, + { + "epoch": 12.54, + "learning_rate": 0.00016759999999999998, + "loss": 1.0438, + "step": 840 + }, + { + "epoch": 12.55, + "learning_rate": 0.00016779999999999999, + "loss": 0.977, + "step": 841 + }, + { + "epoch": 12.57, + "learning_rate": 0.000168, + "loss": 0.5909, + "step": 842 + }, + { + "epoch": 12.58, + "learning_rate": 0.00016819999999999997, + "loss": 1.1691, + "step": 843 + }, + { + "epoch": 12.59, + "learning_rate": 0.0001684, + "loss": 1.4127, + "step": 844 + }, + { + "epoch": 12.61, + "learning_rate": 0.0001686, + "loss": 1.1452, + "step": 845 + }, + { + "epoch": 12.62, + "learning_rate": 0.00016879999999999998, + "loss": 0.858, + "step": 846 + }, + { + "epoch": 12.64, + "learning_rate": 0.000169, + "loss": 0.9272, + "step": 847 + }, + { + "epoch": 12.65, + "learning_rate": 0.00016919999999999997, + "loss": 0.8182, + "step": 848 + }, + { + "epoch": 12.67, + "learning_rate": 0.00016939999999999997, + "loss": 0.93, + "step": 849 + }, + { + "epoch": 12.68, + "learning_rate": 0.0001696, + "loss": 1.305, + "step": 850 + }, + { + "epoch": 12.7, + "learning_rate": 0.00016979999999999998, + "loss": 0.7852, + "step": 851 + }, + { + "epoch": 12.71, + "learning_rate": 0.00016999999999999999, + "loss": 0.6363, + "step": 852 + }, + { + "epoch": 12.73, + "learning_rate": 0.0001702, + "loss": 0.9209, + "step": 853 + }, + { + "epoch": 12.74, + "learning_rate": 0.00017039999999999997, + "loss": 0.5253, + "step": 854 + }, + { + "epoch": 12.76, + "learning_rate": 0.00017059999999999997, + "loss": 1.0001, + "step": 855 + }, + { + "epoch": 12.77, + "learning_rate": 0.0001708, + "loss": 0.9678, + "step": 856 + }, + { + "epoch": 12.79, + "learning_rate": 0.00017099999999999998, + "loss": 0.639, + "step": 857 + }, + { + "epoch": 12.8, + "learning_rate": 0.0001712, + "loss": 0.8503, + "step": 858 + }, + { + "epoch": 12.82, + "learning_rate": 0.0001714, + "loss": 0.6808, + "step": 859 + }, + { + "epoch": 12.83, + "learning_rate": 0.00017159999999999997, + "loss": 0.7688, + "step": 860 + }, + { + "epoch": 12.85, + "learning_rate": 0.00017179999999999997, + "loss": 1.008, + "step": 861 + }, + { + "epoch": 12.86, + "learning_rate": 0.000172, + "loss": 0.9021, + "step": 862 + }, + { + "epoch": 12.88, + "learning_rate": 0.00017219999999999998, + "loss": 0.9418, + "step": 863 + }, + { + "epoch": 12.89, + "learning_rate": 0.0001724, + "loss": 0.9163, + "step": 864 + }, + { + "epoch": 12.91, + "learning_rate": 0.0001726, + "loss": 1.0017, + "step": 865 + }, + { + "epoch": 12.92, + "learning_rate": 0.00017279999999999997, + "loss": 0.8404, + "step": 866 + }, + { + "epoch": 12.94, + "learning_rate": 0.00017299999999999998, + "loss": 0.6335, + "step": 867 + }, + { + "epoch": 12.95, + "learning_rate": 0.00017319999999999998, + "loss": 0.7219, + "step": 868 + }, + { + "epoch": 12.97, + "learning_rate": 0.00017339999999999996, + "loss": 1.103, + "step": 869 + }, + { + "epoch": 12.98, + "learning_rate": 0.0001736, + "loss": 0.7049, + "step": 870 + }, + { + "epoch": 13.0, + "learning_rate": 0.0001738, + "loss": 0.9147, + "step": 871 + }, + { + "epoch": 13.01, + "learning_rate": 0.00017399999999999997, + "loss": 0.91, + "step": 872 + }, + { + "epoch": 13.03, + "learning_rate": 0.00017419999999999998, + "loss": 0.8884, + "step": 873 + }, + { + "epoch": 13.04, + "learning_rate": 0.00017439999999999998, + "loss": 0.6004, + "step": 874 + }, + { + "epoch": 13.06, + "learning_rate": 0.00017459999999999996, + "loss": 0.5358, + "step": 875 + }, + { + "epoch": 13.07, + "learning_rate": 0.0001748, + "loss": 0.8575, + "step": 876 + }, + { + "epoch": 13.09, + "learning_rate": 0.000175, + "loss": 0.8457, + "step": 877 + }, + { + "epoch": 13.1, + "learning_rate": 0.00017519999999999998, + "loss": 0.7373, + "step": 878 + }, + { + "epoch": 13.12, + "learning_rate": 0.00017539999999999998, + "loss": 0.6553, + "step": 879 + }, + { + "epoch": 13.13, + "learning_rate": 0.00017559999999999999, + "loss": 0.7267, + "step": 880 + }, + { + "epoch": 13.15, + "learning_rate": 0.00017579999999999996, + "loss": 0.6778, + "step": 881 + }, + { + "epoch": 13.16, + "learning_rate": 0.000176, + "loss": 0.4163, + "step": 882 + }, + { + "epoch": 13.18, + "learning_rate": 0.0001762, + "loss": 1.0095, + "step": 883 + }, + { + "epoch": 13.19, + "learning_rate": 0.00017639999999999998, + "loss": 1.2363, + "step": 884 + }, + { + "epoch": 13.21, + "learning_rate": 0.00017659999999999998, + "loss": 0.9183, + "step": 885 + }, + { + "epoch": 13.22, + "learning_rate": 0.0001768, + "loss": 0.8321, + "step": 886 + }, + { + "epoch": 13.24, + "learning_rate": 0.00017699999999999997, + "loss": 0.9908, + "step": 887 + }, + { + "epoch": 13.25, + "learning_rate": 0.0001772, + "loss": 0.7905, + "step": 888 + }, + { + "epoch": 13.27, + "learning_rate": 0.0001774, + "loss": 1.2317, + "step": 889 + }, + { + "epoch": 13.28, + "learning_rate": 0.00017759999999999998, + "loss": 0.8439, + "step": 890 + }, + { + "epoch": 13.3, + "learning_rate": 0.00017779999999999998, + "loss": 1.0982, + "step": 891 + }, + { + "epoch": 13.31, + "learning_rate": 0.000178, + "loss": 0.7464, + "step": 892 + }, + { + "epoch": 13.33, + "learning_rate": 0.00017819999999999997, + "loss": 0.602, + "step": 893 + }, + { + "epoch": 13.34, + "learning_rate": 0.0001784, + "loss": 0.9053, + "step": 894 + }, + { + "epoch": 13.36, + "learning_rate": 0.0001786, + "loss": 0.6464, + "step": 895 + }, + { + "epoch": 13.37, + "learning_rate": 0.00017879999999999998, + "loss": 1.0542, + "step": 896 + }, + { + "epoch": 13.39, + "learning_rate": 0.000179, + "loss": 0.5522, + "step": 897 + }, + { + "epoch": 13.4, + "learning_rate": 0.0001792, + "loss": 0.7257, + "step": 898 + }, + { + "epoch": 13.42, + "learning_rate": 0.00017939999999999997, + "loss": 0.6098, + "step": 899 + }, + { + "epoch": 13.43, + "learning_rate": 0.0001796, + "loss": 0.5454, + "step": 900 + }, + { + "epoch": 13.45, + "learning_rate": 0.0001798, + "loss": 0.7657, + "step": 901 + }, + { + "epoch": 13.46, + "learning_rate": 0.00017999999999999998, + "loss": 0.78, + "step": 902 + }, + { + "epoch": 13.48, + "learning_rate": 0.0001802, + "loss": 0.81, + "step": 903 + }, + { + "epoch": 13.49, + "learning_rate": 0.0001804, + "loss": 0.9637, + "step": 904 + }, + { + "epoch": 13.51, + "learning_rate": 0.00018059999999999997, + "loss": 0.7132, + "step": 905 + }, + { + "epoch": 13.52, + "learning_rate": 0.00018079999999999998, + "loss": 0.9006, + "step": 906 + }, + { + "epoch": 13.54, + "learning_rate": 0.000181, + "loss": 0.7289, + "step": 907 + }, + { + "epoch": 13.55, + "learning_rate": 0.00018119999999999999, + "loss": 0.8941, + "step": 908 + }, + { + "epoch": 13.57, + "learning_rate": 0.00018119999999999999, + "loss": 0.8517, + "step": 909 + }, + { + "epoch": 13.58, + "learning_rate": 0.0001814, + "loss": 0.6031, + "step": 910 + }, + { + "epoch": 13.59, + "learning_rate": 0.00018159999999999997, + "loss": 0.7666, + "step": 911 + }, + { + "epoch": 13.61, + "learning_rate": 0.00018179999999999997, + "loss": 0.8234, + "step": 912 + }, + { + "epoch": 13.62, + "learning_rate": 0.00018199999999999998, + "loss": 1.0899, + "step": 913 + }, + { + "epoch": 13.64, + "learning_rate": 0.00018219999999999996, + "loss": 0.2315, + "step": 914 + }, + { + "epoch": 13.65, + "learning_rate": 0.0001824, + "loss": 0.5115, + "step": 915 + }, + { + "epoch": 13.67, + "learning_rate": 0.0001826, + "loss": 0.9171, + "step": 916 + }, + { + "epoch": 13.68, + "learning_rate": 0.00018279999999999997, + "loss": 1.2401, + "step": 917 + }, + { + "epoch": 13.7, + "learning_rate": 0.00018299999999999998, + "loss": 0.5019, + "step": 918 + }, + { + "epoch": 13.71, + "learning_rate": 0.00018319999999999998, + "loss": 0.6102, + "step": 919 + }, + { + "epoch": 13.73, + "learning_rate": 0.00018339999999999996, + "loss": 0.5548, + "step": 920 + }, + { + "epoch": 13.74, + "learning_rate": 0.0001836, + "loss": 0.8196, + "step": 921 + }, + { + "epoch": 13.76, + "learning_rate": 0.0001838, + "loss": 0.4653, + "step": 922 + }, + { + "epoch": 13.77, + "learning_rate": 0.00018399999999999997, + "loss": 0.6946, + "step": 923 + }, + { + "epoch": 13.79, + "learning_rate": 0.00018419999999999998, + "loss": 0.8383, + "step": 924 + }, + { + "epoch": 13.8, + "learning_rate": 0.00018439999999999998, + "loss": 0.757, + "step": 925 + }, + { + "epoch": 13.82, + "learning_rate": 0.00018459999999999996, + "loss": 1.1668, + "step": 926 + }, + { + "epoch": 13.83, + "learning_rate": 0.0001848, + "loss": 1.2177, + "step": 927 + }, + { + "epoch": 13.85, + "learning_rate": 0.000185, + "loss": 1.1548, + "step": 928 + }, + { + "epoch": 13.86, + "learning_rate": 0.00018519999999999998, + "loss": 0.7392, + "step": 929 + }, + { + "epoch": 13.88, + "learning_rate": 0.00018539999999999998, + "loss": 0.659, + "step": 930 + }, + { + "epoch": 13.89, + "learning_rate": 0.00018559999999999998, + "loss": 0.4055, + "step": 931 + }, + { + "epoch": 13.91, + "learning_rate": 0.00018579999999999996, + "loss": 0.7573, + "step": 932 + }, + { + "epoch": 13.92, + "learning_rate": 0.000186, + "loss": 1.0789, + "step": 933 + }, + { + "epoch": 13.94, + "learning_rate": 0.0001862, + "loss": 1.0103, + "step": 934 + }, + { + "epoch": 13.95, + "learning_rate": 0.00018639999999999998, + "loss": 0.626, + "step": 935 + }, + { + "epoch": 13.97, + "learning_rate": 0.00018659999999999998, + "loss": 0.5344, + "step": 936 + }, + { + "epoch": 13.98, + "learning_rate": 0.0001868, + "loss": 1.0478, + "step": 937 + }, + { + "epoch": 14.0, + "learning_rate": 0.00018699999999999996, + "loss": 1.033, + "step": 938 + }, + { + "epoch": 14.01, + "learning_rate": 0.0001872, + "loss": 1.1783, + "step": 939 + }, + { + "epoch": 14.03, + "learning_rate": 0.0001874, + "loss": 1.0731, + "step": 940 + }, + { + "epoch": 14.04, + "learning_rate": 0.00018759999999999998, + "loss": 0.9886, + "step": 941 + }, + { + "epoch": 14.06, + "learning_rate": 0.00018779999999999998, + "loss": 0.5971, + "step": 942 + }, + { + "epoch": 14.07, + "learning_rate": 0.000188, + "loss": 0.9326, + "step": 943 + }, + { + "epoch": 14.09, + "learning_rate": 0.00018819999999999997, + "loss": 0.9478, + "step": 944 + }, + { + "epoch": 14.1, + "learning_rate": 0.00018839999999999997, + "loss": 0.5437, + "step": 945 + }, + { + "epoch": 14.12, + "learning_rate": 0.0001886, + "loss": 0.9006, + "step": 946 + }, + { + "epoch": 14.13, + "learning_rate": 0.00018879999999999998, + "loss": 1.1322, + "step": 947 + }, + { + "epoch": 14.15, + "learning_rate": 0.00018899999999999999, + "loss": 0.7737, + "step": 948 + }, + { + "epoch": 14.16, + "learning_rate": 0.0001892, + "loss": 0.658, + "step": 949 + }, + { + "epoch": 14.18, + "learning_rate": 0.00018939999999999997, + "loss": 0.8001, + "step": 950 + }, + { + "epoch": 14.19, + "learning_rate": 0.00018959999999999997, + "loss": 1.0102, + "step": 951 + }, + { + "epoch": 14.21, + "learning_rate": 0.0001898, + "loss": 0.6759, + "step": 952 + }, + { + "epoch": 14.22, + "learning_rate": 0.00018999999999999998, + "loss": 0.6328, + "step": 953 + }, + { + "epoch": 14.24, + "learning_rate": 0.0001902, + "loss": 0.4403, + "step": 954 + }, + { + "epoch": 14.25, + "learning_rate": 0.0001904, + "loss": 0.6944, + "step": 955 + }, + { + "epoch": 14.27, + "learning_rate": 0.00019059999999999997, + "loss": 0.5963, + "step": 956 + }, + { + "epoch": 14.28, + "learning_rate": 0.00019079999999999998, + "loss": 0.5515, + "step": 957 + }, + { + "epoch": 14.3, + "learning_rate": 0.000191, + "loss": 0.9895, + "step": 958 + }, + { + "epoch": 14.31, + "learning_rate": 0.00019119999999999999, + "loss": 0.5896, + "step": 959 + }, + { + "epoch": 14.33, + "learning_rate": 0.0001914, + "loss": 0.6857, + "step": 960 + }, + { + "epoch": 14.34, + "learning_rate": 0.0001916, + "loss": 0.9114, + "step": 961 + }, + { + "epoch": 14.36, + "learning_rate": 0.00019179999999999997, + "loss": 0.7469, + "step": 962 + }, + { + "epoch": 14.37, + "learning_rate": 0.00019199999999999998, + "loss": 0.6013, + "step": 963 + }, + { + "epoch": 14.39, + "learning_rate": 0.0001922, + "loss": 1.166, + "step": 964 + }, + { + "epoch": 14.4, + "learning_rate": 0.0001924, + "loss": 0.5838, + "step": 965 + }, + { + "epoch": 14.42, + "learning_rate": 0.0001926, + "loss": 1.0111, + "step": 966 + }, + { + "epoch": 14.43, + "learning_rate": 0.0001928, + "loss": 0.9963, + "step": 967 + }, + { + "epoch": 14.45, + "learning_rate": 0.00019299999999999997, + "loss": 0.9305, + "step": 968 + }, + { + "epoch": 14.46, + "learning_rate": 0.00019319999999999998, + "loss": 0.6325, + "step": 969 + }, + { + "epoch": 14.48, + "learning_rate": 0.0001934, + "loss": 0.6293, + "step": 970 + }, + { + "epoch": 14.49, + "learning_rate": 0.0001936, + "loss": 0.6669, + "step": 971 + }, + { + "epoch": 14.51, + "learning_rate": 0.0001938, + "loss": 0.4591, + "step": 972 + }, + { + "epoch": 14.52, + "learning_rate": 0.00019399999999999997, + "loss": 0.6482, + "step": 973 + }, + { + "epoch": 14.54, + "learning_rate": 0.00019419999999999998, + "loss": 0.5052, + "step": 974 + }, + { + "epoch": 14.55, + "learning_rate": 0.00019439999999999998, + "loss": 0.5585, + "step": 975 + }, + { + "epoch": 14.57, + "learning_rate": 0.00019459999999999996, + "loss": 1.0939, + "step": 976 + }, + { + "epoch": 14.58, + "learning_rate": 0.0001948, + "loss": 0.5625, + "step": 977 + }, + { + "epoch": 14.59, + "learning_rate": 0.000195, + "loss": 0.7092, + "step": 978 + }, + { + "epoch": 14.61, + "learning_rate": 0.00019519999999999997, + "loss": 0.6617, + "step": 979 + }, + { + "epoch": 14.62, + "learning_rate": 0.00019539999999999998, + "loss": 0.4754, + "step": 980 + }, + { + "epoch": 14.64, + "learning_rate": 0.00019559999999999998, + "loss": 0.5685, + "step": 981 + }, + { + "epoch": 14.65, + "learning_rate": 0.00019579999999999996, + "loss": 0.5839, + "step": 982 + }, + { + "epoch": 14.67, + "learning_rate": 0.00019599999999999997, + "loss": 0.6545, + "step": 983 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001962, + "loss": 0.6199, + "step": 984 + }, + { + "epoch": 14.7, + "learning_rate": 0.00019639999999999998, + "loss": 1.2447, + "step": 985 + }, + { + "epoch": 14.71, + "learning_rate": 0.00019659999999999998, + "loss": 0.6477, + "step": 986 + }, + { + "epoch": 14.73, + "learning_rate": 0.00019679999999999999, + "loss": 0.8531, + "step": 987 + }, + { + "epoch": 14.74, + "learning_rate": 0.00019699999999999996, + "loss": 1.2488, + "step": 988 + }, + { + "epoch": 14.76, + "learning_rate": 0.00019719999999999997, + "loss": 1.353, + "step": 989 + }, + { + "epoch": 14.77, + "learning_rate": 0.0001974, + "loss": 0.9964, + "step": 990 + }, + { + "epoch": 14.79, + "learning_rate": 0.00019759999999999998, + "loss": 1.2242, + "step": 991 + }, + { + "epoch": 14.8, + "learning_rate": 0.00019779999999999998, + "loss": 1.2178, + "step": 992 + }, + { + "epoch": 14.82, + "learning_rate": 0.000198, + "loss": 0.9707, + "step": 993 + }, + { + "epoch": 14.83, + "learning_rate": 0.00019819999999999997, + "loss": 1.4878, + "step": 994 + }, + { + "epoch": 14.85, + "learning_rate": 0.00019839999999999997, + "loss": 1.3396, + "step": 995 + }, + { + "epoch": 14.86, + "learning_rate": 0.0001986, + "loss": 1.1865, + "step": 996 + }, + { + "epoch": 14.88, + "learning_rate": 0.00019879999999999998, + "loss": 1.0701, + "step": 997 + }, + { + "epoch": 14.89, + "learning_rate": 0.00019899999999999999, + "loss": 1.0805, + "step": 998 + }, + { + "epoch": 14.91, + "learning_rate": 0.0001992, + "loss": 0.8146, + "step": 999 + }, + { + "epoch": 14.92, + "learning_rate": 0.00019939999999999997, + "loss": 0.9285, + "step": 1000 + }, + { + "epoch": 14.92, + "eval_accuracy": 0.7139011257953989, + "eval_f1": 0.7036851398351651, + "eval_loss": 0.991243302822113, + "eval_runtime": 344.8445, + "eval_samples_per_second": 11.849, + "eval_steps_per_second": 0.742, + "step": 1000 + }, + { + "epoch": 14.94, + "learning_rate": 0.00019959999999999997, + "loss": 0.7898, + "step": 1001 + }, + { + "epoch": 14.95, + "learning_rate": 0.0001998, + "loss": 1.1387, + "step": 1002 + }, + { + "epoch": 14.97, + "learning_rate": 0.00019999999999999998, + "loss": 0.2668, + "step": 1003 + }, + { + "epoch": 14.98, + "learning_rate": 0.0002002, + "loss": 0.3939, + "step": 1004 + }, + { + "epoch": 15.0, + "learning_rate": 0.0002004, + "loss": 0.7606, + "step": 1005 + }, + { + "epoch": 15.01, + "learning_rate": 0.00020059999999999997, + "loss": 1.0102, + "step": 1006 + }, + { + "epoch": 15.03, + "learning_rate": 0.00020079999999999997, + "loss": 0.6444, + "step": 1007 + }, + { + "epoch": 15.04, + "learning_rate": 0.000201, + "loss": 1.0351, + "step": 1008 + }, + { + "epoch": 15.06, + "learning_rate": 0.00020119999999999998, + "loss": 0.7079, + "step": 1009 + }, + { + "epoch": 15.07, + "learning_rate": 0.0002014, + "loss": 0.281, + "step": 1010 + }, + { + "epoch": 15.09, + "learning_rate": 0.0002016, + "loss": 0.6687, + "step": 1011 + }, + { + "epoch": 15.1, + "learning_rate": 0.00020179999999999997, + "loss": 0.8258, + "step": 1012 + }, + { + "epoch": 15.12, + "learning_rate": 0.00020199999999999998, + "loss": 0.7249, + "step": 1013 + }, + { + "epoch": 15.13, + "learning_rate": 0.0002022, + "loss": 0.776, + "step": 1014 + }, + { + "epoch": 15.15, + "learning_rate": 0.0002024, + "loss": 0.9554, + "step": 1015 + }, + { + "epoch": 15.16, + "learning_rate": 0.0002026, + "loss": 0.364, + "step": 1016 + }, + { + "epoch": 15.18, + "learning_rate": 0.0002028, + "loss": 0.7906, + "step": 1017 + }, + { + "epoch": 15.19, + "learning_rate": 0.00020299999999999997, + "loss": 0.6387, + "step": 1018 + }, + { + "epoch": 15.21, + "learning_rate": 0.00020319999999999998, + "loss": 0.4794, + "step": 1019 + }, + { + "epoch": 15.22, + "learning_rate": 0.00020339999999999998, + "loss": 1.0287, + "step": 1020 + }, + { + "epoch": 15.24, + "learning_rate": 0.00020359999999999996, + "loss": 0.7316, + "step": 1021 + }, + { + "epoch": 15.25, + "learning_rate": 0.0002038, + "loss": 0.8346, + "step": 1022 + }, + { + "epoch": 15.27, + "learning_rate": 0.000204, + "loss": 1.083, + "step": 1023 + }, + { + "epoch": 15.28, + "learning_rate": 0.00020419999999999998, + "loss": 0.9167, + "step": 1024 + }, + { + "epoch": 15.3, + "learning_rate": 0.00020439999999999998, + "loss": 0.9092, + "step": 1025 + }, + { + "epoch": 15.31, + "learning_rate": 0.00020459999999999999, + "loss": 0.6361, + "step": 1026 + }, + { + "epoch": 15.33, + "learning_rate": 0.00020479999999999996, + "loss": 0.8581, + "step": 1027 + }, + { + "epoch": 15.34, + "learning_rate": 0.000205, + "loss": 1.1408, + "step": 1028 + }, + { + "epoch": 15.36, + "learning_rate": 0.0002052, + "loss": 0.6805, + "step": 1029 + }, + { + "epoch": 15.37, + "learning_rate": 0.00020539999999999998, + "loss": 1.0752, + "step": 1030 + }, + { + "epoch": 15.39, + "learning_rate": 0.00020559999999999998, + "loss": 0.8579, + "step": 1031 + }, + { + "epoch": 15.4, + "learning_rate": 0.0002058, + "loss": 0.7609, + "step": 1032 + }, + { + "epoch": 15.42, + "learning_rate": 0.00020599999999999997, + "loss": 0.6567, + "step": 1033 + }, + { + "epoch": 15.43, + "learning_rate": 0.0002062, + "loss": 0.8744, + "step": 1034 + }, + { + "epoch": 15.45, + "learning_rate": 0.00020639999999999998, + "loss": 1.0182, + "step": 1035 + }, + { + "epoch": 15.46, + "learning_rate": 0.00020659999999999998, + "loss": 0.5767, + "step": 1036 + }, + { + "epoch": 15.48, + "learning_rate": 0.00020679999999999999, + "loss": 0.7308, + "step": 1037 + }, + { + "epoch": 15.49, + "learning_rate": 0.00020699999999999996, + "loss": 0.6098, + "step": 1038 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020719999999999997, + "loss": 1.1058, + "step": 1039 + }, + { + "epoch": 15.52, + "learning_rate": 0.0002074, + "loss": 0.471, + "step": 1040 + }, + { + "epoch": 15.54, + "learning_rate": 0.00020759999999999998, + "loss": 0.9943, + "step": 1041 + }, + { + "epoch": 15.55, + "learning_rate": 0.00020779999999999998, + "loss": 0.598, + "step": 1042 + }, + { + "epoch": 15.57, + "learning_rate": 0.000208, + "loss": 0.8455, + "step": 1043 + }, + { + "epoch": 15.58, + "learning_rate": 0.00020819999999999996, + "loss": 0.7184, + "step": 1044 + }, + { + "epoch": 15.59, + "learning_rate": 0.00020839999999999997, + "loss": 1.0079, + "step": 1045 + }, + { + "epoch": 15.61, + "learning_rate": 0.0002086, + "loss": 0.5931, + "step": 1046 + }, + { + "epoch": 15.62, + "learning_rate": 0.00020879999999999998, + "loss": 0.6176, + "step": 1047 + }, + { + "epoch": 15.64, + "learning_rate": 0.00020899999999999998, + "loss": 0.6809, + "step": 1048 + }, + { + "epoch": 15.65, + "learning_rate": 0.0002092, + "loss": 0.3144, + "step": 1049 + }, + { + "epoch": 15.67, + "learning_rate": 0.00020939999999999997, + "loss": 0.7481, + "step": 1050 + }, + { + "epoch": 15.68, + "learning_rate": 0.00020959999999999997, + "loss": 0.5394, + "step": 1051 + }, + { + "epoch": 15.7, + "learning_rate": 0.0002098, + "loss": 0.6844, + "step": 1052 + }, + { + "epoch": 15.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.6366, + "step": 1053 + }, + { + "epoch": 15.73, + "learning_rate": 0.0002102, + "loss": 0.585, + "step": 1054 + }, + { + "epoch": 15.74, + "learning_rate": 0.0002104, + "loss": 0.6458, + "step": 1055 + }, + { + "epoch": 15.76, + "learning_rate": 0.00021059999999999997, + "loss": 1.1576, + "step": 1056 + }, + { + "epoch": 15.77, + "learning_rate": 0.00021079999999999997, + "loss": 0.7424, + "step": 1057 + }, + { + "epoch": 15.79, + "learning_rate": 0.00021099999999999998, + "loss": 0.8765, + "step": 1058 + }, + { + "epoch": 15.8, + "learning_rate": 0.00021119999999999996, + "loss": 0.6492, + "step": 1059 + }, + { + "epoch": 15.82, + "learning_rate": 0.0002114, + "loss": 0.9297, + "step": 1060 + }, + { + "epoch": 15.83, + "learning_rate": 0.0002116, + "loss": 0.7282, + "step": 1061 + }, + { + "epoch": 15.85, + "learning_rate": 0.00021179999999999997, + "loss": 1.1035, + "step": 1062 + }, + { + "epoch": 15.86, + "learning_rate": 0.00021199999999999998, + "loss": 0.8628, + "step": 1063 + }, + { + "epoch": 15.88, + "learning_rate": 0.00021219999999999998, + "loss": 0.7203, + "step": 1064 + }, + { + "epoch": 15.89, + "learning_rate": 0.00021239999999999996, + "loss": 0.5733, + "step": 1065 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002126, + "loss": 0.6324, + "step": 1066 + }, + { + "epoch": 15.92, + "learning_rate": 0.0002128, + "loss": 0.899, + "step": 1067 + }, + { + "epoch": 15.94, + "learning_rate": 0.00021299999999999997, + "loss": 0.7308, + "step": 1068 + }, + { + "epoch": 15.95, + "learning_rate": 0.00021319999999999998, + "loss": 0.8845, + "step": 1069 + }, + { + "epoch": 15.97, + "learning_rate": 0.00021339999999999998, + "loss": 0.2766, + "step": 1070 + }, + { + "epoch": 15.98, + "learning_rate": 0.00021359999999999996, + "loss": 0.7306, + "step": 1071 + }, + { + "epoch": 16.0, + "learning_rate": 0.0002138, + "loss": 0.9311, + "step": 1072 + }, + { + "epoch": 16.01, + "learning_rate": 0.000214, + "loss": 0.7444, + "step": 1073 + }, + { + "epoch": 16.03, + "learning_rate": 0.00021419999999999998, + "loss": 0.9739, + "step": 1074 + }, + { + "epoch": 16.04, + "learning_rate": 0.00021439999999999998, + "loss": 0.9668, + "step": 1075 + }, + { + "epoch": 16.06, + "learning_rate": 0.00021459999999999998, + "loss": 0.7567, + "step": 1076 + }, + { + "epoch": 16.07, + "learning_rate": 0.00021479999999999996, + "loss": 0.4541, + "step": 1077 + }, + { + "epoch": 16.09, + "learning_rate": 0.000215, + "loss": 0.443, + "step": 1078 + }, + { + "epoch": 16.1, + "learning_rate": 0.0002152, + "loss": 0.4377, + "step": 1079 + }, + { + "epoch": 16.12, + "learning_rate": 0.00021539999999999998, + "loss": 0.4075, + "step": 1080 + }, + { + "epoch": 16.13, + "learning_rate": 0.00021559999999999998, + "loss": 1.0803, + "step": 1081 + }, + { + "epoch": 16.15, + "learning_rate": 0.0002158, + "loss": 0.7278, + "step": 1082 + }, + { + "epoch": 16.16, + "learning_rate": 0.00021599999999999996, + "loss": 0.4957, + "step": 1083 + }, + { + "epoch": 16.18, + "learning_rate": 0.0002162, + "loss": 0.4261, + "step": 1084 + }, + { + "epoch": 16.19, + "learning_rate": 0.0002164, + "loss": 0.3906, + "step": 1085 + }, + { + "epoch": 16.21, + "learning_rate": 0.00021659999999999998, + "loss": 0.7426, + "step": 1086 + }, + { + "epoch": 16.22, + "learning_rate": 0.00021679999999999998, + "loss": 0.6193, + "step": 1087 + }, + { + "epoch": 16.24, + "learning_rate": 0.000217, + "loss": 0.5344, + "step": 1088 + }, + { + "epoch": 16.25, + "learning_rate": 0.00021719999999999997, + "loss": 0.9232, + "step": 1089 + }, + { + "epoch": 16.27, + "learning_rate": 0.0002174, + "loss": 0.5786, + "step": 1090 + }, + { + "epoch": 16.28, + "learning_rate": 0.0002176, + "loss": 0.8235, + "step": 1091 + }, + { + "epoch": 16.3, + "learning_rate": 0.00021779999999999998, + "loss": 0.9143, + "step": 1092 + }, + { + "epoch": 16.31, + "learning_rate": 0.00021799999999999999, + "loss": 0.3326, + "step": 1093 + }, + { + "epoch": 16.33, + "learning_rate": 0.0002182, + "loss": 0.2456, + "step": 1094 + }, + { + "epoch": 16.34, + "learning_rate": 0.00021839999999999997, + "loss": 0.858, + "step": 1095 + }, + { + "epoch": 16.36, + "learning_rate": 0.00021859999999999997, + "loss": 1.022, + "step": 1096 + }, + { + "epoch": 16.37, + "learning_rate": 0.00021879999999999995, + "loss": 0.5284, + "step": 1097 + }, + { + "epoch": 16.39, + "learning_rate": 0.00021899999999999998, + "loss": 0.3627, + "step": 1098 + }, + { + "epoch": 16.4, + "learning_rate": 0.0002192, + "loss": 0.9552, + "step": 1099 + }, + { + "epoch": 16.42, + "learning_rate": 0.00021939999999999997, + "loss": 0.619, + "step": 1100 + }, + { + "epoch": 16.43, + "learning_rate": 0.00021959999999999997, + "loss": 0.2164, + "step": 1101 + }, + { + "epoch": 16.45, + "learning_rate": 0.00021979999999999998, + "loss": 0.7193, + "step": 1102 + }, + { + "epoch": 16.46, + "learning_rate": 0.00021999999999999995, + "loss": 0.7335, + "step": 1103 + }, + { + "epoch": 16.48, + "learning_rate": 0.00022019999999999999, + "loss": 0.367, + "step": 1104 + }, + { + "epoch": 16.49, + "learning_rate": 0.0002204, + "loss": 0.4086, + "step": 1105 + }, + { + "epoch": 16.51, + "learning_rate": 0.00022059999999999997, + "loss": 1.1209, + "step": 1106 + }, + { + "epoch": 16.52, + "learning_rate": 0.00022079999999999997, + "loss": 0.4251, + "step": 1107 + }, + { + "epoch": 16.54, + "learning_rate": 0.00022099999999999998, + "loss": 0.2345, + "step": 1108 + }, + { + "epoch": 16.55, + "learning_rate": 0.00022119999999999996, + "loss": 0.9918, + "step": 1109 + }, + { + "epoch": 16.57, + "learning_rate": 0.0002214, + "loss": 0.5629, + "step": 1110 + }, + { + "epoch": 16.58, + "learning_rate": 0.0002216, + "loss": 0.9618, + "step": 1111 + }, + { + "epoch": 16.59, + "learning_rate": 0.00022179999999999997, + "loss": 0.9031, + "step": 1112 + }, + { + "epoch": 16.61, + "learning_rate": 0.00022199999999999998, + "loss": 0.7487, + "step": 1113 + }, + { + "epoch": 16.62, + "learning_rate": 0.00022219999999999998, + "loss": 0.8206, + "step": 1114 + }, + { + "epoch": 16.64, + "learning_rate": 0.00022239999999999996, + "loss": 0.3871, + "step": 1115 + }, + { + "epoch": 16.65, + "learning_rate": 0.0002226, + "loss": 0.6073, + "step": 1116 + }, + { + "epoch": 16.67, + "learning_rate": 0.0002228, + "loss": 0.3924, + "step": 1117 + }, + { + "epoch": 16.68, + "learning_rate": 0.00022299999999999997, + "loss": 0.6858, + "step": 1118 + }, + { + "epoch": 16.7, + "learning_rate": 0.00022319999999999998, + "loss": 0.51, + "step": 1119 + }, + { + "epoch": 16.71, + "learning_rate": 0.00022339999999999998, + "loss": 0.8494, + "step": 1120 + }, + { + "epoch": 16.73, + "learning_rate": 0.00022359999999999996, + "loss": 0.7321, + "step": 1121 + }, + { + "epoch": 16.74, + "learning_rate": 0.0002238, + "loss": 0.9092, + "step": 1122 + }, + { + "epoch": 16.76, + "learning_rate": 0.000224, + "loss": 0.8845, + "step": 1123 + }, + { + "epoch": 16.77, + "learning_rate": 0.00022419999999999997, + "loss": 1.2335, + "step": 1124 + }, + { + "epoch": 16.79, + "learning_rate": 0.00022439999999999998, + "loss": 1.1761, + "step": 1125 + }, + { + "epoch": 16.8, + "learning_rate": 0.00022459999999999998, + "loss": 1.3628, + "step": 1126 + }, + { + "epoch": 16.82, + "learning_rate": 0.00022479999999999996, + "loss": 0.6865, + "step": 1127 + }, + { + "epoch": 16.83, + "learning_rate": 0.000225, + "loss": 1.1485, + "step": 1128 + }, + { + "epoch": 16.85, + "learning_rate": 0.0002252, + "loss": 0.9444, + "step": 1129 + }, + { + "epoch": 16.86, + "learning_rate": 0.00022539999999999998, + "loss": 0.3779, + "step": 1130 + }, + { + "epoch": 16.88, + "learning_rate": 0.00022559999999999998, + "loss": 0.7367, + "step": 1131 + }, + { + "epoch": 16.89, + "learning_rate": 0.00022579999999999999, + "loss": 0.7024, + "step": 1132 + }, + { + "epoch": 16.91, + "learning_rate": 0.00022599999999999996, + "loss": 0.6302, + "step": 1133 + }, + { + "epoch": 16.92, + "learning_rate": 0.00022619999999999997, + "loss": 0.7474, + "step": 1134 + }, + { + "epoch": 16.94, + "learning_rate": 0.0002264, + "loss": 0.9909, + "step": 1135 + }, + { + "epoch": 16.95, + "learning_rate": 0.00022659999999999998, + "loss": 0.8745, + "step": 1136 + }, + { + "epoch": 16.97, + "learning_rate": 0.00022679999999999998, + "loss": 0.4185, + "step": 1137 + }, + { + "epoch": 16.98, + "learning_rate": 0.000227, + "loss": 0.9698, + "step": 1138 + }, + { + "epoch": 17.0, + "learning_rate": 0.00022719999999999997, + "loss": 1.022, + "step": 1139 + }, + { + "epoch": 17.01, + "learning_rate": 0.00022739999999999997, + "loss": 1.537, + "step": 1140 + }, + { + "epoch": 17.03, + "learning_rate": 0.0002276, + "loss": 0.7912, + "step": 1141 + }, + { + "epoch": 17.04, + "learning_rate": 0.00022779999999999998, + "loss": 0.8698, + "step": 1142 + }, + { + "epoch": 17.06, + "learning_rate": 0.00022799999999999999, + "loss": 0.7288, + "step": 1143 + }, + { + "epoch": 17.07, + "learning_rate": 0.0002282, + "loss": 0.2158, + "step": 1144 + }, + { + "epoch": 17.09, + "learning_rate": 0.00022839999999999997, + "loss": 0.631, + "step": 1145 + }, + { + "epoch": 17.1, + "learning_rate": 0.00022859999999999997, + "loss": 0.8167, + "step": 1146 + }, + { + "epoch": 17.12, + "learning_rate": 0.0002288, + "loss": 0.4729, + "step": 1147 + }, + { + "epoch": 17.13, + "learning_rate": 0.00022899999999999998, + "loss": 0.8643, + "step": 1148 + }, + { + "epoch": 17.15, + "learning_rate": 0.0002292, + "loss": 0.5403, + "step": 1149 + }, + { + "epoch": 17.16, + "learning_rate": 0.0002294, + "loss": 0.7748, + "step": 1150 + }, + { + "epoch": 17.18, + "learning_rate": 0.00022959999999999997, + "loss": 0.9571, + "step": 1151 + }, + { + "epoch": 17.19, + "learning_rate": 0.00022979999999999997, + "loss": 0.5619, + "step": 1152 + }, + { + "epoch": 17.21, + "learning_rate": 0.00023, + "loss": 0.6504, + "step": 1153 + }, + { + "epoch": 17.22, + "learning_rate": 0.00023019999999999998, + "loss": 0.5057, + "step": 1154 + }, + { + "epoch": 17.24, + "learning_rate": 0.0002304, + "loss": 0.481, + "step": 1155 + }, + { + "epoch": 17.25, + "learning_rate": 0.0002306, + "loss": 1.1169, + "step": 1156 + }, + { + "epoch": 17.27, + "learning_rate": 0.00023079999999999997, + "loss": 0.8684, + "step": 1157 + }, + { + "epoch": 17.28, + "learning_rate": 0.00023099999999999998, + "loss": 0.5652, + "step": 1158 + }, + { + "epoch": 17.3, + "learning_rate": 0.0002312, + "loss": 0.9157, + "step": 1159 + }, + { + "epoch": 17.31, + "learning_rate": 0.0002314, + "loss": 0.912, + "step": 1160 + }, + { + "epoch": 17.33, + "learning_rate": 0.0002316, + "loss": 1.064, + "step": 1161 + }, + { + "epoch": 17.34, + "learning_rate": 0.00023179999999999997, + "loss": 0.481, + "step": 1162 + }, + { + "epoch": 17.36, + "learning_rate": 0.00023199999999999997, + "loss": 1.0895, + "step": 1163 + }, + { + "epoch": 17.37, + "learning_rate": 0.00023219999999999998, + "loss": 0.9731, + "step": 1164 + }, + { + "epoch": 17.39, + "learning_rate": 0.00023239999999999996, + "loss": 0.7848, + "step": 1165 + }, + { + "epoch": 17.4, + "learning_rate": 0.00023259999999999996, + "loss": 0.8208, + "step": 1166 + }, + { + "epoch": 17.42, + "learning_rate": 0.0002328, + "loss": 0.3356, + "step": 1167 + }, + { + "epoch": 17.43, + "learning_rate": 0.00023299999999999997, + "loss": 0.5094, + "step": 1168 + }, + { + "epoch": 17.45, + "learning_rate": 0.00023319999999999998, + "loss": 0.5053, + "step": 1169 + }, + { + "epoch": 17.46, + "learning_rate": 0.00023339999999999998, + "loss": 0.6877, + "step": 1170 + }, + { + "epoch": 17.48, + "learning_rate": 0.00023359999999999996, + "loss": 0.4477, + "step": 1171 + }, + { + "epoch": 17.49, + "learning_rate": 0.00023379999999999996, + "loss": 0.5053, + "step": 1172 + }, + { + "epoch": 17.51, + "learning_rate": 0.000234, + "loss": 0.9771, + "step": 1173 + }, + { + "epoch": 17.52, + "learning_rate": 0.00023419999999999997, + "loss": 1.0193, + "step": 1174 + }, + { + "epoch": 17.54, + "learning_rate": 0.00023439999999999998, + "loss": 0.9896, + "step": 1175 + }, + { + "epoch": 17.55, + "learning_rate": 0.00023459999999999998, + "loss": 0.7799, + "step": 1176 + }, + { + "epoch": 17.57, + "learning_rate": 0.00023479999999999996, + "loss": 1.0455, + "step": 1177 + }, + { + "epoch": 17.58, + "learning_rate": 0.00023499999999999997, + "loss": 0.5766, + "step": 1178 + }, + { + "epoch": 17.59, + "learning_rate": 0.0002352, + "loss": 1.0561, + "step": 1179 + }, + { + "epoch": 17.61, + "learning_rate": 0.00023539999999999998, + "loss": 0.6531, + "step": 1180 + }, + { + "epoch": 17.62, + "learning_rate": 0.00023559999999999998, + "loss": 1.1587, + "step": 1181 + }, + { + "epoch": 17.64, + "learning_rate": 0.00023579999999999999, + "loss": 0.3106, + "step": 1182 + }, + { + "epoch": 17.65, + "learning_rate": 0.00023599999999999996, + "loss": 0.9023, + "step": 1183 + }, + { + "epoch": 17.67, + "learning_rate": 0.00023619999999999997, + "loss": 0.5792, + "step": 1184 + }, + { + "epoch": 17.68, + "learning_rate": 0.0002364, + "loss": 0.7693, + "step": 1185 + }, + { + "epoch": 17.7, + "learning_rate": 0.00023659999999999998, + "loss": 0.4481, + "step": 1186 + }, + { + "epoch": 17.71, + "learning_rate": 0.00023679999999999998, + "loss": 0.6329, + "step": 1187 + }, + { + "epoch": 17.73, + "learning_rate": 0.000237, + "loss": 0.4823, + "step": 1188 + }, + { + "epoch": 17.74, + "learning_rate": 0.00023719999999999997, + "loss": 0.3002, + "step": 1189 + }, + { + "epoch": 17.76, + "learning_rate": 0.00023739999999999997, + "loss": 0.8866, + "step": 1190 + }, + { + "epoch": 17.77, + "learning_rate": 0.0002376, + "loss": 0.6895, + "step": 1191 + }, + { + "epoch": 17.79, + "learning_rate": 0.00023779999999999998, + "loss": 1.239, + "step": 1192 + }, + { + "epoch": 17.8, + "learning_rate": 0.00023799999999999998, + "loss": 0.8187, + "step": 1193 + }, + { + "epoch": 17.82, + "learning_rate": 0.0002382, + "loss": 0.6027, + "step": 1194 + }, + { + "epoch": 17.83, + "learning_rate": 0.00023839999999999997, + "loss": 0.7884, + "step": 1195 + }, + { + "epoch": 17.85, + "learning_rate": 0.00023859999999999997, + "loss": 0.9204, + "step": 1196 + }, + { + "epoch": 17.86, + "learning_rate": 0.0002388, + "loss": 0.7918, + "step": 1197 + }, + { + "epoch": 17.88, + "learning_rate": 0.00023899999999999998, + "loss": 0.7283, + "step": 1198 + }, + { + "epoch": 17.89, + "learning_rate": 0.0002392, + "loss": 0.429, + "step": 1199 + }, + { + "epoch": 17.91, + "learning_rate": 0.0002394, + "loss": 0.4164, + "step": 1200 + }, + { + "epoch": 17.91, + "eval_accuracy": 0.7741067058247675, + "eval_f1": 0.7742523404153536, + "eval_loss": 0.8225661516189575, + "eval_runtime": 343.498, + "eval_samples_per_second": 11.895, + "eval_steps_per_second": 0.745, + "step": 1200 + }, + { + "epoch": 17.92, + "learning_rate": 0.00023959999999999997, + "loss": 0.2999, + "step": 1201 + }, + { + "epoch": 17.94, + "learning_rate": 0.00023979999999999997, + "loss": 0.6004, + "step": 1202 + }, + { + "epoch": 17.95, + "learning_rate": 0.00023999999999999998, + "loss": 0.4274, + "step": 1203 + }, + { + "epoch": 17.97, + "learning_rate": 0.00024019999999999996, + "loss": 0.3801, + "step": 1204 + }, + { + "epoch": 17.98, + "learning_rate": 0.0002404, + "loss": 0.6226, + "step": 1205 + }, + { + "epoch": 18.0, + "learning_rate": 0.0002406, + "loss": 0.8777, + "step": 1206 + }, + { + "epoch": 18.01, + "learning_rate": 0.00024079999999999997, + "loss": 1.1747, + "step": 1207 + }, + { + "epoch": 18.03, + "learning_rate": 0.00024099999999999998, + "loss": 0.6821, + "step": 1208 + }, + { + "epoch": 18.04, + "learning_rate": 0.00024119999999999998, + "loss": 0.64, + "step": 1209 + }, + { + "epoch": 18.06, + "learning_rate": 0.00024139999999999996, + "loss": 0.4556, + "step": 1210 + }, + { + "epoch": 18.07, + "learning_rate": 0.0002416, + "loss": 0.5366, + "step": 1211 + }, + { + "epoch": 18.09, + "learning_rate": 0.0002418, + "loss": 0.3206, + "step": 1212 + }, + { + "epoch": 18.1, + "learning_rate": 0.00024199999999999997, + "loss": 0.496, + "step": 1213 + }, + { + "epoch": 18.12, + "learning_rate": 0.00024219999999999998, + "loss": 0.9585, + "step": 1214 + }, + { + "epoch": 18.13, + "learning_rate": 0.00024239999999999998, + "loss": 0.331, + "step": 1215 + }, + { + "epoch": 18.15, + "learning_rate": 0.00024259999999999996, + "loss": 0.4689, + "step": 1216 + }, + { + "epoch": 18.16, + "learning_rate": 0.0002428, + "loss": 0.9751, + "step": 1217 + }, + { + "epoch": 18.18, + "learning_rate": 0.000243, + "loss": 0.6266, + "step": 1218 + }, + { + "epoch": 18.19, + "learning_rate": 0.00024319999999999998, + "loss": 0.7254, + "step": 1219 + }, + { + "epoch": 18.21, + "learning_rate": 0.00024339999999999998, + "loss": 0.86, + "step": 1220 + }, + { + "epoch": 18.22, + "learning_rate": 0.00024359999999999999, + "loss": 0.7694, + "step": 1221 + }, + { + "epoch": 18.24, + "learning_rate": 0.00024379999999999996, + "loss": 0.5908, + "step": 1222 + }, + { + "epoch": 18.25, + "learning_rate": 0.000244, + "loss": 0.7398, + "step": 1223 + }, + { + "epoch": 18.27, + "learning_rate": 0.00024419999999999997, + "loss": 0.5606, + "step": 1224 + }, + { + "epoch": 18.28, + "learning_rate": 0.0002444, + "loss": 0.9814, + "step": 1225 + }, + { + "epoch": 18.3, + "learning_rate": 0.0002446, + "loss": 1.1783, + "step": 1226 + }, + { + "epoch": 18.31, + "learning_rate": 0.0002448, + "loss": 0.5309, + "step": 1227 + }, + { + "epoch": 18.33, + "learning_rate": 0.000245, + "loss": 0.5142, + "step": 1228 + }, + { + "epoch": 18.34, + "learning_rate": 0.0002452, + "loss": 0.5806, + "step": 1229 + }, + { + "epoch": 18.36, + "learning_rate": 0.00024539999999999995, + "loss": 0.6274, + "step": 1230 + }, + { + "epoch": 18.37, + "learning_rate": 0.00024559999999999995, + "loss": 0.7508, + "step": 1231 + }, + { + "epoch": 18.39, + "learning_rate": 0.0002458, + "loss": 0.2308, + "step": 1232 + }, + { + "epoch": 18.4, + "learning_rate": 0.00024599999999999996, + "loss": 0.6213, + "step": 1233 + }, + { + "epoch": 18.42, + "learning_rate": 0.00024619999999999997, + "loss": 0.8141, + "step": 1234 + }, + { + "epoch": 18.43, + "learning_rate": 0.00024639999999999997, + "loss": 0.2922, + "step": 1235 + }, + { + "epoch": 18.45, + "learning_rate": 0.0002466, + "loss": 0.5329, + "step": 1236 + }, + { + "epoch": 18.46, + "learning_rate": 0.0002468, + "loss": 0.4933, + "step": 1237 + }, + { + "epoch": 18.48, + "learning_rate": 0.000247, + "loss": 0.5091, + "step": 1238 + }, + { + "epoch": 18.49, + "learning_rate": 0.0002472, + "loss": 0.6618, + "step": 1239 + }, + { + "epoch": 18.51, + "learning_rate": 0.0002474, + "loss": 1.1074, + "step": 1240 + }, + { + "epoch": 18.52, + "learning_rate": 0.0002476, + "loss": 0.3855, + "step": 1241 + }, + { + "epoch": 18.54, + "learning_rate": 0.00024779999999999995, + "loss": 0.5161, + "step": 1242 + }, + { + "epoch": 18.55, + "learning_rate": 0.00024799999999999996, + "loss": 0.7131, + "step": 1243 + }, + { + "epoch": 18.57, + "learning_rate": 0.00024819999999999996, + "loss": 0.6932, + "step": 1244 + }, + { + "epoch": 18.58, + "learning_rate": 0.00024839999999999997, + "loss": 0.8774, + "step": 1245 + }, + { + "epoch": 18.59, + "learning_rate": 0.00024859999999999997, + "loss": 0.5768, + "step": 1246 + }, + { + "epoch": 18.61, + "learning_rate": 0.0002488, + "loss": 0.981, + "step": 1247 + }, + { + "epoch": 18.62, + "learning_rate": 0.000249, + "loss": 1.1701, + "step": 1248 + }, + { + "epoch": 18.64, + "learning_rate": 0.0002492, + "loss": 0.735, + "step": 1249 + }, + { + "epoch": 18.65, + "learning_rate": 0.0002494, + "loss": 0.7628, + "step": 1250 + }, + { + "epoch": 18.67, + "learning_rate": 0.00024959999999999994, + "loss": 1.064, + "step": 1251 + }, + { + "epoch": 18.68, + "learning_rate": 0.0002498, + "loss": 1.0694, + "step": 1252 + }, + { + "epoch": 18.7, + "learning_rate": 0.00025, + "loss": 0.4311, + "step": 1253 + }, + { + "epoch": 18.71, + "learning_rate": 0.00025019999999999996, + "loss": 0.7704, + "step": 1254 + }, + { + "epoch": 18.73, + "learning_rate": 0.00025039999999999996, + "loss": 0.2663, + "step": 1255 + }, + { + "epoch": 18.74, + "learning_rate": 0.00025059999999999997, + "loss": 0.3193, + "step": 1256 + }, + { + "epoch": 18.76, + "learning_rate": 0.00025079999999999997, + "loss": 0.5628, + "step": 1257 + }, + { + "epoch": 18.77, + "learning_rate": 0.000251, + "loss": 0.779, + "step": 1258 + }, + { + "epoch": 18.79, + "learning_rate": 0.0002512, + "loss": 1.1226, + "step": 1259 + }, + { + "epoch": 18.8, + "learning_rate": 0.0002514, + "loss": 0.8199, + "step": 1260 + }, + { + "epoch": 18.82, + "learning_rate": 0.0002516, + "loss": 0.6151, + "step": 1261 + }, + { + "epoch": 18.83, + "learning_rate": 0.0002518, + "loss": 0.5055, + "step": 1262 + }, + { + "epoch": 18.85, + "learning_rate": 0.00025199999999999995, + "loss": 0.1621, + "step": 1263 + }, + { + "epoch": 18.86, + "learning_rate": 0.0002522, + "loss": 0.5277, + "step": 1264 + }, + { + "epoch": 18.88, + "learning_rate": 0.0002524, + "loss": 0.9161, + "step": 1265 + }, + { + "epoch": 18.89, + "learning_rate": 0.00025259999999999996, + "loss": 0.6097, + "step": 1266 + }, + { + "epoch": 18.91, + "learning_rate": 0.00025279999999999996, + "loss": 0.4727, + "step": 1267 + }, + { + "epoch": 18.92, + "learning_rate": 0.00025299999999999997, + "loss": 0.3202, + "step": 1268 + }, + { + "epoch": 18.94, + "learning_rate": 0.0002532, + "loss": 1.0695, + "step": 1269 + }, + { + "epoch": 18.95, + "learning_rate": 0.0002534, + "loss": 0.6529, + "step": 1270 + }, + { + "epoch": 18.97, + "learning_rate": 0.0002536, + "loss": 0.276, + "step": 1271 + }, + { + "epoch": 18.98, + "learning_rate": 0.0002538, + "loss": 0.5075, + "step": 1272 + }, + { + "epoch": 19.0, + "learning_rate": 0.000254, + "loss": 0.6272, + "step": 1273 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002542, + "loss": 1.1173, + "step": 1274 + }, + { + "epoch": 19.03, + "learning_rate": 0.00025439999999999995, + "loss": 0.3821, + "step": 1275 + }, + { + "epoch": 19.04, + "learning_rate": 0.0002546, + "loss": 0.544, + "step": 1276 + }, + { + "epoch": 19.06, + "learning_rate": 0.0002548, + "loss": 0.3951, + "step": 1277 + }, + { + "epoch": 19.07, + "learning_rate": 0.00025499999999999996, + "loss": 0.7183, + "step": 1278 + }, + { + "epoch": 19.09, + "learning_rate": 0.00025519999999999997, + "loss": 0.5846, + "step": 1279 + }, + { + "epoch": 19.1, + "learning_rate": 0.0002554, + "loss": 0.8813, + "step": 1280 + }, + { + "epoch": 19.12, + "learning_rate": 0.0002556, + "loss": 0.2713, + "step": 1281 + }, + { + "epoch": 19.13, + "learning_rate": 0.0002558, + "loss": 0.666, + "step": 1282 + }, + { + "epoch": 19.15, + "learning_rate": 0.000256, + "loss": 0.7438, + "step": 1283 + }, + { + "epoch": 19.16, + "learning_rate": 0.0002562, + "loss": 0.5591, + "step": 1284 + }, + { + "epoch": 19.18, + "learning_rate": 0.0002564, + "loss": 0.4984, + "step": 1285 + }, + { + "epoch": 19.19, + "learning_rate": 0.00025659999999999995, + "loss": 0.6012, + "step": 1286 + }, + { + "epoch": 19.21, + "learning_rate": 0.00025679999999999995, + "loss": 0.6843, + "step": 1287 + }, + { + "epoch": 19.22, + "learning_rate": 0.00025699999999999996, + "loss": 0.7454, + "step": 1288 + }, + { + "epoch": 19.24, + "learning_rate": 0.00025719999999999996, + "loss": 0.5694, + "step": 1289 + }, + { + "epoch": 19.25, + "learning_rate": 0.00025739999999999997, + "loss": 0.7526, + "step": 1290 + }, + { + "epoch": 19.27, + "learning_rate": 0.0002576, + "loss": 0.7195, + "step": 1291 + }, + { + "epoch": 19.28, + "learning_rate": 0.0002578, + "loss": 0.5612, + "step": 1292 + }, + { + "epoch": 19.3, + "learning_rate": 0.000258, + "loss": 0.9403, + "step": 1293 + }, + { + "epoch": 19.31, + "learning_rate": 0.0002582, + "loss": 0.5989, + "step": 1294 + }, + { + "epoch": 19.33, + "learning_rate": 0.00025839999999999994, + "loss": 0.8607, + "step": 1295 + }, + { + "epoch": 19.34, + "learning_rate": 0.0002586, + "loss": 0.5824, + "step": 1296 + }, + { + "epoch": 19.36, + "learning_rate": 0.0002588, + "loss": 0.6549, + "step": 1297 + }, + { + "epoch": 19.37, + "learning_rate": 0.00025899999999999995, + "loss": 0.6932, + "step": 1298 + }, + { + "epoch": 19.39, + "learning_rate": 0.00025919999999999996, + "loss": 0.6864, + "step": 1299 + }, + { + "epoch": 19.4, + "learning_rate": 0.00025939999999999996, + "loss": 0.5762, + "step": 1300 + }, + { + "epoch": 19.42, + "learning_rate": 0.00025959999999999997, + "loss": 0.8634, + "step": 1301 + }, + { + "epoch": 19.43, + "learning_rate": 0.00025979999999999997, + "loss": 0.6391, + "step": 1302 + }, + { + "epoch": 19.45, + "learning_rate": 0.00026, + "loss": 0.7527, + "step": 1303 + }, + { + "epoch": 19.46, + "learning_rate": 0.0002602, + "loss": 0.4871, + "step": 1304 + }, + { + "epoch": 19.48, + "learning_rate": 0.0002604, + "loss": 0.2296, + "step": 1305 + }, + { + "epoch": 19.49, + "learning_rate": 0.0002606, + "loss": 0.282, + "step": 1306 + }, + { + "epoch": 19.51, + "learning_rate": 0.00026079999999999994, + "loss": 0.8365, + "step": 1307 + }, + { + "epoch": 19.52, + "learning_rate": 0.000261, + "loss": 0.5651, + "step": 1308 + }, + { + "epoch": 19.54, + "learning_rate": 0.0002612, + "loss": 0.8462, + "step": 1309 + }, + { + "epoch": 19.55, + "learning_rate": 0.00026139999999999996, + "loss": 0.7099, + "step": 1310 + }, + { + "epoch": 19.57, + "learning_rate": 0.00026159999999999996, + "loss": 0.6501, + "step": 1311 + }, + { + "epoch": 19.58, + "learning_rate": 0.00026179999999999997, + "loss": 0.6466, + "step": 1312 + }, + { + "epoch": 19.59, + "learning_rate": 0.00026199999999999997, + "loss": 0.5846, + "step": 1313 + }, + { + "epoch": 19.61, + "learning_rate": 0.0002622, + "loss": 0.9175, + "step": 1314 + }, + { + "epoch": 19.62, + "learning_rate": 0.0002624, + "loss": 0.4235, + "step": 1315 + }, + { + "epoch": 19.64, + "learning_rate": 0.0002626, + "loss": 0.4004, + "step": 1316 + }, + { + "epoch": 19.65, + "learning_rate": 0.0002628, + "loss": 0.5586, + "step": 1317 + }, + { + "epoch": 19.67, + "learning_rate": 0.000263, + "loss": 0.5125, + "step": 1318 + }, + { + "epoch": 19.68, + "learning_rate": 0.00026319999999999995, + "loss": 0.4819, + "step": 1319 + }, + { + "epoch": 19.7, + "learning_rate": 0.00026339999999999995, + "loss": 0.3895, + "step": 1320 + }, + { + "epoch": 19.71, + "learning_rate": 0.0002636, + "loss": 0.4449, + "step": 1321 + }, + { + "epoch": 19.73, + "learning_rate": 0.00026379999999999996, + "loss": 0.6049, + "step": 1322 + }, + { + "epoch": 19.74, + "learning_rate": 0.00026399999999999997, + "loss": 1.2677, + "step": 1323 + }, + { + "epoch": 19.76, + "learning_rate": 0.00026419999999999997, + "loss": 1.0832, + "step": 1324 + }, + { + "epoch": 19.77, + "learning_rate": 0.0002644, + "loss": 0.7723, + "step": 1325 + }, + { + "epoch": 19.79, + "learning_rate": 0.0002646, + "loss": 1.1757, + "step": 1326 + }, + { + "epoch": 19.8, + "learning_rate": 0.0002648, + "loss": 0.5295, + "step": 1327 + }, + { + "epoch": 19.82, + "learning_rate": 0.000265, + "loss": 0.2148, + "step": 1328 + }, + { + "epoch": 19.83, + "learning_rate": 0.0002652, + "loss": 0.3802, + "step": 1329 + }, + { + "epoch": 19.85, + "learning_rate": 0.0002654, + "loss": 0.3808, + "step": 1330 + }, + { + "epoch": 19.86, + "learning_rate": 0.00026559999999999995, + "loss": 0.6835, + "step": 1331 + }, + { + "epoch": 19.88, + "learning_rate": 0.00026579999999999996, + "loss": 0.7268, + "step": 1332 + }, + { + "epoch": 19.89, + "learning_rate": 0.000266, + "loss": 0.3596, + "step": 1333 + }, + { + "epoch": 19.91, + "learning_rate": 0.00026619999999999997, + "loss": 0.6912, + "step": 1334 + }, + { + "epoch": 19.92, + "learning_rate": 0.00026639999999999997, + "loss": 1.3149, + "step": 1335 + }, + { + "epoch": 19.94, + "learning_rate": 0.0002666, + "loss": 0.7943, + "step": 1336 + }, + { + "epoch": 19.95, + "learning_rate": 0.0002668, + "loss": 1.0217, + "step": 1337 + }, + { + "epoch": 19.97, + "learning_rate": 0.000267, + "loss": 0.6082, + "step": 1338 + }, + { + "epoch": 19.98, + "learning_rate": 0.0002672, + "loss": 0.436, + "step": 1339 + }, + { + "epoch": 20.0, + "learning_rate": 0.0002674, + "loss": 0.7156, + "step": 1340 + }, + { + "epoch": 20.01, + "learning_rate": 0.0002676, + "loss": 0.6298, + "step": 1341 + }, + { + "epoch": 20.03, + "learning_rate": 0.0002678, + "loss": 0.5792, + "step": 1342 + }, + { + "epoch": 20.04, + "learning_rate": 0.00026799999999999995, + "loss": 0.6893, + "step": 1343 + }, + { + "epoch": 20.06, + "learning_rate": 0.00026819999999999996, + "loss": 0.6991, + "step": 1344 + }, + { + "epoch": 20.07, + "learning_rate": 0.0002684, + "loss": 0.7287, + "step": 1345 + }, + { + "epoch": 20.09, + "learning_rate": 0.00026859999999999997, + "loss": 0.4918, + "step": 1346 + }, + { + "epoch": 20.1, + "learning_rate": 0.0002688, + "loss": 0.912, + "step": 1347 + }, + { + "epoch": 20.12, + "learning_rate": 0.000269, + "loss": 0.5828, + "step": 1348 + }, + { + "epoch": 20.13, + "learning_rate": 0.0002692, + "loss": 0.6996, + "step": 1349 + }, + { + "epoch": 20.15, + "learning_rate": 0.0002694, + "loss": 0.46, + "step": 1350 + }, + { + "epoch": 20.16, + "learning_rate": 0.00026959999999999994, + "loss": 0.3978, + "step": 1351 + }, + { + "epoch": 20.18, + "learning_rate": 0.0002698, + "loss": 0.5182, + "step": 1352 + }, + { + "epoch": 20.19, + "learning_rate": 0.00027, + "loss": 0.893, + "step": 1353 + }, + { + "epoch": 20.21, + "learning_rate": 0.00027019999999999995, + "loss": 0.9816, + "step": 1354 + }, + { + "epoch": 20.22, + "learning_rate": 0.00027039999999999996, + "loss": 0.4094, + "step": 1355 + }, + { + "epoch": 20.24, + "learning_rate": 0.00027059999999999996, + "loss": 0.4442, + "step": 1356 + }, + { + "epoch": 20.25, + "learning_rate": 0.00027079999999999997, + "loss": 0.4466, + "step": 1357 + }, + { + "epoch": 20.27, + "learning_rate": 0.000271, + "loss": 0.415, + "step": 1358 + }, + { + "epoch": 20.28, + "learning_rate": 0.0002712, + "loss": 0.9744, + "step": 1359 + }, + { + "epoch": 20.3, + "learning_rate": 0.0002714, + "loss": 0.9508, + "step": 1360 + }, + { + "epoch": 20.31, + "learning_rate": 0.0002716, + "loss": 0.635, + "step": 1361 + }, + { + "epoch": 20.33, + "learning_rate": 0.0002718, + "loss": 0.7858, + "step": 1362 + }, + { + "epoch": 20.34, + "learning_rate": 0.00027199999999999994, + "loss": 1.0854, + "step": 1363 + }, + { + "epoch": 20.36, + "learning_rate": 0.00027219999999999995, + "loss": 0.9812, + "step": 1364 + }, + { + "epoch": 20.37, + "learning_rate": 0.0002724, + "loss": 0.5966, + "step": 1365 + }, + { + "epoch": 20.39, + "learning_rate": 0.00027259999999999996, + "loss": 0.4025, + "step": 1366 + }, + { + "epoch": 20.4, + "learning_rate": 0.00027279999999999996, + "loss": 0.5009, + "step": 1367 + }, + { + "epoch": 20.42, + "learning_rate": 0.00027299999999999997, + "loss": 0.1482, + "step": 1368 + }, + { + "epoch": 20.43, + "learning_rate": 0.00027319999999999997, + "loss": 0.5557, + "step": 1369 + }, + { + "epoch": 20.45, + "learning_rate": 0.0002734, + "loss": 0.6613, + "step": 1370 + }, + { + "epoch": 20.46, + "learning_rate": 0.0002736, + "loss": 0.8169, + "step": 1371 + }, + { + "epoch": 20.48, + "learning_rate": 0.0002738, + "loss": 0.5959, + "step": 1372 + }, + { + "epoch": 20.49, + "learning_rate": 0.000274, + "loss": 0.4948, + "step": 1373 + }, + { + "epoch": 20.51, + "learning_rate": 0.0002742, + "loss": 0.7418, + "step": 1374 + }, + { + "epoch": 20.52, + "learning_rate": 0.00027439999999999995, + "loss": 0.5943, + "step": 1375 + }, + { + "epoch": 20.54, + "learning_rate": 0.00027459999999999995, + "loss": 0.716, + "step": 1376 + }, + { + "epoch": 20.55, + "learning_rate": 0.0002748, + "loss": 0.8324, + "step": 1377 + }, + { + "epoch": 20.57, + "learning_rate": 0.00027499999999999996, + "loss": 0.2354, + "step": 1378 + }, + { + "epoch": 20.58, + "learning_rate": 0.00027519999999999997, + "loss": 0.5413, + "step": 1379 + }, + { + "epoch": 20.59, + "learning_rate": 0.00027539999999999997, + "loss": 0.6385, + "step": 1380 + }, + { + "epoch": 20.61, + "learning_rate": 0.0002756, + "loss": 0.5612, + "step": 1381 + }, + { + "epoch": 20.62, + "learning_rate": 0.0002758, + "loss": 0.9119, + "step": 1382 + }, + { + "epoch": 20.64, + "learning_rate": 0.000276, + "loss": 0.5478, + "step": 1383 + }, + { + "epoch": 20.65, + "learning_rate": 0.0002762, + "loss": 0.5139, + "step": 1384 + }, + { + "epoch": 20.67, + "learning_rate": 0.0002764, + "loss": 0.7378, + "step": 1385 + }, + { + "epoch": 20.68, + "learning_rate": 0.0002766, + "loss": 0.6912, + "step": 1386 + }, + { + "epoch": 20.7, + "learning_rate": 0.00027679999999999995, + "loss": 0.391, + "step": 1387 + }, + { + "epoch": 20.71, + "learning_rate": 0.00027699999999999996, + "loss": 0.3529, + "step": 1388 + }, + { + "epoch": 20.73, + "learning_rate": 0.0002772, + "loss": 0.6294, + "step": 1389 + }, + { + "epoch": 20.74, + "learning_rate": 0.00027739999999999997, + "loss": 0.5305, + "step": 1390 + }, + { + "epoch": 20.76, + "learning_rate": 0.00027759999999999997, + "loss": 0.6153, + "step": 1391 + }, + { + "epoch": 20.77, + "learning_rate": 0.0002778, + "loss": 0.7893, + "step": 1392 + }, + { + "epoch": 20.79, + "learning_rate": 0.000278, + "loss": 0.7969, + "step": 1393 + }, + { + "epoch": 20.8, + "learning_rate": 0.0002782, + "loss": 0.5978, + "step": 1394 + }, + { + "epoch": 20.82, + "learning_rate": 0.0002784, + "loss": 0.7604, + "step": 1395 + }, + { + "epoch": 20.83, + "learning_rate": 0.00027859999999999994, + "loss": 0.8762, + "step": 1396 + }, + { + "epoch": 20.85, + "learning_rate": 0.0002788, + "loss": 0.727, + "step": 1397 + }, + { + "epoch": 20.86, + "learning_rate": 0.000279, + "loss": 0.8943, + "step": 1398 + }, + { + "epoch": 20.88, + "learning_rate": 0.00027919999999999996, + "loss": 0.6312, + "step": 1399 + }, + { + "epoch": 20.89, + "learning_rate": 0.00027939999999999996, + "loss": 0.7669, + "step": 1400 + }, + { + "epoch": 20.89, + "eval_accuracy": 0.7787567302985805, + "eval_f1": 0.778299974393245, + "eval_loss": 0.813083827495575, + "eval_runtime": 344.4352, + "eval_samples_per_second": 11.863, + "eval_steps_per_second": 0.743, + "step": 1400 + }, + { + "epoch": 20.91, + "learning_rate": 0.00027959999999999997, + "loss": 0.455, + "step": 1401 + }, + { + "epoch": 20.92, + "learning_rate": 0.00027979999999999997, + "loss": 0.6066, + "step": 1402 + }, + { + "epoch": 20.94, + "learning_rate": 0.00028, + "loss": 0.3945, + "step": 1403 + }, + { + "epoch": 20.95, + "learning_rate": 0.0002802, + "loss": 0.7466, + "step": 1404 + }, + { + "epoch": 20.97, + "learning_rate": 0.0002804, + "loss": 0.5893, + "step": 1405 + }, + { + "epoch": 20.98, + "learning_rate": 0.0002806, + "loss": 0.6915, + "step": 1406 + }, + { + "epoch": 21.0, + "learning_rate": 0.0002808, + "loss": 1.0937, + "step": 1407 + }, + { + "epoch": 21.01, + "learning_rate": 0.00028099999999999995, + "loss": 0.8182, + "step": 1408 + }, + { + "epoch": 21.03, + "learning_rate": 0.0002812, + "loss": 0.9176, + "step": 1409 + }, + { + "epoch": 21.04, + "learning_rate": 0.00028139999999999996, + "loss": 0.8487, + "step": 1410 + }, + { + "epoch": 21.06, + "learning_rate": 0.00028159999999999996, + "loss": 0.4835, + "step": 1411 + }, + { + "epoch": 21.07, + "learning_rate": 0.00028179999999999997, + "loss": 0.9755, + "step": 1412 + }, + { + "epoch": 21.09, + "learning_rate": 0.00028199999999999997, + "loss": 1.315, + "step": 1413 + }, + { + "epoch": 21.1, + "learning_rate": 0.0002822, + "loss": 0.8134, + "step": 1414 + }, + { + "epoch": 21.12, + "learning_rate": 0.0002824, + "loss": 0.7944, + "step": 1415 + }, + { + "epoch": 21.13, + "learning_rate": 0.0002826, + "loss": 0.4791, + "step": 1416 + }, + { + "epoch": 21.15, + "learning_rate": 0.0002828, + "loss": 0.6249, + "step": 1417 + }, + { + "epoch": 21.16, + "learning_rate": 0.000283, + "loss": 0.8417, + "step": 1418 + }, + { + "epoch": 21.18, + "learning_rate": 0.00028319999999999994, + "loss": 0.774, + "step": 1419 + }, + { + "epoch": 21.19, + "learning_rate": 0.00028339999999999995, + "loss": 0.727, + "step": 1420 + }, + { + "epoch": 21.21, + "learning_rate": 0.0002836, + "loss": 0.3706, + "step": 1421 + }, + { + "epoch": 21.22, + "learning_rate": 0.00028379999999999996, + "loss": 0.3887, + "step": 1422 + }, + { + "epoch": 21.24, + "learning_rate": 0.00028399999999999996, + "loss": 0.6382, + "step": 1423 + }, + { + "epoch": 21.25, + "learning_rate": 0.00028419999999999997, + "loss": 0.718, + "step": 1424 + }, + { + "epoch": 21.27, + "learning_rate": 0.0002844, + "loss": 0.8399, + "step": 1425 + }, + { + "epoch": 21.28, + "learning_rate": 0.0002846, + "loss": 0.9529, + "step": 1426 + }, + { + "epoch": 21.3, + "learning_rate": 0.0002848, + "loss": 0.9305, + "step": 1427 + }, + { + "epoch": 21.31, + "learning_rate": 0.000285, + "loss": 0.7231, + "step": 1428 + }, + { + "epoch": 21.33, + "learning_rate": 0.0002852, + "loss": 0.5729, + "step": 1429 + }, + { + "epoch": 21.34, + "learning_rate": 0.0002854, + "loss": 0.5407, + "step": 1430 + }, + { + "epoch": 21.36, + "learning_rate": 0.00028559999999999995, + "loss": 0.7901, + "step": 1431 + }, + { + "epoch": 21.37, + "learning_rate": 0.00028579999999999995, + "loss": 0.575, + "step": 1432 + }, + { + "epoch": 21.39, + "learning_rate": 0.00028599999999999996, + "loss": 0.219, + "step": 1433 + }, + { + "epoch": 21.4, + "learning_rate": 0.00028619999999999996, + "loss": 0.7134, + "step": 1434 + }, + { + "epoch": 21.42, + "learning_rate": 0.00028639999999999997, + "loss": 0.2605, + "step": 1435 + }, + { + "epoch": 21.43, + "learning_rate": 0.0002866, + "loss": 0.5372, + "step": 1436 + }, + { + "epoch": 21.45, + "learning_rate": 0.0002868, + "loss": 0.4434, + "step": 1437 + }, + { + "epoch": 21.46, + "learning_rate": 0.000287, + "loss": 0.774, + "step": 1438 + }, + { + "epoch": 21.48, + "learning_rate": 0.0002872, + "loss": 0.2877, + "step": 1439 + }, + { + "epoch": 21.49, + "learning_rate": 0.00028739999999999994, + "loss": 0.2525, + "step": 1440 + }, + { + "epoch": 21.51, + "learning_rate": 0.0002876, + "loss": 0.5474, + "step": 1441 + }, + { + "epoch": 21.52, + "learning_rate": 0.0002878, + "loss": 0.4075, + "step": 1442 + }, + { + "epoch": 21.54, + "learning_rate": 0.00028799999999999995, + "loss": 0.72, + "step": 1443 + }, + { + "epoch": 21.55, + "learning_rate": 0.00028819999999999996, + "loss": 0.3995, + "step": 1444 + }, + { + "epoch": 21.57, + "learning_rate": 0.00028839999999999996, + "loss": 0.5891, + "step": 1445 + }, + { + "epoch": 21.58, + "learning_rate": 0.00028859999999999997, + "loss": 0.8102, + "step": 1446 + }, + { + "epoch": 21.59, + "learning_rate": 0.00028879999999999997, + "loss": 0.6418, + "step": 1447 + }, + { + "epoch": 21.61, + "learning_rate": 0.000289, + "loss": 0.3104, + "step": 1448 + }, + { + "epoch": 21.62, + "learning_rate": 0.0002892, + "loss": 0.8299, + "step": 1449 + }, + { + "epoch": 21.64, + "learning_rate": 0.0002894, + "loss": 0.2404, + "step": 1450 + }, + { + "epoch": 21.65, + "learning_rate": 0.0002896, + "loss": 0.3771, + "step": 1451 + }, + { + "epoch": 21.67, + "learning_rate": 0.00028979999999999994, + "loss": 0.7731, + "step": 1452 + }, + { + "epoch": 21.68, + "learning_rate": 0.00029, + "loss": 0.2826, + "step": 1453 + }, + { + "epoch": 21.7, + "learning_rate": 0.0002902, + "loss": 0.5406, + "step": 1454 + }, + { + "epoch": 21.71, + "learning_rate": 0.00029039999999999996, + "loss": 0.5265, + "step": 1455 + }, + { + "epoch": 21.73, + "learning_rate": 0.00029059999999999996, + "loss": 0.2196, + "step": 1456 + }, + { + "epoch": 21.74, + "learning_rate": 0.00029079999999999997, + "loss": 0.5047, + "step": 1457 + }, + { + "epoch": 21.76, + "learning_rate": 0.00029099999999999997, + "loss": 0.8224, + "step": 1458 + }, + { + "epoch": 21.77, + "learning_rate": 0.0002912, + "loss": 0.7324, + "step": 1459 + }, + { + "epoch": 21.79, + "learning_rate": 0.0002914, + "loss": 0.9723, + "step": 1460 + }, + { + "epoch": 21.8, + "learning_rate": 0.0002916, + "loss": 0.7277, + "step": 1461 + }, + { + "epoch": 21.82, + "learning_rate": 0.0002918, + "loss": 0.6583, + "step": 1462 + }, + { + "epoch": 21.83, + "learning_rate": 0.000292, + "loss": 0.6693, + "step": 1463 + }, + { + "epoch": 21.85, + "learning_rate": 0.00029219999999999995, + "loss": 0.5129, + "step": 1464 + }, + { + "epoch": 21.86, + "learning_rate": 0.0002924, + "loss": 0.5233, + "step": 1465 + }, + { + "epoch": 21.88, + "learning_rate": 0.0002926, + "loss": 0.2062, + "step": 1466 + }, + { + "epoch": 21.89, + "learning_rate": 0.00029279999999999996, + "loss": 0.9001, + "step": 1467 + }, + { + "epoch": 21.91, + "learning_rate": 0.00029299999999999997, + "loss": 0.3718, + "step": 1468 + }, + { + "epoch": 21.92, + "learning_rate": 0.00029319999999999997, + "loss": 0.5696, + "step": 1469 + }, + { + "epoch": 21.94, + "learning_rate": 0.0002934, + "loss": 0.7076, + "step": 1470 + }, + { + "epoch": 21.95, + "learning_rate": 0.0002936, + "loss": 0.7528, + "step": 1471 + }, + { + "epoch": 21.97, + "learning_rate": 0.00029379999999999993, + "loss": 0.4238, + "step": 1472 + }, + { + "epoch": 21.98, + "learning_rate": 0.000294, + "loss": 0.5624, + "step": 1473 + }, + { + "epoch": 22.0, + "learning_rate": 0.0002942, + "loss": 0.8636, + "step": 1474 + }, + { + "epoch": 22.01, + "learning_rate": 0.00029439999999999995, + "loss": 0.3772, + "step": 1475 + }, + { + "epoch": 22.03, + "learning_rate": 0.00029459999999999995, + "loss": 1.2659, + "step": 1476 + }, + { + "epoch": 22.04, + "learning_rate": 0.00029479999999999996, + "loss": 0.4182, + "step": 1477 + }, + { + "epoch": 22.06, + "learning_rate": 0.00029499999999999996, + "loss": 0.3934, + "step": 1478 + }, + { + "epoch": 22.07, + "learning_rate": 0.00029519999999999997, + "loss": 0.4777, + "step": 1479 + }, + { + "epoch": 22.09, + "learning_rate": 0.00029539999999999997, + "loss": 0.8398, + "step": 1480 + }, + { + "epoch": 22.1, + "learning_rate": 0.0002956, + "loss": 0.7846, + "step": 1481 + }, + { + "epoch": 22.12, + "learning_rate": 0.0002958, + "loss": 0.8802, + "step": 1482 + }, + { + "epoch": 22.13, + "learning_rate": 0.000296, + "loss": 0.4906, + "step": 1483 + }, + { + "epoch": 22.15, + "learning_rate": 0.00029619999999999994, + "loss": 0.5514, + "step": 1484 + }, + { + "epoch": 22.16, + "learning_rate": 0.0002964, + "loss": 0.3834, + "step": 1485 + }, + { + "epoch": 22.18, + "learning_rate": 0.0002966, + "loss": 0.3833, + "step": 1486 + }, + { + "epoch": 22.19, + "learning_rate": 0.00029679999999999995, + "loss": 0.2823, + "step": 1487 + }, + { + "epoch": 22.21, + "learning_rate": 0.00029699999999999996, + "loss": 0.7299, + "step": 1488 + }, + { + "epoch": 22.22, + "learning_rate": 0.00029719999999999996, + "loss": 0.2089, + "step": 1489 + }, + { + "epoch": 22.24, + "learning_rate": 0.00029739999999999996, + "loss": 0.103, + "step": 1490 + }, + { + "epoch": 22.25, + "learning_rate": 0.00029759999999999997, + "loss": 0.4865, + "step": 1491 + }, + { + "epoch": 22.27, + "learning_rate": 0.0002978, + "loss": 0.9131, + "step": 1492 + }, + { + "epoch": 22.28, + "learning_rate": 0.000298, + "loss": 0.4465, + "step": 1493 + }, + { + "epoch": 22.3, + "learning_rate": 0.0002982, + "loss": 0.3508, + "step": 1494 + }, + { + "epoch": 22.31, + "learning_rate": 0.0002984, + "loss": 0.4053, + "step": 1495 + }, + { + "epoch": 22.33, + "learning_rate": 0.00029859999999999994, + "loss": 1.0208, + "step": 1496 + }, + { + "epoch": 22.34, + "learning_rate": 0.0002988, + "loss": 0.3976, + "step": 1497 + }, + { + "epoch": 22.36, + "learning_rate": 0.000299, + "loss": 0.6894, + "step": 1498 + }, + { + "epoch": 22.37, + "learning_rate": 0.00029919999999999995, + "loss": 0.1601, + "step": 1499 + }, + { + "epoch": 22.39, + "learning_rate": 0.00029939999999999996, + "loss": 0.4907, + "step": 1500 + }, + { + "epoch": 22.4, + "learning_rate": 0.00029959999999999996, + "loss": 0.3205, + "step": 1501 + }, + { + "epoch": 22.42, + "learning_rate": 0.00029979999999999997, + "loss": 1.2744, + "step": 1502 + }, + { + "epoch": 22.43, + "learning_rate": 0.0003, + "loss": 0.4245, + "step": 1503 + }, + { + "epoch": 22.45, + "learning_rate": 0.0002999649122807017, + "loss": 0.7102, + "step": 1504 + }, + { + "epoch": 22.46, + "learning_rate": 0.00029992982456140347, + "loss": 1.26, + "step": 1505 + }, + { + "epoch": 22.48, + "learning_rate": 0.0002998947368421052, + "loss": 0.8482, + "step": 1506 + }, + { + "epoch": 22.49, + "learning_rate": 0.00029985964912280697, + "loss": 0.6502, + "step": 1507 + }, + { + "epoch": 22.51, + "learning_rate": 0.0002998245614035087, + "loss": 1.0905, + "step": 1508 + }, + { + "epoch": 22.52, + "learning_rate": 0.0002997894736842105, + "loss": 0.9749, + "step": 1509 + }, + { + "epoch": 22.54, + "learning_rate": 0.00029975438596491227, + "loss": 0.8889, + "step": 1510 + }, + { + "epoch": 22.55, + "learning_rate": 0.000299719298245614, + "loss": 0.9885, + "step": 1511 + }, + { + "epoch": 22.57, + "learning_rate": 0.00029968421052631577, + "loss": 0.9825, + "step": 1512 + }, + { + "epoch": 22.58, + "learning_rate": 0.0002996491228070175, + "loss": 0.5989, + "step": 1513 + }, + { + "epoch": 22.59, + "learning_rate": 0.00029961403508771926, + "loss": 0.6243, + "step": 1514 + }, + { + "epoch": 22.61, + "learning_rate": 0.000299578947368421, + "loss": 0.9003, + "step": 1515 + }, + { + "epoch": 22.62, + "learning_rate": 0.0002995438596491228, + "loss": 0.3962, + "step": 1516 + }, + { + "epoch": 22.64, + "learning_rate": 0.00029950877192982457, + "loss": 0.6765, + "step": 1517 + }, + { + "epoch": 22.65, + "learning_rate": 0.0002994736842105263, + "loss": 0.242, + "step": 1518 + }, + { + "epoch": 22.67, + "learning_rate": 0.000299438596491228, + "loss": 0.456, + "step": 1519 + }, + { + "epoch": 22.68, + "learning_rate": 0.0002994035087719298, + "loss": 0.4331, + "step": 1520 + }, + { + "epoch": 22.7, + "learning_rate": 0.00029936842105263156, + "loss": 0.1624, + "step": 1521 + }, + { + "epoch": 22.71, + "learning_rate": 0.0002993333333333333, + "loss": 0.7405, + "step": 1522 + }, + { + "epoch": 22.73, + "learning_rate": 0.00029929824561403506, + "loss": 0.2843, + "step": 1523 + }, + { + "epoch": 22.74, + "learning_rate": 0.0002992631578947368, + "loss": 0.7121, + "step": 1524 + }, + { + "epoch": 22.76, + "learning_rate": 0.00029922807017543856, + "loss": 1.2846, + "step": 1525 + }, + { + "epoch": 22.77, + "learning_rate": 0.0002991929824561403, + "loss": 0.6734, + "step": 1526 + }, + { + "epoch": 22.79, + "learning_rate": 0.00029915789473684205, + "loss": 1.0198, + "step": 1527 + }, + { + "epoch": 22.8, + "learning_rate": 0.00029912280701754386, + "loss": 0.5129, + "step": 1528 + }, + { + "epoch": 22.82, + "learning_rate": 0.0002990877192982456, + "loss": 0.4385, + "step": 1529 + }, + { + "epoch": 22.83, + "learning_rate": 0.00029905263157894735, + "loss": 0.8057, + "step": 1530 + }, + { + "epoch": 22.85, + "learning_rate": 0.0002990175438596491, + "loss": 1.1243, + "step": 1531 + }, + { + "epoch": 22.86, + "learning_rate": 0.00029898245614035085, + "loss": 1.09, + "step": 1532 + }, + { + "epoch": 22.88, + "learning_rate": 0.0002989473684210526, + "loss": 0.4577, + "step": 1533 + }, + { + "epoch": 22.89, + "learning_rate": 0.00029891228070175435, + "loss": 0.5147, + "step": 1534 + }, + { + "epoch": 22.91, + "learning_rate": 0.00029887719298245615, + "loss": 0.5657, + "step": 1535 + }, + { + "epoch": 22.92, + "learning_rate": 0.00029884210526315785, + "loss": 0.7621, + "step": 1536 + }, + { + "epoch": 22.94, + "learning_rate": 0.0002988070175438596, + "loss": 0.4339, + "step": 1537 + }, + { + "epoch": 22.95, + "learning_rate": 0.00029877192982456134, + "loss": 0.6648, + "step": 1538 + }, + { + "epoch": 22.97, + "learning_rate": 0.00029873684210526315, + "loss": 0.4844, + "step": 1539 + }, + { + "epoch": 22.98, + "learning_rate": 0.0002987017543859649, + "loss": 0.4958, + "step": 1540 + }, + { + "epoch": 23.0, + "learning_rate": 0.00029866666666666664, + "loss": 0.6266, + "step": 1541 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002986315789473684, + "loss": 1.2849, + "step": 1542 + }, + { + "epoch": 23.03, + "learning_rate": 0.00029859649122807014, + "loss": 0.8311, + "step": 1543 + }, + { + "epoch": 23.04, + "learning_rate": 0.0002985614035087719, + "loss": 0.7814, + "step": 1544 + }, + { + "epoch": 23.06, + "learning_rate": 0.00029852631578947364, + "loss": 0.7051, + "step": 1545 + }, + { + "epoch": 23.07, + "learning_rate": 0.00029849122807017544, + "loss": 0.752, + "step": 1546 + }, + { + "epoch": 23.09, + "learning_rate": 0.0002984561403508772, + "loss": 0.4662, + "step": 1547 + }, + { + "epoch": 23.1, + "learning_rate": 0.00029842105263157894, + "loss": 0.4054, + "step": 1548 + }, + { + "epoch": 23.12, + "learning_rate": 0.0002983859649122807, + "loss": 0.9495, + "step": 1549 + }, + { + "epoch": 23.13, + "learning_rate": 0.00029835087719298244, + "loss": 0.4645, + "step": 1550 + }, + { + "epoch": 23.15, + "learning_rate": 0.0002983157894736842, + "loss": 0.9708, + "step": 1551 + }, + { + "epoch": 23.16, + "learning_rate": 0.00029828070175438593, + "loss": 0.5805, + "step": 1552 + }, + { + "epoch": 23.18, + "learning_rate": 0.0002982456140350877, + "loss": 0.6846, + "step": 1553 + }, + { + "epoch": 23.19, + "learning_rate": 0.00029821052631578943, + "loss": 0.7177, + "step": 1554 + }, + { + "epoch": 23.21, + "learning_rate": 0.0002981754385964912, + "loss": 0.764, + "step": 1555 + }, + { + "epoch": 23.22, + "learning_rate": 0.00029814035087719293, + "loss": 0.512, + "step": 1556 + }, + { + "epoch": 23.24, + "learning_rate": 0.00029810526315789473, + "loss": 0.1168, + "step": 1557 + }, + { + "epoch": 23.25, + "learning_rate": 0.0002980701754385965, + "loss": 0.3928, + "step": 1558 + }, + { + "epoch": 23.27, + "learning_rate": 0.00029803508771929823, + "loss": 0.624, + "step": 1559 + }, + { + "epoch": 23.28, + "learning_rate": 0.000298, + "loss": 0.7086, + "step": 1560 + }, + { + "epoch": 23.3, + "learning_rate": 0.00029796491228070173, + "loss": 0.6273, + "step": 1561 + }, + { + "epoch": 23.31, + "learning_rate": 0.0002979298245614035, + "loss": 0.6187, + "step": 1562 + }, + { + "epoch": 23.33, + "learning_rate": 0.0002978947368421052, + "loss": 0.6805, + "step": 1563 + }, + { + "epoch": 23.34, + "learning_rate": 0.000297859649122807, + "loss": 0.5361, + "step": 1564 + }, + { + "epoch": 23.36, + "learning_rate": 0.0002978245614035088, + "loss": 0.6424, + "step": 1565 + }, + { + "epoch": 23.37, + "learning_rate": 0.0002977894736842105, + "loss": 0.1724, + "step": 1566 + }, + { + "epoch": 23.39, + "learning_rate": 0.0002977543859649122, + "loss": 0.936, + "step": 1567 + }, + { + "epoch": 23.4, + "learning_rate": 0.000297719298245614, + "loss": 0.5024, + "step": 1568 + }, + { + "epoch": 23.42, + "learning_rate": 0.00029768421052631577, + "loss": 0.4122, + "step": 1569 + }, + { + "epoch": 23.43, + "learning_rate": 0.0002976491228070175, + "loss": 0.7544, + "step": 1570 + }, + { + "epoch": 23.45, + "learning_rate": 0.00029761403508771927, + "loss": 0.3982, + "step": 1571 + }, + { + "epoch": 23.46, + "learning_rate": 0.000297578947368421, + "loss": 0.2862, + "step": 1572 + }, + { + "epoch": 23.48, + "learning_rate": 0.00029754385964912277, + "loss": 0.4515, + "step": 1573 + }, + { + "epoch": 23.49, + "learning_rate": 0.0002975087719298245, + "loss": 0.1853, + "step": 1574 + }, + { + "epoch": 23.51, + "learning_rate": 0.00029747368421052627, + "loss": 0.7664, + "step": 1575 + }, + { + "epoch": 23.52, + "learning_rate": 0.00029743859649122807, + "loss": 0.6729, + "step": 1576 + }, + { + "epoch": 23.54, + "learning_rate": 0.0002974035087719298, + "loss": 0.3235, + "step": 1577 + }, + { + "epoch": 23.55, + "learning_rate": 0.00029736842105263157, + "loss": 0.5128, + "step": 1578 + }, + { + "epoch": 23.57, + "learning_rate": 0.0002973333333333333, + "loss": 0.8515, + "step": 1579 + }, + { + "epoch": 23.58, + "learning_rate": 0.00029729824561403506, + "loss": 0.8926, + "step": 1580 + }, + { + "epoch": 23.59, + "learning_rate": 0.0002972631578947368, + "loss": 1.0383, + "step": 1581 + }, + { + "epoch": 23.61, + "learning_rate": 0.00029722807017543856, + "loss": 1.0073, + "step": 1582 + }, + { + "epoch": 23.62, + "learning_rate": 0.00029719298245614036, + "loss": 1.0494, + "step": 1583 + }, + { + "epoch": 23.64, + "learning_rate": 0.00029715789473684206, + "loss": 0.7003, + "step": 1584 + }, + { + "epoch": 23.65, + "learning_rate": 0.0002971228070175438, + "loss": 0.5168, + "step": 1585 + }, + { + "epoch": 23.67, + "learning_rate": 0.00029708771929824556, + "loss": 0.6879, + "step": 1586 + }, + { + "epoch": 23.68, + "learning_rate": 0.00029705263157894736, + "loss": 0.2879, + "step": 1587 + }, + { + "epoch": 23.7, + "learning_rate": 0.0002970175438596491, + "loss": 0.8452, + "step": 1588 + }, + { + "epoch": 23.71, + "learning_rate": 0.00029698245614035086, + "loss": 0.2214, + "step": 1589 + }, + { + "epoch": 23.73, + "learning_rate": 0.0002969473684210526, + "loss": 0.5489, + "step": 1590 + }, + { + "epoch": 23.74, + "learning_rate": 0.00029691228070175435, + "loss": 0.4714, + "step": 1591 + }, + { + "epoch": 23.76, + "learning_rate": 0.0002968771929824561, + "loss": 1.0541, + "step": 1592 + }, + { + "epoch": 23.77, + "learning_rate": 0.00029684210526315785, + "loss": 0.8526, + "step": 1593 + }, + { + "epoch": 23.79, + "learning_rate": 0.00029680701754385965, + "loss": 0.5043, + "step": 1594 + }, + { + "epoch": 23.8, + "learning_rate": 0.0002967719298245614, + "loss": 0.4541, + "step": 1595 + }, + { + "epoch": 23.82, + "learning_rate": 0.00029673684210526315, + "loss": 0.4321, + "step": 1596 + }, + { + "epoch": 23.83, + "learning_rate": 0.00029670175438596485, + "loss": 0.7054, + "step": 1597 + }, + { + "epoch": 23.85, + "learning_rate": 0.00029666666666666665, + "loss": 0.8708, + "step": 1598 + }, + { + "epoch": 23.86, + "learning_rate": 0.0002966315789473684, + "loss": 0.343, + "step": 1599 + }, + { + "epoch": 23.88, + "learning_rate": 0.00029659649122807015, + "loss": 0.4606, + "step": 1600 + }, + { + "epoch": 23.88, + "eval_accuracy": 0.7792462065589819, + "eval_f1": 0.7878802780794968, + "eval_loss": 0.8314271569252014, + "eval_runtime": 344.0688, + "eval_samples_per_second": 11.876, + "eval_steps_per_second": 0.744, + "step": 1600 + }, + { + "epoch": 23.89, + "learning_rate": 0.0002965614035087719, + "loss": 0.6198, + "step": 1601 + }, + { + "epoch": 23.91, + "learning_rate": 0.00029652631578947364, + "loss": 0.6289, + "step": 1602 + }, + { + "epoch": 23.92, + "learning_rate": 0.0002964912280701754, + "loss": 0.4563, + "step": 1603 + }, + { + "epoch": 23.94, + "learning_rate": 0.00029645614035087714, + "loss": 0.9609, + "step": 1604 + }, + { + "epoch": 23.95, + "learning_rate": 0.00029642105263157895, + "loss": 0.4432, + "step": 1605 + }, + { + "epoch": 23.97, + "learning_rate": 0.0002963859649122807, + "loss": 0.3827, + "step": 1606 + }, + { + "epoch": 23.98, + "learning_rate": 0.00029635087719298244, + "loss": 0.9992, + "step": 1607 + }, + { + "epoch": 24.0, + "learning_rate": 0.0002963157894736842, + "loss": 0.4266, + "step": 1608 + }, + { + "epoch": 24.01, + "learning_rate": 0.00029628070175438594, + "loss": 0.5823, + "step": 1609 + }, + { + "epoch": 24.03, + "learning_rate": 0.0002962456140350877, + "loss": 0.5493, + "step": 1610 + }, + { + "epoch": 24.04, + "learning_rate": 0.00029621052631578944, + "loss": 0.3707, + "step": 1611 + }, + { + "epoch": 24.06, + "learning_rate": 0.0002961754385964912, + "loss": 0.4518, + "step": 1612 + }, + { + "epoch": 24.07, + "learning_rate": 0.000296140350877193, + "loss": 0.7895, + "step": 1613 + }, + { + "epoch": 24.09, + "learning_rate": 0.00029610526315789474, + "loss": 0.5102, + "step": 1614 + }, + { + "epoch": 24.1, + "learning_rate": 0.00029607017543859643, + "loss": 0.6533, + "step": 1615 + }, + { + "epoch": 24.12, + "learning_rate": 0.00029603508771929824, + "loss": 0.6582, + "step": 1616 + }, + { + "epoch": 24.13, + "learning_rate": 0.000296, + "loss": 0.6249, + "step": 1617 + }, + { + "epoch": 24.15, + "learning_rate": 0.00029596491228070173, + "loss": 0.4666, + "step": 1618 + }, + { + "epoch": 24.16, + "learning_rate": 0.0002959298245614035, + "loss": 0.0862, + "step": 1619 + }, + { + "epoch": 24.18, + "learning_rate": 0.00029589473684210523, + "loss": 0.2562, + "step": 1620 + }, + { + "epoch": 24.19, + "learning_rate": 0.000295859649122807, + "loss": 0.5261, + "step": 1621 + }, + { + "epoch": 24.21, + "learning_rate": 0.00029582456140350873, + "loss": 0.5443, + "step": 1622 + }, + { + "epoch": 24.22, + "learning_rate": 0.0002957894736842105, + "loss": 0.2514, + "step": 1623 + }, + { + "epoch": 24.24, + "learning_rate": 0.0002957543859649123, + "loss": 0.501, + "step": 1624 + }, + { + "epoch": 24.25, + "learning_rate": 0.00029571929824561403, + "loss": 0.6602, + "step": 1625 + }, + { + "epoch": 24.27, + "learning_rate": 0.0002956842105263158, + "loss": 1.0655, + "step": 1626 + }, + { + "epoch": 24.28, + "learning_rate": 0.0002956491228070175, + "loss": 1.0767, + "step": 1627 + }, + { + "epoch": 24.3, + "learning_rate": 0.0002956140350877193, + "loss": 1.6448, + "step": 1628 + }, + { + "epoch": 24.31, + "learning_rate": 0.000295578947368421, + "loss": 0.8344, + "step": 1629 + }, + { + "epoch": 24.33, + "learning_rate": 0.0002955438596491228, + "loss": 0.4289, + "step": 1630 + }, + { + "epoch": 24.34, + "learning_rate": 0.0002955087719298246, + "loss": 0.4206, + "step": 1631 + }, + { + "epoch": 24.36, + "learning_rate": 0.00029547368421052627, + "loss": 0.4214, + "step": 1632 + }, + { + "epoch": 24.37, + "learning_rate": 0.000295438596491228, + "loss": 0.8347, + "step": 1633 + }, + { + "epoch": 24.39, + "learning_rate": 0.00029540350877192977, + "loss": 0.4034, + "step": 1634 + }, + { + "epoch": 24.4, + "learning_rate": 0.00029536842105263157, + "loss": 0.4719, + "step": 1635 + }, + { + "epoch": 24.42, + "learning_rate": 0.0002953333333333333, + "loss": 0.3112, + "step": 1636 + }, + { + "epoch": 24.43, + "learning_rate": 0.00029529824561403507, + "loss": 0.7053, + "step": 1637 + }, + { + "epoch": 24.45, + "learning_rate": 0.0002952631578947368, + "loss": 0.2759, + "step": 1638 + }, + { + "epoch": 24.46, + "learning_rate": 0.00029522807017543857, + "loss": 0.3891, + "step": 1639 + }, + { + "epoch": 24.48, + "learning_rate": 0.0002951929824561403, + "loss": 0.2628, + "step": 1640 + }, + { + "epoch": 24.49, + "learning_rate": 0.00029515789473684206, + "loss": 0.4246, + "step": 1641 + }, + { + "epoch": 24.51, + "learning_rate": 0.00029512280701754387, + "loss": 0.8412, + "step": 1642 + }, + { + "epoch": 24.52, + "learning_rate": 0.0002950877192982456, + "loss": 0.5919, + "step": 1643 + }, + { + "epoch": 24.54, + "learning_rate": 0.00029505263157894736, + "loss": 0.5095, + "step": 1644 + }, + { + "epoch": 24.55, + "learning_rate": 0.00029501754385964906, + "loss": 0.7387, + "step": 1645 + }, + { + "epoch": 24.57, + "learning_rate": 0.00029498245614035086, + "loss": 0.3755, + "step": 1646 + }, + { + "epoch": 24.58, + "learning_rate": 0.0002949473684210526, + "loss": 0.3604, + "step": 1647 + }, + { + "epoch": 24.59, + "learning_rate": 0.00029491228070175436, + "loss": 0.3853, + "step": 1648 + }, + { + "epoch": 24.61, + "learning_rate": 0.0002948771929824561, + "loss": 0.4088, + "step": 1649 + }, + { + "epoch": 24.62, + "learning_rate": 0.00029484210526315786, + "loss": 0.652, + "step": 1650 + }, + { + "epoch": 24.64, + "learning_rate": 0.0002948070175438596, + "loss": 0.8167, + "step": 1651 + }, + { + "epoch": 24.65, + "learning_rate": 0.00029477192982456135, + "loss": 0.3586, + "step": 1652 + }, + { + "epoch": 24.67, + "learning_rate": 0.0002947368421052631, + "loss": 0.2884, + "step": 1653 + }, + { + "epoch": 24.68, + "learning_rate": 0.0002947017543859649, + "loss": 0.2747, + "step": 1654 + }, + { + "epoch": 24.7, + "learning_rate": 0.00029466666666666666, + "loss": 0.3327, + "step": 1655 + }, + { + "epoch": 24.71, + "learning_rate": 0.0002946315789473684, + "loss": 0.2504, + "step": 1656 + }, + { + "epoch": 24.73, + "learning_rate": 0.00029459649122807015, + "loss": 0.3425, + "step": 1657 + }, + { + "epoch": 24.74, + "learning_rate": 0.0002945614035087719, + "loss": 0.429, + "step": 1658 + }, + { + "epoch": 24.76, + "learning_rate": 0.00029452631578947365, + "loss": 0.3557, + "step": 1659 + }, + { + "epoch": 24.77, + "learning_rate": 0.0002944912280701754, + "loss": 0.6772, + "step": 1660 + }, + { + "epoch": 24.79, + "learning_rate": 0.0002944561403508772, + "loss": 0.321, + "step": 1661 + }, + { + "epoch": 24.8, + "learning_rate": 0.00029442105263157895, + "loss": 0.7565, + "step": 1662 + }, + { + "epoch": 24.82, + "learning_rate": 0.00029438596491228065, + "loss": 0.2763, + "step": 1663 + }, + { + "epoch": 24.83, + "learning_rate": 0.0002943508771929824, + "loss": 0.8887, + "step": 1664 + }, + { + "epoch": 24.85, + "learning_rate": 0.0002943157894736842, + "loss": 0.5218, + "step": 1665 + }, + { + "epoch": 24.86, + "learning_rate": 0.00029428070175438595, + "loss": 0.2061, + "step": 1666 + }, + { + "epoch": 24.88, + "learning_rate": 0.0002942456140350877, + "loss": 0.2076, + "step": 1667 + }, + { + "epoch": 24.89, + "learning_rate": 0.00029421052631578944, + "loss": 0.3881, + "step": 1668 + }, + { + "epoch": 24.91, + "learning_rate": 0.0002941754385964912, + "loss": 0.5423, + "step": 1669 + }, + { + "epoch": 24.92, + "learning_rate": 0.00029414035087719294, + "loss": 0.2679, + "step": 1670 + }, + { + "epoch": 24.94, + "learning_rate": 0.0002941052631578947, + "loss": 0.3646, + "step": 1671 + }, + { + "epoch": 24.95, + "learning_rate": 0.0002940701754385965, + "loss": 0.5029, + "step": 1672 + }, + { + "epoch": 24.97, + "learning_rate": 0.00029403508771929824, + "loss": 0.3023, + "step": 1673 + }, + { + "epoch": 24.98, + "learning_rate": 0.000294, + "loss": 0.5306, + "step": 1674 + }, + { + "epoch": 25.0, + "learning_rate": 0.00029396491228070174, + "loss": 0.4512, + "step": 1675 + }, + { + "epoch": 25.01, + "learning_rate": 0.0002939298245614035, + "loss": 0.7319, + "step": 1676 + }, + { + "epoch": 25.03, + "learning_rate": 0.00029389473684210524, + "loss": 0.604, + "step": 1677 + }, + { + "epoch": 25.04, + "learning_rate": 0.000293859649122807, + "loss": 0.3086, + "step": 1678 + }, + { + "epoch": 25.06, + "learning_rate": 0.0002938245614035088, + "loss": 0.2008, + "step": 1679 + }, + { + "epoch": 25.07, + "learning_rate": 0.0002937894736842105, + "loss": 0.3837, + "step": 1680 + }, + { + "epoch": 25.09, + "learning_rate": 0.00029375438596491223, + "loss": 0.5445, + "step": 1681 + }, + { + "epoch": 25.1, + "learning_rate": 0.000293719298245614, + "loss": 0.4024, + "step": 1682 + }, + { + "epoch": 25.12, + "learning_rate": 0.0002936842105263158, + "loss": 0.8895, + "step": 1683 + }, + { + "epoch": 25.13, + "learning_rate": 0.00029364912280701753, + "loss": 0.4681, + "step": 1684 + }, + { + "epoch": 25.15, + "learning_rate": 0.0002936140350877193, + "loss": 0.2096, + "step": 1685 + }, + { + "epoch": 25.16, + "learning_rate": 0.00029357894736842103, + "loss": 0.3158, + "step": 1686 + }, + { + "epoch": 25.18, + "learning_rate": 0.0002935438596491228, + "loss": 0.3478, + "step": 1687 + }, + { + "epoch": 25.19, + "learning_rate": 0.00029350877192982453, + "loss": 0.1883, + "step": 1688 + }, + { + "epoch": 25.21, + "learning_rate": 0.0002934736842105263, + "loss": 0.3698, + "step": 1689 + }, + { + "epoch": 25.22, + "learning_rate": 0.000293438596491228, + "loss": 0.4285, + "step": 1690 + }, + { + "epoch": 25.24, + "learning_rate": 0.00029340350877192983, + "loss": 0.373, + "step": 1691 + }, + { + "epoch": 25.25, + "learning_rate": 0.0002933684210526316, + "loss": 0.1485, + "step": 1692 + }, + { + "epoch": 25.27, + "learning_rate": 0.00029333333333333327, + "loss": 0.447, + "step": 1693 + }, + { + "epoch": 25.28, + "learning_rate": 0.0002932982456140351, + "loss": 0.6378, + "step": 1694 + }, + { + "epoch": 25.3, + "learning_rate": 0.0002932631578947368, + "loss": 0.5661, + "step": 1695 + }, + { + "epoch": 25.31, + "learning_rate": 0.00029322807017543857, + "loss": 0.5903, + "step": 1696 + }, + { + "epoch": 25.33, + "learning_rate": 0.0002931929824561403, + "loss": 0.6971, + "step": 1697 + }, + { + "epoch": 25.34, + "learning_rate": 0.00029315789473684207, + "loss": 0.3974, + "step": 1698 + }, + { + "epoch": 25.36, + "learning_rate": 0.0002931228070175438, + "loss": 0.2634, + "step": 1699 + }, + { + "epoch": 25.37, + "learning_rate": 0.00029308771929824557, + "loss": 0.7687, + "step": 1700 + }, + { + "epoch": 25.39, + "learning_rate": 0.0002930526315789473, + "loss": 0.6014, + "step": 1701 + }, + { + "epoch": 25.4, + "learning_rate": 0.0002930175438596491, + "loss": 0.5096, + "step": 1702 + }, + { + "epoch": 25.42, + "learning_rate": 0.00029298245614035087, + "loss": 0.3946, + "step": 1703 + }, + { + "epoch": 25.43, + "learning_rate": 0.0002929473684210526, + "loss": 0.3069, + "step": 1704 + }, + { + "epoch": 25.45, + "learning_rate": 0.00029291228070175437, + "loss": 0.2154, + "step": 1705 + }, + { + "epoch": 25.46, + "learning_rate": 0.0002928771929824561, + "loss": 0.2049, + "step": 1706 + }, + { + "epoch": 25.48, + "learning_rate": 0.00029284210526315786, + "loss": 0.4638, + "step": 1707 + }, + { + "epoch": 25.49, + "learning_rate": 0.0002928070175438596, + "loss": 0.4175, + "step": 1708 + }, + { + "epoch": 25.51, + "learning_rate": 0.0002927719298245614, + "loss": 1.1591, + "step": 1709 + }, + { + "epoch": 25.52, + "learning_rate": 0.00029273684210526316, + "loss": 0.2331, + "step": 1710 + }, + { + "epoch": 25.54, + "learning_rate": 0.00029270175438596486, + "loss": 0.7711, + "step": 1711 + }, + { + "epoch": 25.55, + "learning_rate": 0.0002926666666666666, + "loss": 0.3989, + "step": 1712 + }, + { + "epoch": 25.57, + "learning_rate": 0.0002926315789473684, + "loss": 0.4343, + "step": 1713 + }, + { + "epoch": 25.58, + "learning_rate": 0.00029259649122807016, + "loss": 0.5231, + "step": 1714 + }, + { + "epoch": 25.59, + "learning_rate": 0.0002925614035087719, + "loss": 0.3829, + "step": 1715 + }, + { + "epoch": 25.61, + "learning_rate": 0.00029252631578947366, + "loss": 0.5188, + "step": 1716 + }, + { + "epoch": 25.62, + "learning_rate": 0.0002924912280701754, + "loss": 0.495, + "step": 1717 + }, + { + "epoch": 25.64, + "learning_rate": 0.00029245614035087715, + "loss": 0.1815, + "step": 1718 + }, + { + "epoch": 25.65, + "learning_rate": 0.0002924210526315789, + "loss": 0.2124, + "step": 1719 + }, + { + "epoch": 25.67, + "learning_rate": 0.0002923859649122807, + "loss": 0.372, + "step": 1720 + }, + { + "epoch": 25.68, + "learning_rate": 0.00029235087719298245, + "loss": 0.1183, + "step": 1721 + }, + { + "epoch": 25.7, + "learning_rate": 0.0002923157894736842, + "loss": 0.7579, + "step": 1722 + }, + { + "epoch": 25.71, + "learning_rate": 0.00029228070175438595, + "loss": 0.217, + "step": 1723 + }, + { + "epoch": 25.73, + "learning_rate": 0.0002922456140350877, + "loss": 0.0824, + "step": 1724 + }, + { + "epoch": 25.74, + "learning_rate": 0.00029221052631578945, + "loss": 0.1303, + "step": 1725 + }, + { + "epoch": 25.76, + "learning_rate": 0.0002921754385964912, + "loss": 0.8698, + "step": 1726 + }, + { + "epoch": 25.77, + "learning_rate": 0.00029214035087719295, + "loss": 0.7743, + "step": 1727 + }, + { + "epoch": 25.79, + "learning_rate": 0.0002921052631578947, + "loss": 0.3436, + "step": 1728 + }, + { + "epoch": 25.8, + "learning_rate": 0.00029207017543859644, + "loss": 0.9807, + "step": 1729 + }, + { + "epoch": 25.82, + "learning_rate": 0.0002920350877192982, + "loss": 0.3922, + "step": 1730 + }, + { + "epoch": 25.83, + "learning_rate": 0.000292, + "loss": 0.9146, + "step": 1731 + }, + { + "epoch": 25.85, + "learning_rate": 0.00029196491228070174, + "loss": 0.2695, + "step": 1732 + }, + { + "epoch": 25.86, + "learning_rate": 0.0002919298245614035, + "loss": 0.5178, + "step": 1733 + }, + { + "epoch": 25.88, + "learning_rate": 0.00029189473684210524, + "loss": 0.4618, + "step": 1734 + }, + { + "epoch": 25.89, + "learning_rate": 0.000291859649122807, + "loss": 0.3861, + "step": 1735 + }, + { + "epoch": 25.91, + "learning_rate": 0.00029182456140350874, + "loss": 0.2787, + "step": 1736 + }, + { + "epoch": 25.92, + "learning_rate": 0.0002917894736842105, + "loss": 0.4399, + "step": 1737 + }, + { + "epoch": 25.94, + "learning_rate": 0.00029175438596491224, + "loss": 0.9383, + "step": 1738 + }, + { + "epoch": 25.95, + "learning_rate": 0.00029171929824561404, + "loss": 0.3467, + "step": 1739 + }, + { + "epoch": 25.97, + "learning_rate": 0.0002916842105263158, + "loss": 0.6328, + "step": 1740 + }, + { + "epoch": 25.98, + "learning_rate": 0.0002916491228070175, + "loss": 0.5946, + "step": 1741 + }, + { + "epoch": 26.0, + "learning_rate": 0.0002916140350877193, + "loss": 0.2688, + "step": 1742 + }, + { + "epoch": 26.01, + "learning_rate": 0.00029157894736842104, + "loss": 0.3514, + "step": 1743 + }, + { + "epoch": 26.03, + "learning_rate": 0.0002915438596491228, + "loss": 0.8932, + "step": 1744 + }, + { + "epoch": 26.04, + "learning_rate": 0.00029150877192982453, + "loss": 0.4737, + "step": 1745 + }, + { + "epoch": 26.06, + "learning_rate": 0.0002914736842105263, + "loss": 0.1499, + "step": 1746 + }, + { + "epoch": 26.07, + "learning_rate": 0.00029143859649122803, + "loss": 0.5537, + "step": 1747 + }, + { + "epoch": 26.09, + "learning_rate": 0.0002914035087719298, + "loss": 0.2274, + "step": 1748 + }, + { + "epoch": 26.1, + "learning_rate": 0.00029136842105263153, + "loss": 0.2173, + "step": 1749 + }, + { + "epoch": 26.12, + "learning_rate": 0.00029133333333333333, + "loss": 0.6862, + "step": 1750 + }, + { + "epoch": 26.13, + "learning_rate": 0.0002912982456140351, + "loss": 0.4882, + "step": 1751 + }, + { + "epoch": 26.15, + "learning_rate": 0.00029126315789473683, + "loss": 0.3082, + "step": 1752 + }, + { + "epoch": 26.16, + "learning_rate": 0.0002912280701754386, + "loss": 0.4787, + "step": 1753 + }, + { + "epoch": 26.18, + "learning_rate": 0.0002911929824561403, + "loss": 0.5084, + "step": 1754 + }, + { + "epoch": 26.19, + "learning_rate": 0.0002911578947368421, + "loss": 0.5261, + "step": 1755 + }, + { + "epoch": 26.21, + "learning_rate": 0.0002911228070175438, + "loss": 0.8972, + "step": 1756 + }, + { + "epoch": 26.22, + "learning_rate": 0.0002910877192982456, + "loss": 0.0873, + "step": 1757 + }, + { + "epoch": 26.24, + "learning_rate": 0.0002910526315789473, + "loss": 0.0688, + "step": 1758 + }, + { + "epoch": 26.25, + "learning_rate": 0.00029101754385964907, + "loss": 0.5787, + "step": 1759 + }, + { + "epoch": 26.27, + "learning_rate": 0.0002909824561403508, + "loss": 0.5109, + "step": 1760 + }, + { + "epoch": 26.28, + "learning_rate": 0.0002909473684210526, + "loss": 0.9027, + "step": 1761 + }, + { + "epoch": 26.3, + "learning_rate": 0.00029091228070175437, + "loss": 0.6658, + "step": 1762 + }, + { + "epoch": 26.31, + "learning_rate": 0.0002908771929824561, + "loss": 0.3153, + "step": 1763 + }, + { + "epoch": 26.33, + "learning_rate": 0.00029084210526315787, + "loss": 0.4754, + "step": 1764 + }, + { + "epoch": 26.34, + "learning_rate": 0.0002908070175438596, + "loss": 0.8009, + "step": 1765 + }, + { + "epoch": 26.36, + "learning_rate": 0.00029077192982456137, + "loss": 0.7278, + "step": 1766 + }, + { + "epoch": 26.37, + "learning_rate": 0.0002907368421052631, + "loss": 0.9031, + "step": 1767 + }, + { + "epoch": 26.39, + "learning_rate": 0.0002907017543859649, + "loss": 0.4986, + "step": 1768 + }, + { + "epoch": 26.4, + "learning_rate": 0.00029066666666666667, + "loss": 0.6661, + "step": 1769 + }, + { + "epoch": 26.42, + "learning_rate": 0.0002906315789473684, + "loss": 0.555, + "step": 1770 + }, + { + "epoch": 26.43, + "learning_rate": 0.00029059649122807016, + "loss": 0.5531, + "step": 1771 + }, + { + "epoch": 26.45, + "learning_rate": 0.0002905614035087719, + "loss": 0.3935, + "step": 1772 + }, + { + "epoch": 26.46, + "learning_rate": 0.00029052631578947366, + "loss": 0.8509, + "step": 1773 + }, + { + "epoch": 26.48, + "learning_rate": 0.0002904912280701754, + "loss": 0.3773, + "step": 1774 + }, + { + "epoch": 26.49, + "learning_rate": 0.00029045614035087716, + "loss": 0.4842, + "step": 1775 + }, + { + "epoch": 26.51, + "learning_rate": 0.0002904210526315789, + "loss": 0.5316, + "step": 1776 + }, + { + "epoch": 26.52, + "learning_rate": 0.00029038596491228066, + "loss": 0.3468, + "step": 1777 + }, + { + "epoch": 26.54, + "learning_rate": 0.0002903508771929824, + "loss": 0.6874, + "step": 1778 + }, + { + "epoch": 26.55, + "learning_rate": 0.0002903157894736842, + "loss": 0.4743, + "step": 1779 + }, + { + "epoch": 26.57, + "learning_rate": 0.00029028070175438596, + "loss": 0.551, + "step": 1780 + }, + { + "epoch": 26.58, + "learning_rate": 0.0002902456140350877, + "loss": 0.1915, + "step": 1781 + }, + { + "epoch": 26.59, + "learning_rate": 0.00029021052631578945, + "loss": 0.8392, + "step": 1782 + }, + { + "epoch": 26.61, + "learning_rate": 0.0002901754385964912, + "loss": 0.4326, + "step": 1783 + }, + { + "epoch": 26.62, + "learning_rate": 0.00029014035087719295, + "loss": 0.3187, + "step": 1784 + }, + { + "epoch": 26.64, + "learning_rate": 0.0002901052631578947, + "loss": 0.3209, + "step": 1785 + }, + { + "epoch": 26.65, + "learning_rate": 0.00029007017543859645, + "loss": 0.5229, + "step": 1786 + }, + { + "epoch": 26.67, + "learning_rate": 0.00029003508771929825, + "loss": 0.5639, + "step": 1787 + }, + { + "epoch": 26.68, + "learning_rate": 0.00029, + "loss": 0.2592, + "step": 1788 + }, + { + "epoch": 26.7, + "learning_rate": 0.0002899649122807017, + "loss": 0.5842, + "step": 1789 + }, + { + "epoch": 26.71, + "learning_rate": 0.00028992982456140345, + "loss": 0.3494, + "step": 1790 + }, + { + "epoch": 26.73, + "learning_rate": 0.00028989473684210525, + "loss": 0.3164, + "step": 1791 + }, + { + "epoch": 26.74, + "learning_rate": 0.000289859649122807, + "loss": 0.4838, + "step": 1792 + }, + { + "epoch": 26.76, + "learning_rate": 0.00028982456140350875, + "loss": 0.2019, + "step": 1793 + }, + { + "epoch": 26.77, + "learning_rate": 0.0002897894736842105, + "loss": 0.3821, + "step": 1794 + }, + { + "epoch": 26.79, + "learning_rate": 0.00028975438596491224, + "loss": 0.5468, + "step": 1795 + }, + { + "epoch": 26.8, + "learning_rate": 0.000289719298245614, + "loss": 0.1877, + "step": 1796 + }, + { + "epoch": 26.82, + "learning_rate": 0.00028968421052631574, + "loss": 0.5564, + "step": 1797 + }, + { + "epoch": 26.83, + "learning_rate": 0.00028964912280701754, + "loss": 0.9035, + "step": 1798 + }, + { + "epoch": 26.85, + "learning_rate": 0.0002896140350877193, + "loss": 0.6982, + "step": 1799 + }, + { + "epoch": 26.86, + "learning_rate": 0.00028957894736842104, + "loss": 0.6975, + "step": 1800 + }, + { + "epoch": 26.86, + "eval_accuracy": 0.793930494371023, + "eval_f1": 0.792651690068327, + "eval_loss": 0.7666684985160828, + "eval_runtime": 344.0857, + "eval_samples_per_second": 11.875, + "eval_steps_per_second": 0.744, + "step": 1800 + }, + { + "epoch": 26.88, + "learning_rate": 0.0002895438596491228, + "loss": 0.2577, + "step": 1801 + }, + { + "epoch": 26.89, + "learning_rate": 0.00028950877192982454, + "loss": 0.2534, + "step": 1802 + }, + { + "epoch": 26.91, + "learning_rate": 0.0002894736842105263, + "loss": 0.2775, + "step": 1803 + }, + { + "epoch": 26.92, + "learning_rate": 0.00028943859649122804, + "loss": 0.2636, + "step": 1804 + }, + { + "epoch": 26.94, + "learning_rate": 0.00028940350877192984, + "loss": 0.6394, + "step": 1805 + }, + { + "epoch": 26.95, + "learning_rate": 0.00028936842105263153, + "loss": 0.6804, + "step": 1806 + }, + { + "epoch": 26.97, + "learning_rate": 0.0002893333333333333, + "loss": 0.4613, + "step": 1807 + }, + { + "epoch": 26.98, + "learning_rate": 0.00028929824561403503, + "loss": 0.2651, + "step": 1808 + }, + { + "epoch": 27.0, + "learning_rate": 0.00028926315789473683, + "loss": 0.3102, + "step": 1809 + }, + { + "epoch": 27.01, + "learning_rate": 0.0002892280701754386, + "loss": 0.6897, + "step": 1810 + }, + { + "epoch": 27.03, + "learning_rate": 0.00028919298245614033, + "loss": 0.3374, + "step": 1811 + }, + { + "epoch": 27.04, + "learning_rate": 0.0002891578947368421, + "loss": 1.2131, + "step": 1812 + }, + { + "epoch": 27.06, + "learning_rate": 0.00028912280701754383, + "loss": 0.4629, + "step": 1813 + }, + { + "epoch": 27.07, + "learning_rate": 0.0002890877192982456, + "loss": 0.4115, + "step": 1814 + }, + { + "epoch": 27.09, + "learning_rate": 0.00028905263157894733, + "loss": 0.4939, + "step": 1815 + }, + { + "epoch": 27.1, + "learning_rate": 0.0002890175438596491, + "loss": 0.3057, + "step": 1816 + }, + { + "epoch": 27.12, + "learning_rate": 0.0002889824561403509, + "loss": 0.7153, + "step": 1817 + }, + { + "epoch": 27.13, + "learning_rate": 0.00028894736842105263, + "loss": 0.3378, + "step": 1818 + }, + { + "epoch": 27.15, + "learning_rate": 0.0002889122807017544, + "loss": 0.3109, + "step": 1819 + }, + { + "epoch": 27.16, + "learning_rate": 0.0002888771929824561, + "loss": 0.4494, + "step": 1820 + }, + { + "epoch": 27.18, + "learning_rate": 0.0002888421052631579, + "loss": 0.2402, + "step": 1821 + }, + { + "epoch": 27.19, + "learning_rate": 0.0002888070175438596, + "loss": 0.1107, + "step": 1822 + }, + { + "epoch": 27.21, + "learning_rate": 0.00028877192982456137, + "loss": 0.7066, + "step": 1823 + }, + { + "epoch": 27.22, + "learning_rate": 0.0002887368421052631, + "loss": 0.1625, + "step": 1824 + }, + { + "epoch": 27.24, + "learning_rate": 0.00028870175438596487, + "loss": 0.2452, + "step": 1825 + }, + { + "epoch": 27.25, + "learning_rate": 0.0002886666666666666, + "loss": 1.0513, + "step": 1826 + }, + { + "epoch": 27.27, + "learning_rate": 0.00028863157894736837, + "loss": 0.5964, + "step": 1827 + }, + { + "epoch": 27.28, + "learning_rate": 0.00028859649122807017, + "loss": 0.7036, + "step": 1828 + }, + { + "epoch": 27.3, + "learning_rate": 0.0002885614035087719, + "loss": 0.4673, + "step": 1829 + }, + { + "epoch": 27.31, + "learning_rate": 0.00028852631578947367, + "loss": 0.255, + "step": 1830 + }, + { + "epoch": 27.33, + "learning_rate": 0.0002884912280701754, + "loss": 0.4344, + "step": 1831 + }, + { + "epoch": 27.34, + "learning_rate": 0.00028845614035087716, + "loss": 0.2454, + "step": 1832 + }, + { + "epoch": 27.36, + "learning_rate": 0.0002884210526315789, + "loss": 0.6276, + "step": 1833 + }, + { + "epoch": 27.37, + "learning_rate": 0.00028838596491228066, + "loss": 0.3496, + "step": 1834 + }, + { + "epoch": 27.39, + "learning_rate": 0.00028835087719298247, + "loss": 0.249, + "step": 1835 + }, + { + "epoch": 27.4, + "learning_rate": 0.0002883157894736842, + "loss": 0.1035, + "step": 1836 + }, + { + "epoch": 27.42, + "learning_rate": 0.0002882807017543859, + "loss": 0.2411, + "step": 1837 + }, + { + "epoch": 27.43, + "learning_rate": 0.00028824561403508766, + "loss": 0.3164, + "step": 1838 + }, + { + "epoch": 27.45, + "learning_rate": 0.00028821052631578946, + "loss": 0.0571, + "step": 1839 + }, + { + "epoch": 27.46, + "learning_rate": 0.0002881754385964912, + "loss": 0.0659, + "step": 1840 + }, + { + "epoch": 27.48, + "learning_rate": 0.00028814035087719296, + "loss": 0.363, + "step": 1841 + }, + { + "epoch": 27.49, + "learning_rate": 0.0002881052631578947, + "loss": 0.6705, + "step": 1842 + }, + { + "epoch": 27.51, + "learning_rate": 0.00028807017543859646, + "loss": 0.4213, + "step": 1843 + }, + { + "epoch": 27.52, + "learning_rate": 0.0002880350877192982, + "loss": 0.4256, + "step": 1844 + }, + { + "epoch": 27.54, + "learning_rate": 0.00028799999999999995, + "loss": 0.3606, + "step": 1845 + }, + { + "epoch": 27.55, + "learning_rate": 0.00028796491228070176, + "loss": 0.7367, + "step": 1846 + }, + { + "epoch": 27.57, + "learning_rate": 0.0002879298245614035, + "loss": 0.391, + "step": 1847 + }, + { + "epoch": 27.58, + "learning_rate": 0.00028789473684210525, + "loss": 0.7125, + "step": 1848 + }, + { + "epoch": 27.59, + "learning_rate": 0.000287859649122807, + "loss": 0.0586, + "step": 1849 + }, + { + "epoch": 27.61, + "learning_rate": 0.00028782456140350875, + "loss": 0.3911, + "step": 1850 + }, + { + "epoch": 27.62, + "learning_rate": 0.0002877894736842105, + "loss": 0.222, + "step": 1851 + }, + { + "epoch": 27.64, + "learning_rate": 0.00028775438596491225, + "loss": 0.2349, + "step": 1852 + }, + { + "epoch": 27.65, + "learning_rate": 0.000287719298245614, + "loss": 0.1077, + "step": 1853 + }, + { + "epoch": 27.67, + "learning_rate": 0.00028768421052631575, + "loss": 0.1471, + "step": 1854 + }, + { + "epoch": 27.68, + "learning_rate": 0.0002876491228070175, + "loss": 0.4215, + "step": 1855 + }, + { + "epoch": 27.7, + "learning_rate": 0.00028761403508771924, + "loss": 0.3904, + "step": 1856 + }, + { + "epoch": 27.71, + "learning_rate": 0.00028757894736842105, + "loss": 0.4847, + "step": 1857 + }, + { + "epoch": 27.73, + "learning_rate": 0.0002875438596491228, + "loss": 0.4888, + "step": 1858 + }, + { + "epoch": 27.74, + "learning_rate": 0.00028750877192982454, + "loss": 0.7409, + "step": 1859 + }, + { + "epoch": 27.76, + "learning_rate": 0.0002874736842105263, + "loss": 0.3056, + "step": 1860 + }, + { + "epoch": 27.77, + "learning_rate": 0.00028743859649122804, + "loss": 0.4178, + "step": 1861 + }, + { + "epoch": 27.79, + "learning_rate": 0.0002874035087719298, + "loss": 0.2794, + "step": 1862 + }, + { + "epoch": 27.8, + "learning_rate": 0.00028736842105263154, + "loss": 0.2874, + "step": 1863 + }, + { + "epoch": 27.82, + "learning_rate": 0.0002873333333333333, + "loss": 0.6377, + "step": 1864 + }, + { + "epoch": 27.83, + "learning_rate": 0.0002872982456140351, + "loss": 1.002, + "step": 1865 + }, + { + "epoch": 27.85, + "learning_rate": 0.00028726315789473684, + "loss": 0.5173, + "step": 1866 + }, + { + "epoch": 27.86, + "learning_rate": 0.0002872280701754386, + "loss": 0.4654, + "step": 1867 + }, + { + "epoch": 27.88, + "learning_rate": 0.00028719298245614034, + "loss": 0.3534, + "step": 1868 + }, + { + "epoch": 27.89, + "learning_rate": 0.0002871578947368421, + "loss": 0.6233, + "step": 1869 + }, + { + "epoch": 27.91, + "learning_rate": 0.00028712280701754384, + "loss": 0.3434, + "step": 1870 + }, + { + "epoch": 27.92, + "learning_rate": 0.0002870877192982456, + "loss": 0.2129, + "step": 1871 + }, + { + "epoch": 27.94, + "learning_rate": 0.00028705263157894733, + "loss": 0.1183, + "step": 1872 + }, + { + "epoch": 27.95, + "learning_rate": 0.0002870175438596491, + "loss": 0.233, + "step": 1873 + }, + { + "epoch": 27.97, + "learning_rate": 0.00028698245614035083, + "loss": 0.5479, + "step": 1874 + }, + { + "epoch": 27.98, + "learning_rate": 0.0002869473684210526, + "loss": 0.3382, + "step": 1875 + }, + { + "epoch": 28.0, + "learning_rate": 0.0002869122807017544, + "loss": 0.9043, + "step": 1876 + }, + { + "epoch": 28.01, + "learning_rate": 0.00028687719298245613, + "loss": 0.6242, + "step": 1877 + }, + { + "epoch": 28.03, + "learning_rate": 0.0002868421052631579, + "loss": 0.6041, + "step": 1878 + }, + { + "epoch": 28.04, + "learning_rate": 0.00028680701754385963, + "loss": 0.6087, + "step": 1879 + }, + { + "epoch": 28.06, + "learning_rate": 0.0002867719298245614, + "loss": 0.2625, + "step": 1880 + }, + { + "epoch": 28.07, + "learning_rate": 0.0002867368421052631, + "loss": 0.3646, + "step": 1881 + }, + { + "epoch": 28.09, + "learning_rate": 0.0002867017543859649, + "loss": 0.4302, + "step": 1882 + }, + { + "epoch": 28.1, + "learning_rate": 0.0002866666666666667, + "loss": 0.9889, + "step": 1883 + }, + { + "epoch": 28.12, + "learning_rate": 0.0002866315789473684, + "loss": 0.4311, + "step": 1884 + }, + { + "epoch": 28.13, + "learning_rate": 0.0002865964912280701, + "loss": 0.3208, + "step": 1885 + }, + { + "epoch": 28.15, + "learning_rate": 0.00028656140350877187, + "loss": 0.7227, + "step": 1886 + }, + { + "epoch": 28.16, + "learning_rate": 0.00028652631578947367, + "loss": 0.2999, + "step": 1887 + }, + { + "epoch": 28.18, + "learning_rate": 0.0002864912280701754, + "loss": 0.1791, + "step": 1888 + }, + { + "epoch": 28.19, + "learning_rate": 0.00028645614035087717, + "loss": 0.377, + "step": 1889 + }, + { + "epoch": 28.21, + "learning_rate": 0.0002864210526315789, + "loss": 0.1628, + "step": 1890 + }, + { + "epoch": 28.22, + "learning_rate": 0.00028638596491228067, + "loss": 0.3154, + "step": 1891 + }, + { + "epoch": 28.24, + "learning_rate": 0.0002863508771929824, + "loss": 0.1039, + "step": 1892 + }, + { + "epoch": 28.25, + "learning_rate": 0.00028631578947368417, + "loss": 0.2205, + "step": 1893 + }, + { + "epoch": 28.27, + "learning_rate": 0.00028628070175438597, + "loss": 0.8305, + "step": 1894 + }, + { + "epoch": 28.28, + "learning_rate": 0.0002862456140350877, + "loss": 0.2431, + "step": 1895 + }, + { + "epoch": 28.3, + "learning_rate": 0.00028621052631578947, + "loss": 0.2169, + "step": 1896 + }, + { + "epoch": 28.31, + "learning_rate": 0.0002861754385964912, + "loss": 0.2878, + "step": 1897 + }, + { + "epoch": 28.33, + "learning_rate": 0.00028614035087719296, + "loss": 0.3567, + "step": 1898 + }, + { + "epoch": 28.34, + "learning_rate": 0.0002861052631578947, + "loss": 0.7409, + "step": 1899 + }, + { + "epoch": 28.36, + "learning_rate": 0.00028607017543859646, + "loss": 0.3751, + "step": 1900 + }, + { + "epoch": 28.37, + "learning_rate": 0.0002860350877192982, + "loss": 0.154, + "step": 1901 + }, + { + "epoch": 28.39, + "learning_rate": 0.00028599999999999996, + "loss": 0.1643, + "step": 1902 + }, + { + "epoch": 28.4, + "learning_rate": 0.0002859649122807017, + "loss": 0.2874, + "step": 1903 + }, + { + "epoch": 28.42, + "learning_rate": 0.00028592982456140346, + "loss": 0.6538, + "step": 1904 + }, + { + "epoch": 28.43, + "learning_rate": 0.00028589473684210526, + "loss": 0.3018, + "step": 1905 + }, + { + "epoch": 28.45, + "learning_rate": 0.000285859649122807, + "loss": 0.7342, + "step": 1906 + }, + { + "epoch": 28.46, + "learning_rate": 0.00028582456140350876, + "loss": 0.3263, + "step": 1907 + }, + { + "epoch": 28.48, + "learning_rate": 0.0002857894736842105, + "loss": 0.5486, + "step": 1908 + }, + { + "epoch": 28.49, + "learning_rate": 0.00028575438596491225, + "loss": 0.6566, + "step": 1909 + }, + { + "epoch": 28.51, + "learning_rate": 0.000285719298245614, + "loss": 0.645, + "step": 1910 + }, + { + "epoch": 28.52, + "learning_rate": 0.00028568421052631575, + "loss": 0.657, + "step": 1911 + }, + { + "epoch": 28.54, + "learning_rate": 0.0002856491228070175, + "loss": 0.5839, + "step": 1912 + }, + { + "epoch": 28.55, + "learning_rate": 0.0002856140350877193, + "loss": 0.4959, + "step": 1913 + }, + { + "epoch": 28.57, + "learning_rate": 0.00028557894736842105, + "loss": 0.6578, + "step": 1914 + }, + { + "epoch": 28.58, + "learning_rate": 0.0002855438596491228, + "loss": 1.0061, + "step": 1915 + }, + { + "epoch": 28.59, + "learning_rate": 0.0002855087719298245, + "loss": 0.6346, + "step": 1916 + }, + { + "epoch": 28.61, + "learning_rate": 0.0002854736842105263, + "loss": 1.1097, + "step": 1917 + }, + { + "epoch": 28.62, + "learning_rate": 0.00028543859649122805, + "loss": 0.45, + "step": 1918 + }, + { + "epoch": 28.64, + "learning_rate": 0.0002854035087719298, + "loss": 0.2154, + "step": 1919 + }, + { + "epoch": 28.65, + "learning_rate": 0.00028536842105263155, + "loss": 0.4049, + "step": 1920 + }, + { + "epoch": 28.67, + "learning_rate": 0.0002853333333333333, + "loss": 0.742, + "step": 1921 + }, + { + "epoch": 28.68, + "learning_rate": 0.00028529824561403504, + "loss": 0.336, + "step": 1922 + }, + { + "epoch": 28.7, + "learning_rate": 0.0002852631578947368, + "loss": 0.4915, + "step": 1923 + }, + { + "epoch": 28.71, + "learning_rate": 0.0002852280701754386, + "loss": 0.3299, + "step": 1924 + }, + { + "epoch": 28.73, + "learning_rate": 0.00028519298245614034, + "loss": 0.1165, + "step": 1925 + }, + { + "epoch": 28.74, + "learning_rate": 0.0002851578947368421, + "loss": 0.4511, + "step": 1926 + }, + { + "epoch": 28.76, + "learning_rate": 0.00028512280701754384, + "loss": 0.36, + "step": 1927 + }, + { + "epoch": 28.77, + "learning_rate": 0.0002850877192982456, + "loss": 0.9207, + "step": 1928 + }, + { + "epoch": 28.79, + "learning_rate": 0.00028505263157894734, + "loss": 0.573, + "step": 1929 + }, + { + "epoch": 28.8, + "learning_rate": 0.0002850175438596491, + "loss": 0.282, + "step": 1930 + }, + { + "epoch": 28.82, + "learning_rate": 0.0002849824561403509, + "loss": 0.8202, + "step": 1931 + }, + { + "epoch": 28.83, + "learning_rate": 0.00028494736842105264, + "loss": 0.6128, + "step": 1932 + }, + { + "epoch": 28.85, + "learning_rate": 0.00028491228070175433, + "loss": 0.3645, + "step": 1933 + }, + { + "epoch": 28.86, + "learning_rate": 0.0002848771929824561, + "loss": 0.1557, + "step": 1934 + }, + { + "epoch": 28.88, + "learning_rate": 0.0002848421052631579, + "loss": 0.2146, + "step": 1935 + }, + { + "epoch": 28.89, + "learning_rate": 0.00028480701754385963, + "loss": 0.7744, + "step": 1936 + }, + { + "epoch": 28.91, + "learning_rate": 0.0002847719298245614, + "loss": 0.0497, + "step": 1937 + }, + { + "epoch": 28.92, + "learning_rate": 0.00028473684210526313, + "loss": 0.2807, + "step": 1938 + }, + { + "epoch": 28.94, + "learning_rate": 0.0002847017543859649, + "loss": 0.3621, + "step": 1939 + }, + { + "epoch": 28.95, + "learning_rate": 0.00028466666666666663, + "loss": 0.4343, + "step": 1940 + }, + { + "epoch": 28.97, + "learning_rate": 0.0002846315789473684, + "loss": 0.4488, + "step": 1941 + }, + { + "epoch": 28.98, + "learning_rate": 0.0002845964912280702, + "loss": 0.6635, + "step": 1942 + }, + { + "epoch": 29.0, + "learning_rate": 0.00028456140350877193, + "loss": 0.49, + "step": 1943 + }, + { + "epoch": 29.01, + "learning_rate": 0.0002845263157894737, + "loss": 0.6394, + "step": 1944 + }, + { + "epoch": 29.03, + "learning_rate": 0.00028449122807017543, + "loss": 0.289, + "step": 1945 + }, + { + "epoch": 29.04, + "learning_rate": 0.0002844561403508772, + "loss": 0.4145, + "step": 1946 + }, + { + "epoch": 29.06, + "learning_rate": 0.0002844210526315789, + "loss": 0.4305, + "step": 1947 + }, + { + "epoch": 29.07, + "learning_rate": 0.0002843859649122807, + "loss": 0.6245, + "step": 1948 + }, + { + "epoch": 29.09, + "learning_rate": 0.0002843508771929824, + "loss": 0.5812, + "step": 1949 + }, + { + "epoch": 29.1, + "learning_rate": 0.00028431578947368417, + "loss": 0.3968, + "step": 1950 + }, + { + "epoch": 29.12, + "learning_rate": 0.0002842807017543859, + "loss": 0.4733, + "step": 1951 + }, + { + "epoch": 29.13, + "learning_rate": 0.00028424561403508767, + "loss": 0.32, + "step": 1952 + }, + { + "epoch": 29.15, + "learning_rate": 0.0002842105263157894, + "loss": 0.1961, + "step": 1953 + }, + { + "epoch": 29.16, + "learning_rate": 0.0002841754385964912, + "loss": 0.5253, + "step": 1954 + }, + { + "epoch": 29.18, + "learning_rate": 0.00028414035087719297, + "loss": 0.1697, + "step": 1955 + }, + { + "epoch": 29.19, + "learning_rate": 0.0002841052631578947, + "loss": 0.3636, + "step": 1956 + }, + { + "epoch": 29.21, + "learning_rate": 0.00028407017543859647, + "loss": 0.4167, + "step": 1957 + }, + { + "epoch": 29.22, + "learning_rate": 0.0002840350877192982, + "loss": 0.4889, + "step": 1958 + }, + { + "epoch": 29.24, + "learning_rate": 0.00028399999999999996, + "loss": 0.4878, + "step": 1959 + }, + { + "epoch": 29.25, + "learning_rate": 0.0002839649122807017, + "loss": 0.6979, + "step": 1960 + }, + { + "epoch": 29.27, + "learning_rate": 0.0002839298245614035, + "loss": 0.9622, + "step": 1961 + }, + { + "epoch": 29.28, + "learning_rate": 0.00028389473684210526, + "loss": 0.5342, + "step": 1962 + }, + { + "epoch": 29.3, + "learning_rate": 0.000283859649122807, + "loss": 0.3308, + "step": 1963 + }, + { + "epoch": 29.31, + "learning_rate": 0.0002838245614035087, + "loss": 0.3056, + "step": 1964 + }, + { + "epoch": 29.33, + "learning_rate": 0.0002837894736842105, + "loss": 0.3, + "step": 1965 + }, + { + "epoch": 29.34, + "learning_rate": 0.00028375438596491226, + "loss": 0.7208, + "step": 1966 + }, + { + "epoch": 29.36, + "learning_rate": 0.000283719298245614, + "loss": 1.1475, + "step": 1967 + }, + { + "epoch": 29.37, + "learning_rate": 0.00028368421052631576, + "loss": 0.2273, + "step": 1968 + }, + { + "epoch": 29.39, + "learning_rate": 0.0002836491228070175, + "loss": 0.3318, + "step": 1969 + }, + { + "epoch": 29.4, + "learning_rate": 0.00028361403508771926, + "loss": 0.1126, + "step": 1970 + }, + { + "epoch": 29.42, + "learning_rate": 0.000283578947368421, + "loss": 0.2493, + "step": 1971 + }, + { + "epoch": 29.43, + "learning_rate": 0.0002835438596491228, + "loss": 0.1019, + "step": 1972 + }, + { + "epoch": 29.45, + "learning_rate": 0.00028350877192982456, + "loss": 0.3286, + "step": 1973 + }, + { + "epoch": 29.46, + "learning_rate": 0.0002834736842105263, + "loss": 0.0936, + "step": 1974 + }, + { + "epoch": 29.48, + "learning_rate": 0.00028343859649122805, + "loss": 0.7561, + "step": 1975 + }, + { + "epoch": 29.49, + "learning_rate": 0.0002834035087719298, + "loss": 0.2806, + "step": 1976 + }, + { + "epoch": 29.51, + "learning_rate": 0.00028336842105263155, + "loss": 0.4837, + "step": 1977 + }, + { + "epoch": 29.52, + "learning_rate": 0.0002833333333333333, + "loss": 0.4605, + "step": 1978 + }, + { + "epoch": 29.54, + "learning_rate": 0.00028329824561403505, + "loss": 0.7167, + "step": 1979 + }, + { + "epoch": 29.55, + "learning_rate": 0.00028326315789473685, + "loss": 0.3975, + "step": 1980 + }, + { + "epoch": 29.57, + "learning_rate": 0.00028322807017543855, + "loss": 0.3134, + "step": 1981 + }, + { + "epoch": 29.58, + "learning_rate": 0.0002831929824561403, + "loss": 0.4018, + "step": 1982 + }, + { + "epoch": 29.59, + "learning_rate": 0.0002831578947368421, + "loss": 0.1087, + "step": 1983 + }, + { + "epoch": 29.61, + "learning_rate": 0.00028312280701754385, + "loss": 0.793, + "step": 1984 + }, + { + "epoch": 29.62, + "learning_rate": 0.0002830877192982456, + "loss": 0.6217, + "step": 1985 + }, + { + "epoch": 29.64, + "learning_rate": 0.00028305263157894734, + "loss": 0.3012, + "step": 1986 + }, + { + "epoch": 29.65, + "learning_rate": 0.0002830175438596491, + "loss": 0.4654, + "step": 1987 + }, + { + "epoch": 29.67, + "learning_rate": 0.00028298245614035084, + "loss": 0.0225, + "step": 1988 + }, + { + "epoch": 29.68, + "learning_rate": 0.0002829473684210526, + "loss": 0.216, + "step": 1989 + }, + { + "epoch": 29.7, + "learning_rate": 0.00028291228070175434, + "loss": 0.4494, + "step": 1990 + }, + { + "epoch": 29.71, + "learning_rate": 0.00028287719298245614, + "loss": 0.5049, + "step": 1991 + }, + { + "epoch": 29.73, + "learning_rate": 0.0002828421052631579, + "loss": 0.0648, + "step": 1992 + }, + { + "epoch": 29.74, + "learning_rate": 0.00028280701754385964, + "loss": 0.4833, + "step": 1993 + }, + { + "epoch": 29.76, + "learning_rate": 0.0002827719298245614, + "loss": 0.3664, + "step": 1994 + }, + { + "epoch": 29.77, + "learning_rate": 0.00028273684210526314, + "loss": 0.5582, + "step": 1995 + }, + { + "epoch": 29.79, + "learning_rate": 0.0002827017543859649, + "loss": 0.323, + "step": 1996 + }, + { + "epoch": 29.8, + "learning_rate": 0.00028266666666666663, + "loss": 0.5679, + "step": 1997 + }, + { + "epoch": 29.82, + "learning_rate": 0.0002826315789473684, + "loss": 0.2273, + "step": 1998 + }, + { + "epoch": 29.83, + "learning_rate": 0.00028259649122807013, + "loss": 0.6848, + "step": 1999 + }, + { + "epoch": 29.85, + "learning_rate": 0.0002825614035087719, + "loss": 0.9913, + "step": 2000 + }, + { + "epoch": 29.85, + "eval_accuracy": 0.7706803720019579, + "eval_f1": 0.7733909006780546, + "eval_loss": 0.9206514954566956, + "eval_runtime": 344.174, + "eval_samples_per_second": 11.872, + "eval_steps_per_second": 0.744, + "step": 2000 + }, + { + "epoch": 29.86, + "learning_rate": 0.00028252631578947363, + "loss": 0.7397, + "step": 2001 + }, + { + "epoch": 29.88, + "learning_rate": 0.00028249122807017543, + "loss": 0.4359, + "step": 2002 + }, + { + "epoch": 29.89, + "learning_rate": 0.0002824561403508772, + "loss": 0.1009, + "step": 2003 + }, + { + "epoch": 29.91, + "learning_rate": 0.00028242105263157893, + "loss": 0.2327, + "step": 2004 + }, + { + "epoch": 29.92, + "learning_rate": 0.0002823859649122807, + "loss": 0.343, + "step": 2005 + }, + { + "epoch": 29.94, + "learning_rate": 0.00028235087719298243, + "loss": 0.3399, + "step": 2006 + }, + { + "epoch": 29.95, + "learning_rate": 0.0002823157894736842, + "loss": 0.5942, + "step": 2007 + }, + { + "epoch": 29.97, + "learning_rate": 0.0002822807017543859, + "loss": 0.5096, + "step": 2008 + }, + { + "epoch": 29.98, + "learning_rate": 0.00028224561403508773, + "loss": 0.2877, + "step": 2009 + }, + { + "epoch": 30.0, + "learning_rate": 0.0002822105263157895, + "loss": 0.1841, + "step": 2010 + }, + { + "epoch": 30.01, + "learning_rate": 0.0002821754385964912, + "loss": 0.0952, + "step": 2011 + }, + { + "epoch": 30.03, + "learning_rate": 0.0002821403508771929, + "loss": 0.6921, + "step": 2012 + }, + { + "epoch": 30.04, + "learning_rate": 0.0002821052631578947, + "loss": 0.3671, + "step": 2013 + }, + { + "epoch": 30.06, + "learning_rate": 0.00028207017543859647, + "loss": 0.3699, + "step": 2014 + }, + { + "epoch": 30.07, + "learning_rate": 0.0002820350877192982, + "loss": 0.4393, + "step": 2015 + }, + { + "epoch": 30.09, + "learning_rate": 0.00028199999999999997, + "loss": 0.0431, + "step": 2016 + }, + { + "epoch": 30.1, + "learning_rate": 0.0002819649122807017, + "loss": 0.4641, + "step": 2017 + }, + { + "epoch": 30.12, + "learning_rate": 0.00028192982456140347, + "loss": 0.4369, + "step": 2018 + }, + { + "epoch": 30.13, + "learning_rate": 0.0002818947368421052, + "loss": 0.1586, + "step": 2019 + }, + { + "epoch": 30.15, + "learning_rate": 0.000281859649122807, + "loss": 0.2343, + "step": 2020 + }, + { + "epoch": 30.16, + "learning_rate": 0.00028182456140350877, + "loss": 0.0312, + "step": 2021 + }, + { + "epoch": 30.18, + "learning_rate": 0.0002817894736842105, + "loss": 0.1787, + "step": 2022 + }, + { + "epoch": 30.19, + "learning_rate": 0.00028175438596491227, + "loss": 0.3184, + "step": 2023 + }, + { + "epoch": 30.21, + "learning_rate": 0.000281719298245614, + "loss": 0.3404, + "step": 2024 + }, + { + "epoch": 30.22, + "learning_rate": 0.00028168421052631576, + "loss": 0.2371, + "step": 2025 + }, + { + "epoch": 30.24, + "learning_rate": 0.0002816491228070175, + "loss": 0.3565, + "step": 2026 + }, + { + "epoch": 30.25, + "learning_rate": 0.00028161403508771926, + "loss": 0.6661, + "step": 2027 + }, + { + "epoch": 30.27, + "learning_rate": 0.00028157894736842106, + "loss": 0.9013, + "step": 2028 + }, + { + "epoch": 30.28, + "learning_rate": 0.00028154385964912276, + "loss": 0.3751, + "step": 2029 + }, + { + "epoch": 30.3, + "learning_rate": 0.0002815087719298245, + "loss": 0.1435, + "step": 2030 + }, + { + "epoch": 30.31, + "learning_rate": 0.0002814736842105263, + "loss": 0.2184, + "step": 2031 + }, + { + "epoch": 30.33, + "learning_rate": 0.00028143859649122806, + "loss": 0.0664, + "step": 2032 + }, + { + "epoch": 30.34, + "learning_rate": 0.0002814035087719298, + "loss": 0.227, + "step": 2033 + }, + { + "epoch": 30.36, + "learning_rate": 0.00028136842105263156, + "loss": 0.3569, + "step": 2034 + }, + { + "epoch": 30.37, + "learning_rate": 0.0002813333333333333, + "loss": 0.455, + "step": 2035 + }, + { + "epoch": 30.39, + "learning_rate": 0.00028129824561403505, + "loss": 0.3361, + "step": 2036 + }, + { + "epoch": 30.4, + "learning_rate": 0.0002812631578947368, + "loss": 0.2786, + "step": 2037 + }, + { + "epoch": 30.42, + "learning_rate": 0.00028122807017543855, + "loss": 0.0724, + "step": 2038 + }, + { + "epoch": 30.43, + "learning_rate": 0.00028119298245614035, + "loss": 0.7161, + "step": 2039 + }, + { + "epoch": 30.45, + "learning_rate": 0.0002811578947368421, + "loss": 0.0976, + "step": 2040 + }, + { + "epoch": 30.46, + "learning_rate": 0.00028112280701754385, + "loss": 0.4322, + "step": 2041 + }, + { + "epoch": 30.48, + "learning_rate": 0.00028108771929824555, + "loss": 0.4597, + "step": 2042 + }, + { + "epoch": 30.49, + "learning_rate": 0.00028105263157894735, + "loss": 0.6184, + "step": 2043 + }, + { + "epoch": 30.51, + "learning_rate": 0.0002810175438596491, + "loss": 0.7813, + "step": 2044 + }, + { + "epoch": 30.52, + "learning_rate": 0.00028098245614035085, + "loss": 0.3278, + "step": 2045 + }, + { + "epoch": 30.54, + "learning_rate": 0.0002809473684210526, + "loss": 0.1911, + "step": 2046 + }, + { + "epoch": 30.55, + "learning_rate": 0.00028091228070175434, + "loss": 0.3555, + "step": 2047 + }, + { + "epoch": 30.57, + "learning_rate": 0.0002808771929824561, + "loss": 0.6221, + "step": 2048 + }, + { + "epoch": 30.58, + "learning_rate": 0.00028084210526315784, + "loss": 0.376, + "step": 2049 + }, + { + "epoch": 30.59, + "learning_rate": 0.00028080701754385965, + "loss": 0.259, + "step": 2050 + }, + { + "epoch": 30.61, + "learning_rate": 0.0002807719298245614, + "loss": 0.3966, + "step": 2051 + }, + { + "epoch": 30.62, + "learning_rate": 0.00028073684210526314, + "loss": 0.5047, + "step": 2052 + }, + { + "epoch": 30.64, + "learning_rate": 0.0002807017543859649, + "loss": 0.4883, + "step": 2053 + }, + { + "epoch": 30.65, + "learning_rate": 0.00028066666666666664, + "loss": 0.0462, + "step": 2054 + }, + { + "epoch": 30.67, + "learning_rate": 0.0002806315789473684, + "loss": 0.152, + "step": 2055 + }, + { + "epoch": 30.68, + "learning_rate": 0.00028059649122807014, + "loss": 0.0942, + "step": 2056 + }, + { + "epoch": 30.7, + "learning_rate": 0.00028056140350877194, + "loss": 0.5002, + "step": 2057 + }, + { + "epoch": 30.71, + "learning_rate": 0.0002805263157894737, + "loss": 0.3678, + "step": 2058 + }, + { + "epoch": 30.73, + "learning_rate": 0.0002804912280701754, + "loss": 0.1678, + "step": 2059 + }, + { + "epoch": 30.74, + "learning_rate": 0.00028045614035087713, + "loss": 0.5979, + "step": 2060 + }, + { + "epoch": 30.76, + "learning_rate": 0.00028042105263157894, + "loss": 0.2498, + "step": 2061 + }, + { + "epoch": 30.77, + "learning_rate": 0.0002803859649122807, + "loss": 0.6051, + "step": 2062 + }, + { + "epoch": 30.79, + "learning_rate": 0.00028035087719298243, + "loss": 0.3782, + "step": 2063 + }, + { + "epoch": 30.8, + "learning_rate": 0.0002803157894736842, + "loss": 0.3381, + "step": 2064 + }, + { + "epoch": 30.82, + "learning_rate": 0.00028028070175438593, + "loss": 0.5742, + "step": 2065 + }, + { + "epoch": 30.83, + "learning_rate": 0.0002802456140350877, + "loss": 0.2356, + "step": 2066 + }, + { + "epoch": 30.85, + "learning_rate": 0.00028021052631578943, + "loss": 0.5509, + "step": 2067 + }, + { + "epoch": 30.86, + "learning_rate": 0.00028017543859649123, + "loss": 0.4965, + "step": 2068 + }, + { + "epoch": 30.88, + "learning_rate": 0.000280140350877193, + "loss": 0.1467, + "step": 2069 + }, + { + "epoch": 30.89, + "learning_rate": 0.00028010526315789473, + "loss": 0.4174, + "step": 2070 + }, + { + "epoch": 30.91, + "learning_rate": 0.0002800701754385965, + "loss": 0.2724, + "step": 2071 + }, + { + "epoch": 30.92, + "learning_rate": 0.0002800350877192982, + "loss": 0.4106, + "step": 2072 + }, + { + "epoch": 30.94, + "learning_rate": 0.00028, + "loss": 0.5703, + "step": 2073 + }, + { + "epoch": 30.95, + "learning_rate": 0.0002799649122807017, + "loss": 0.3324, + "step": 2074 + }, + { + "epoch": 30.97, + "learning_rate": 0.0002799298245614035, + "loss": 0.13, + "step": 2075 + }, + { + "epoch": 30.98, + "learning_rate": 0.0002798947368421053, + "loss": 0.2645, + "step": 2076 + }, + { + "epoch": 31.0, + "learning_rate": 0.00027985964912280697, + "loss": 0.1889, + "step": 2077 + }, + { + "epoch": 31.01, + "learning_rate": 0.0002798245614035087, + "loss": 0.3298, + "step": 2078 + }, + { + "epoch": 31.03, + "learning_rate": 0.00027978947368421047, + "loss": 0.1496, + "step": 2079 + }, + { + "epoch": 31.04, + "learning_rate": 0.00027975438596491227, + "loss": 0.3236, + "step": 2080 + }, + { + "epoch": 31.06, + "learning_rate": 0.000279719298245614, + "loss": 0.1155, + "step": 2081 + }, + { + "epoch": 31.07, + "learning_rate": 0.00027968421052631577, + "loss": 0.1675, + "step": 2082 + }, + { + "epoch": 31.09, + "learning_rate": 0.0002796491228070175, + "loss": 0.2023, + "step": 2083 + }, + { + "epoch": 31.1, + "learning_rate": 0.00027961403508771927, + "loss": 0.35, + "step": 2084 + }, + { + "epoch": 31.12, + "learning_rate": 0.000279578947368421, + "loss": 0.5572, + "step": 2085 + }, + { + "epoch": 31.13, + "learning_rate": 0.00027954385964912276, + "loss": 0.1961, + "step": 2086 + }, + { + "epoch": 31.15, + "learning_rate": 0.00027950877192982457, + "loss": 0.1702, + "step": 2087 + }, + { + "epoch": 31.16, + "learning_rate": 0.0002794736842105263, + "loss": 0.0372, + "step": 2088 + }, + { + "epoch": 31.18, + "learning_rate": 0.00027943859649122806, + "loss": 0.3766, + "step": 2089 + }, + { + "epoch": 31.19, + "learning_rate": 0.00027940350877192976, + "loss": 0.3156, + "step": 2090 + }, + { + "epoch": 31.21, + "learning_rate": 0.00027936842105263156, + "loss": 0.1704, + "step": 2091 + }, + { + "epoch": 31.22, + "learning_rate": 0.0002793333333333333, + "loss": 0.0498, + "step": 2092 + }, + { + "epoch": 31.24, + "learning_rate": 0.00027929824561403506, + "loss": 0.2717, + "step": 2093 + }, + { + "epoch": 31.25, + "learning_rate": 0.0002792631578947368, + "loss": 0.0501, + "step": 2094 + }, + { + "epoch": 31.27, + "learning_rate": 0.00027922807017543856, + "loss": 0.0992, + "step": 2095 + }, + { + "epoch": 31.28, + "learning_rate": 0.0002791929824561403, + "loss": 0.5988, + "step": 2096 + }, + { + "epoch": 31.3, + "learning_rate": 0.00027915789473684205, + "loss": 0.2703, + "step": 2097 + }, + { + "epoch": 31.31, + "learning_rate": 0.00027912280701754386, + "loss": 0.5195, + "step": 2098 + }, + { + "epoch": 31.33, + "learning_rate": 0.0002790877192982456, + "loss": 0.0573, + "step": 2099 + }, + { + "epoch": 31.34, + "learning_rate": 0.00027905263157894736, + "loss": 0.0256, + "step": 2100 + }, + { + "epoch": 31.36, + "learning_rate": 0.0002790175438596491, + "loss": 0.579, + "step": 2101 + }, + { + "epoch": 31.37, + "learning_rate": 0.00027898245614035085, + "loss": 0.0547, + "step": 2102 + }, + { + "epoch": 31.39, + "learning_rate": 0.0002789473684210526, + "loss": 0.0967, + "step": 2103 + }, + { + "epoch": 31.4, + "learning_rate": 0.00027891228070175435, + "loss": 0.258, + "step": 2104 + }, + { + "epoch": 31.42, + "learning_rate": 0.00027887719298245615, + "loss": 0.2852, + "step": 2105 + }, + { + "epoch": 31.43, + "learning_rate": 0.0002788421052631579, + "loss": 0.0816, + "step": 2106 + }, + { + "epoch": 31.45, + "learning_rate": 0.0002788070175438596, + "loss": 0.2592, + "step": 2107 + }, + { + "epoch": 31.46, + "learning_rate": 0.00027877192982456135, + "loss": 0.4082, + "step": 2108 + }, + { + "epoch": 31.48, + "learning_rate": 0.00027873684210526315, + "loss": 0.0363, + "step": 2109 + }, + { + "epoch": 31.49, + "learning_rate": 0.0002787017543859649, + "loss": 0.2759, + "step": 2110 + }, + { + "epoch": 31.51, + "learning_rate": 0.00027866666666666665, + "loss": 0.9859, + "step": 2111 + }, + { + "epoch": 31.52, + "learning_rate": 0.0002786315789473684, + "loss": 0.3908, + "step": 2112 + }, + { + "epoch": 31.54, + "learning_rate": 0.00027859649122807014, + "loss": 0.4191, + "step": 2113 + }, + { + "epoch": 31.55, + "learning_rate": 0.0002785614035087719, + "loss": 0.2121, + "step": 2114 + }, + { + "epoch": 31.57, + "learning_rate": 0.00027852631578947364, + "loss": 0.0456, + "step": 2115 + }, + { + "epoch": 31.58, + "learning_rate": 0.0002784912280701754, + "loss": 0.3328, + "step": 2116 + }, + { + "epoch": 31.59, + "learning_rate": 0.0002784561403508772, + "loss": 0.4704, + "step": 2117 + }, + { + "epoch": 31.61, + "learning_rate": 0.00027842105263157894, + "loss": 0.1809, + "step": 2118 + }, + { + "epoch": 31.62, + "learning_rate": 0.0002783859649122807, + "loss": 0.394, + "step": 2119 + }, + { + "epoch": 31.64, + "learning_rate": 0.00027835087719298244, + "loss": 0.0635, + "step": 2120 + }, + { + "epoch": 31.65, + "learning_rate": 0.0002783157894736842, + "loss": 0.0528, + "step": 2121 + }, + { + "epoch": 31.67, + "learning_rate": 0.00027828070175438594, + "loss": 0.0976, + "step": 2122 + }, + { + "epoch": 31.68, + "learning_rate": 0.0002782456140350877, + "loss": 0.5734, + "step": 2123 + }, + { + "epoch": 31.7, + "learning_rate": 0.0002782105263157895, + "loss": 0.2444, + "step": 2124 + }, + { + "epoch": 31.71, + "learning_rate": 0.0002781754385964912, + "loss": 0.1145, + "step": 2125 + }, + { + "epoch": 31.73, + "learning_rate": 0.00027814035087719293, + "loss": 0.2101, + "step": 2126 + }, + { + "epoch": 31.74, + "learning_rate": 0.0002781052631578947, + "loss": 0.3387, + "step": 2127 + }, + { + "epoch": 31.76, + "learning_rate": 0.0002780701754385965, + "loss": 0.1206, + "step": 2128 + }, + { + "epoch": 31.77, + "learning_rate": 0.00027803508771929823, + "loss": 0.3084, + "step": 2129 + }, + { + "epoch": 31.79, + "learning_rate": 0.000278, + "loss": 0.5995, + "step": 2130 + }, + { + "epoch": 31.8, + "learning_rate": 0.00027796491228070173, + "loss": 0.6344, + "step": 2131 + }, + { + "epoch": 31.82, + "learning_rate": 0.0002779298245614035, + "loss": 0.4073, + "step": 2132 + }, + { + "epoch": 31.83, + "learning_rate": 0.00027789473684210523, + "loss": 0.2271, + "step": 2133 + }, + { + "epoch": 31.85, + "learning_rate": 0.000277859649122807, + "loss": 0.1028, + "step": 2134 + }, + { + "epoch": 31.86, + "learning_rate": 0.0002778245614035088, + "loss": 0.1707, + "step": 2135 + }, + { + "epoch": 31.88, + "learning_rate": 0.00027778947368421053, + "loss": 0.1946, + "step": 2136 + }, + { + "epoch": 31.89, + "learning_rate": 0.0002777543859649123, + "loss": 0.5396, + "step": 2137 + }, + { + "epoch": 31.91, + "learning_rate": 0.00027771929824561397, + "loss": 0.0741, + "step": 2138 + }, + { + "epoch": 31.92, + "learning_rate": 0.0002776842105263158, + "loss": 0.3099, + "step": 2139 + }, + { + "epoch": 31.94, + "learning_rate": 0.0002776491228070175, + "loss": 0.2818, + "step": 2140 + }, + { + "epoch": 31.95, + "learning_rate": 0.00027761403508771927, + "loss": 0.3299, + "step": 2141 + }, + { + "epoch": 31.97, + "learning_rate": 0.000277578947368421, + "loss": 0.4072, + "step": 2142 + }, + { + "epoch": 31.98, + "learning_rate": 0.00027754385964912277, + "loss": 0.0182, + "step": 2143 + }, + { + "epoch": 32.0, + "learning_rate": 0.0002775087719298245, + "loss": 0.6486, + "step": 2144 + }, + { + "epoch": 32.01, + "learning_rate": 0.00027747368421052627, + "loss": 0.6269, + "step": 2145 + }, + { + "epoch": 32.03, + "learning_rate": 0.00027743859649122807, + "loss": 0.353, + "step": 2146 + }, + { + "epoch": 32.04, + "learning_rate": 0.0002774035087719298, + "loss": 0.4854, + "step": 2147 + }, + { + "epoch": 32.06, + "learning_rate": 0.00027736842105263157, + "loss": 0.2553, + "step": 2148 + }, + { + "epoch": 32.07, + "learning_rate": 0.0002773333333333333, + "loss": 0.158, + "step": 2149 + }, + { + "epoch": 32.09, + "learning_rate": 0.00027729824561403507, + "loss": 0.4838, + "step": 2150 + }, + { + "epoch": 32.1, + "learning_rate": 0.0002772631578947368, + "loss": 0.2814, + "step": 2151 + }, + { + "epoch": 32.12, + "learning_rate": 0.00027722807017543856, + "loss": 0.4563, + "step": 2152 + }, + { + "epoch": 32.13, + "learning_rate": 0.0002771929824561403, + "loss": 0.4573, + "step": 2153 + }, + { + "epoch": 32.15, + "learning_rate": 0.0002771578947368421, + "loss": 0.3097, + "step": 2154 + }, + { + "epoch": 32.16, + "learning_rate": 0.0002771228070175438, + "loss": 0.2421, + "step": 2155 + }, + { + "epoch": 32.18, + "learning_rate": 0.00027708771929824556, + "loss": 0.8276, + "step": 2156 + }, + { + "epoch": 32.19, + "learning_rate": 0.00027705263157894736, + "loss": 0.4142, + "step": 2157 + }, + { + "epoch": 32.21, + "learning_rate": 0.0002770175438596491, + "loss": 0.0869, + "step": 2158 + }, + { + "epoch": 32.22, + "learning_rate": 0.00027698245614035086, + "loss": 0.3774, + "step": 2159 + }, + { + "epoch": 32.24, + "learning_rate": 0.0002769473684210526, + "loss": 0.2919, + "step": 2160 + }, + { + "epoch": 32.25, + "learning_rate": 0.00027691228070175436, + "loss": 0.652, + "step": 2161 + }, + { + "epoch": 32.27, + "learning_rate": 0.0002768771929824561, + "loss": 0.2367, + "step": 2162 + }, + { + "epoch": 32.28, + "learning_rate": 0.00027684210526315785, + "loss": 0.7383, + "step": 2163 + }, + { + "epoch": 32.3, + "learning_rate": 0.0002768070175438596, + "loss": 0.4451, + "step": 2164 + }, + { + "epoch": 32.31, + "learning_rate": 0.0002767719298245614, + "loss": 0.2831, + "step": 2165 + }, + { + "epoch": 32.33, + "learning_rate": 0.00027673684210526315, + "loss": 0.1604, + "step": 2166 + }, + { + "epoch": 32.34, + "learning_rate": 0.0002767017543859649, + "loss": 0.0478, + "step": 2167 + }, + { + "epoch": 32.36, + "learning_rate": 0.00027666666666666665, + "loss": 0.2886, + "step": 2168 + }, + { + "epoch": 32.37, + "learning_rate": 0.0002766315789473684, + "loss": 0.625, + "step": 2169 + }, + { + "epoch": 32.39, + "learning_rate": 0.00027659649122807015, + "loss": 0.2271, + "step": 2170 + }, + { + "epoch": 32.4, + "learning_rate": 0.0002765614035087719, + "loss": 0.1946, + "step": 2171 + }, + { + "epoch": 32.42, + "learning_rate": 0.00027652631578947365, + "loss": 0.2989, + "step": 2172 + }, + { + "epoch": 32.43, + "learning_rate": 0.0002764912280701754, + "loss": 0.0619, + "step": 2173 + }, + { + "epoch": 32.45, + "learning_rate": 0.00027645614035087714, + "loss": 0.2763, + "step": 2174 + }, + { + "epoch": 32.46, + "learning_rate": 0.0002764210526315789, + "loss": 0.416, + "step": 2175 + }, + { + "epoch": 32.48, + "learning_rate": 0.0002763859649122807, + "loss": 0.2922, + "step": 2176 + }, + { + "epoch": 32.49, + "learning_rate": 0.00027635087719298244, + "loss": 0.2956, + "step": 2177 + }, + { + "epoch": 32.51, + "learning_rate": 0.0002763157894736842, + "loss": 0.369, + "step": 2178 + }, + { + "epoch": 32.52, + "learning_rate": 0.00027628070175438594, + "loss": 0.4236, + "step": 2179 + }, + { + "epoch": 32.54, + "learning_rate": 0.0002762456140350877, + "loss": 0.4884, + "step": 2180 + }, + { + "epoch": 32.55, + "learning_rate": 0.00027621052631578944, + "loss": 0.2356, + "step": 2181 + }, + { + "epoch": 32.57, + "learning_rate": 0.0002761754385964912, + "loss": 0.4257, + "step": 2182 + }, + { + "epoch": 32.58, + "learning_rate": 0.000276140350877193, + "loss": 0.3091, + "step": 2183 + }, + { + "epoch": 32.59, + "learning_rate": 0.00027610526315789474, + "loss": 0.2393, + "step": 2184 + }, + { + "epoch": 32.61, + "learning_rate": 0.0002760701754385965, + "loss": 0.2166, + "step": 2185 + }, + { + "epoch": 32.62, + "learning_rate": 0.0002760350877192982, + "loss": 0.0775, + "step": 2186 + }, + { + "epoch": 32.64, + "learning_rate": 0.000276, + "loss": 0.2225, + "step": 2187 + }, + { + "epoch": 32.65, + "learning_rate": 0.00027596491228070174, + "loss": 0.2278, + "step": 2188 + }, + { + "epoch": 32.67, + "learning_rate": 0.0002759298245614035, + "loss": 0.2411, + "step": 2189 + }, + { + "epoch": 32.68, + "learning_rate": 0.00027589473684210523, + "loss": 0.9149, + "step": 2190 + }, + { + "epoch": 32.7, + "learning_rate": 0.000275859649122807, + "loss": 0.186, + "step": 2191 + }, + { + "epoch": 32.71, + "learning_rate": 0.00027582456140350873, + "loss": 0.3165, + "step": 2192 + }, + { + "epoch": 32.73, + "learning_rate": 0.0002757894736842105, + "loss": 0.4464, + "step": 2193 + }, + { + "epoch": 32.74, + "learning_rate": 0.0002757543859649123, + "loss": 0.3044, + "step": 2194 + }, + { + "epoch": 32.76, + "learning_rate": 0.00027571929824561403, + "loss": 0.4148, + "step": 2195 + }, + { + "epoch": 32.77, + "learning_rate": 0.0002756842105263158, + "loss": 0.4251, + "step": 2196 + }, + { + "epoch": 32.79, + "learning_rate": 0.00027564912280701753, + "loss": 0.3422, + "step": 2197 + }, + { + "epoch": 32.8, + "learning_rate": 0.0002756140350877193, + "loss": 0.0342, + "step": 2198 + }, + { + "epoch": 32.82, + "learning_rate": 0.000275578947368421, + "loss": 0.2479, + "step": 2199 + }, + { + "epoch": 32.83, + "learning_rate": 0.0002755438596491228, + "loss": 0.2307, + "step": 2200 + }, + { + "epoch": 32.83, + "eval_accuracy": 0.8086147821830642, + "eval_f1": 0.8071662783486067, + "eval_loss": 0.7650861740112305, + "eval_runtime": 343.8365, + "eval_samples_per_second": 11.884, + "eval_steps_per_second": 0.745, + "step": 2200 + }, + { + "epoch": 32.85, + "learning_rate": 0.0002755087719298245, + "loss": 0.5653, + "step": 2201 + }, + { + "epoch": 32.86, + "learning_rate": 0.0002754736842105263, + "loss": 0.2916, + "step": 2202 + }, + { + "epoch": 32.88, + "learning_rate": 0.000275438596491228, + "loss": 0.0993, + "step": 2203 + }, + { + "epoch": 32.89, + "learning_rate": 0.00027540350877192977, + "loss": 0.3158, + "step": 2204 + }, + { + "epoch": 32.91, + "learning_rate": 0.0002753684210526315, + "loss": 0.0403, + "step": 2205 + }, + { + "epoch": 32.92, + "learning_rate": 0.0002753333333333333, + "loss": 0.1977, + "step": 2206 + }, + { + "epoch": 32.94, + "learning_rate": 0.00027529824561403507, + "loss": 0.2377, + "step": 2207 + }, + { + "epoch": 32.95, + "learning_rate": 0.0002752631578947368, + "loss": 0.1325, + "step": 2208 + }, + { + "epoch": 32.97, + "learning_rate": 0.00027522807017543857, + "loss": 0.4147, + "step": 2209 + }, + { + "epoch": 32.98, + "learning_rate": 0.0002751929824561403, + "loss": 0.1223, + "step": 2210 + }, + { + "epoch": 33.0, + "learning_rate": 0.00027515789473684207, + "loss": 0.1362, + "step": 2211 + }, + { + "epoch": 33.01, + "learning_rate": 0.0002751228070175438, + "loss": 0.3086, + "step": 2212 + }, + { + "epoch": 33.03, + "learning_rate": 0.0002750877192982456, + "loss": 0.3467, + "step": 2213 + }, + { + "epoch": 33.04, + "learning_rate": 0.00027505263157894737, + "loss": 0.1881, + "step": 2214 + }, + { + "epoch": 33.06, + "learning_rate": 0.0002750175438596491, + "loss": 0.2578, + "step": 2215 + }, + { + "epoch": 33.07, + "learning_rate": 0.00027498245614035086, + "loss": 0.2942, + "step": 2216 + }, + { + "epoch": 33.09, + "learning_rate": 0.0002749473684210526, + "loss": 0.3025, + "step": 2217 + }, + { + "epoch": 33.1, + "learning_rate": 0.00027491228070175436, + "loss": 0.1853, + "step": 2218 + }, + { + "epoch": 33.12, + "learning_rate": 0.0002748771929824561, + "loss": 0.052, + "step": 2219 + }, + { + "epoch": 33.13, + "learning_rate": 0.00027484210526315786, + "loss": 0.3404, + "step": 2220 + }, + { + "epoch": 33.15, + "learning_rate": 0.0002748070175438596, + "loss": 0.0831, + "step": 2221 + }, + { + "epoch": 33.16, + "learning_rate": 0.00027477192982456136, + "loss": 0.1659, + "step": 2222 + }, + { + "epoch": 33.18, + "learning_rate": 0.0002747368421052631, + "loss": 0.0441, + "step": 2223 + }, + { + "epoch": 33.19, + "learning_rate": 0.0002747017543859649, + "loss": 0.2571, + "step": 2224 + }, + { + "epoch": 33.21, + "learning_rate": 0.00027466666666666666, + "loss": 0.3079, + "step": 2225 + }, + { + "epoch": 33.22, + "learning_rate": 0.0002746315789473684, + "loss": 0.3602, + "step": 2226 + }, + { + "epoch": 33.24, + "learning_rate": 0.00027459649122807015, + "loss": 0.1547, + "step": 2227 + }, + { + "epoch": 33.25, + "learning_rate": 0.0002745614035087719, + "loss": 0.3108, + "step": 2228 + }, + { + "epoch": 33.27, + "learning_rate": 0.00027452631578947365, + "loss": 0.6134, + "step": 2229 + }, + { + "epoch": 33.28, + "learning_rate": 0.0002744912280701754, + "loss": 0.3319, + "step": 2230 + }, + { + "epoch": 33.3, + "learning_rate": 0.0002744561403508772, + "loss": 0.5913, + "step": 2231 + }, + { + "epoch": 33.31, + "learning_rate": 0.00027442105263157895, + "loss": 0.4172, + "step": 2232 + }, + { + "epoch": 33.33, + "learning_rate": 0.0002743859649122807, + "loss": 0.5581, + "step": 2233 + }, + { + "epoch": 33.34, + "learning_rate": 0.0002743508771929824, + "loss": 0.1744, + "step": 2234 + }, + { + "epoch": 33.36, + "learning_rate": 0.0002743157894736842, + "loss": 0.3023, + "step": 2235 + }, + { + "epoch": 33.37, + "learning_rate": 0.00027428070175438595, + "loss": 0.9059, + "step": 2236 + }, + { + "epoch": 33.39, + "learning_rate": 0.0002742456140350877, + "loss": 0.1562, + "step": 2237 + }, + { + "epoch": 33.4, + "learning_rate": 0.00027421052631578945, + "loss": 0.2438, + "step": 2238 + }, + { + "epoch": 33.42, + "learning_rate": 0.0002741754385964912, + "loss": 0.5985, + "step": 2239 + }, + { + "epoch": 33.43, + "learning_rate": 0.00027414035087719294, + "loss": 0.021, + "step": 2240 + }, + { + "epoch": 33.45, + "learning_rate": 0.0002741052631578947, + "loss": 0.4208, + "step": 2241 + }, + { + "epoch": 33.46, + "learning_rate": 0.00027407017543859644, + "loss": 0.2522, + "step": 2242 + }, + { + "epoch": 33.48, + "learning_rate": 0.00027403508771929824, + "loss": 0.2324, + "step": 2243 + }, + { + "epoch": 33.49, + "learning_rate": 0.000274, + "loss": 0.235, + "step": 2244 + }, + { + "epoch": 33.51, + "learning_rate": 0.00027396491228070174, + "loss": 0.3669, + "step": 2245 + }, + { + "epoch": 33.52, + "learning_rate": 0.0002739298245614035, + "loss": 0.3819, + "step": 2246 + }, + { + "epoch": 33.54, + "learning_rate": 0.00027389473684210524, + "loss": 0.1008, + "step": 2247 + }, + { + "epoch": 33.55, + "learning_rate": 0.000273859649122807, + "loss": 0.4268, + "step": 2248 + }, + { + "epoch": 33.57, + "learning_rate": 0.00027382456140350874, + "loss": 0.0733, + "step": 2249 + }, + { + "epoch": 33.58, + "learning_rate": 0.00027378947368421054, + "loss": 0.2913, + "step": 2250 + }, + { + "epoch": 33.59, + "learning_rate": 0.00027375438596491223, + "loss": 0.2841, + "step": 2251 + }, + { + "epoch": 33.61, + "learning_rate": 0.000273719298245614, + "loss": 0.0646, + "step": 2252 + }, + { + "epoch": 33.62, + "learning_rate": 0.00027368421052631573, + "loss": 0.4224, + "step": 2253 + }, + { + "epoch": 33.64, + "learning_rate": 0.00027364912280701753, + "loss": 0.1624, + "step": 2254 + }, + { + "epoch": 33.65, + "learning_rate": 0.0002736140350877193, + "loss": 0.1866, + "step": 2255 + }, + { + "epoch": 33.67, + "learning_rate": 0.00027357894736842103, + "loss": 0.0219, + "step": 2256 + }, + { + "epoch": 33.68, + "learning_rate": 0.0002735438596491228, + "loss": 0.123, + "step": 2257 + }, + { + "epoch": 33.7, + "learning_rate": 0.00027350877192982453, + "loss": 0.4814, + "step": 2258 + }, + { + "epoch": 33.71, + "learning_rate": 0.0002734736842105263, + "loss": 0.1772, + "step": 2259 + }, + { + "epoch": 33.73, + "learning_rate": 0.000273438596491228, + "loss": 0.2027, + "step": 2260 + }, + { + "epoch": 33.74, + "learning_rate": 0.00027340350877192983, + "loss": 0.2114, + "step": 2261 + }, + { + "epoch": 33.76, + "learning_rate": 0.0002733684210526316, + "loss": 0.4351, + "step": 2262 + }, + { + "epoch": 33.77, + "learning_rate": 0.00027333333333333333, + "loss": 0.4057, + "step": 2263 + }, + { + "epoch": 33.79, + "learning_rate": 0.0002732982456140351, + "loss": 0.2215, + "step": 2264 + }, + { + "epoch": 33.8, + "learning_rate": 0.0002732631578947368, + "loss": 0.1509, + "step": 2265 + }, + { + "epoch": 33.82, + "learning_rate": 0.0002732280701754386, + "loss": 0.4424, + "step": 2266 + }, + { + "epoch": 33.83, + "learning_rate": 0.0002731929824561403, + "loss": 0.2958, + "step": 2267 + }, + { + "epoch": 33.85, + "learning_rate": 0.00027315789473684207, + "loss": 0.0407, + "step": 2268 + }, + { + "epoch": 33.86, + "learning_rate": 0.0002731228070175438, + "loss": 0.2227, + "step": 2269 + }, + { + "epoch": 33.88, + "learning_rate": 0.00027308771929824557, + "loss": 0.6847, + "step": 2270 + }, + { + "epoch": 33.89, + "learning_rate": 0.0002730526315789473, + "loss": 0.1189, + "step": 2271 + }, + { + "epoch": 33.91, + "learning_rate": 0.0002730175438596491, + "loss": 0.4449, + "step": 2272 + }, + { + "epoch": 33.92, + "learning_rate": 0.00027298245614035087, + "loss": 0.1553, + "step": 2273 + }, + { + "epoch": 33.94, + "learning_rate": 0.0002729473684210526, + "loss": 0.0367, + "step": 2274 + }, + { + "epoch": 33.95, + "learning_rate": 0.00027291228070175437, + "loss": 0.0587, + "step": 2275 + }, + { + "epoch": 33.97, + "learning_rate": 0.0002728771929824561, + "loss": 0.4163, + "step": 2276 + }, + { + "epoch": 33.98, + "learning_rate": 0.00027284210526315786, + "loss": 0.3024, + "step": 2277 + }, + { + "epoch": 34.0, + "learning_rate": 0.0002728070175438596, + "loss": 0.6945, + "step": 2278 + }, + { + "epoch": 34.01, + "learning_rate": 0.00027277192982456136, + "loss": 0.4193, + "step": 2279 + }, + { + "epoch": 34.03, + "learning_rate": 0.00027273684210526317, + "loss": 0.2735, + "step": 2280 + }, + { + "epoch": 34.04, + "learning_rate": 0.0002727017543859649, + "loss": 0.1118, + "step": 2281 + }, + { + "epoch": 34.06, + "learning_rate": 0.0002726666666666666, + "loss": 0.6835, + "step": 2282 + }, + { + "epoch": 34.07, + "learning_rate": 0.0002726315789473684, + "loss": 0.3097, + "step": 2283 + }, + { + "epoch": 34.09, + "learning_rate": 0.00027259649122807016, + "loss": 0.2908, + "step": 2284 + }, + { + "epoch": 34.1, + "learning_rate": 0.0002725614035087719, + "loss": 0.168, + "step": 2285 + }, + { + "epoch": 34.12, + "learning_rate": 0.00027252631578947366, + "loss": 0.3362, + "step": 2286 + }, + { + "epoch": 34.13, + "learning_rate": 0.0002724912280701754, + "loss": 0.2603, + "step": 2287 + }, + { + "epoch": 34.15, + "learning_rate": 0.00027245614035087716, + "loss": 0.0215, + "step": 2288 + }, + { + "epoch": 34.16, + "learning_rate": 0.0002724210526315789, + "loss": 0.0313, + "step": 2289 + }, + { + "epoch": 34.18, + "learning_rate": 0.00027238596491228065, + "loss": 0.0771, + "step": 2290 + }, + { + "epoch": 34.19, + "learning_rate": 0.00027235087719298246, + "loss": 0.067, + "step": 2291 + }, + { + "epoch": 34.21, + "learning_rate": 0.0002723157894736842, + "loss": 0.5942, + "step": 2292 + }, + { + "epoch": 34.22, + "learning_rate": 0.00027228070175438595, + "loss": 0.0307, + "step": 2293 + }, + { + "epoch": 34.24, + "learning_rate": 0.0002722456140350877, + "loss": 0.226, + "step": 2294 + }, + { + "epoch": 34.25, + "learning_rate": 0.00027221052631578945, + "loss": 0.0363, + "step": 2295 + }, + { + "epoch": 34.27, + "learning_rate": 0.0002721754385964912, + "loss": 0.1999, + "step": 2296 + }, + { + "epoch": 34.28, + "learning_rate": 0.00027214035087719295, + "loss": 0.1683, + "step": 2297 + }, + { + "epoch": 34.3, + "learning_rate": 0.00027210526315789475, + "loss": 0.0772, + "step": 2298 + }, + { + "epoch": 34.31, + "learning_rate": 0.00027207017543859645, + "loss": 0.1123, + "step": 2299 + }, + { + "epoch": 34.33, + "learning_rate": 0.0002720350877192982, + "loss": 0.4395, + "step": 2300 + }, + { + "epoch": 34.34, + "learning_rate": 0.00027199999999999994, + "loss": 0.4303, + "step": 2301 + }, + { + "epoch": 34.36, + "learning_rate": 0.00027196491228070175, + "loss": 0.3241, + "step": 2302 + }, + { + "epoch": 34.37, + "learning_rate": 0.0002719298245614035, + "loss": 0.1974, + "step": 2303 + }, + { + "epoch": 34.39, + "learning_rate": 0.00027189473684210524, + "loss": 0.3825, + "step": 2304 + }, + { + "epoch": 34.4, + "learning_rate": 0.000271859649122807, + "loss": 0.1469, + "step": 2305 + }, + { + "epoch": 34.42, + "learning_rate": 0.00027182456140350874, + "loss": 0.2059, + "step": 2306 + }, + { + "epoch": 34.43, + "learning_rate": 0.0002717894736842105, + "loss": 0.026, + "step": 2307 + }, + { + "epoch": 34.45, + "learning_rate": 0.00027175438596491224, + "loss": 0.2546, + "step": 2308 + }, + { + "epoch": 34.46, + "learning_rate": 0.00027171929824561404, + "loss": 0.1752, + "step": 2309 + }, + { + "epoch": 34.48, + "learning_rate": 0.0002716842105263158, + "loss": 0.1128, + "step": 2310 + }, + { + "epoch": 34.49, + "learning_rate": 0.00027164912280701754, + "loss": 0.2363, + "step": 2311 + }, + { + "epoch": 34.51, + "learning_rate": 0.0002716140350877193, + "loss": 0.2964, + "step": 2312 + }, + { + "epoch": 34.52, + "learning_rate": 0.00027157894736842104, + "loss": 0.6012, + "step": 2313 + }, + { + "epoch": 34.54, + "learning_rate": 0.0002715438596491228, + "loss": 0.4373, + "step": 2314 + }, + { + "epoch": 34.55, + "learning_rate": 0.00027150877192982453, + "loss": 0.3563, + "step": 2315 + }, + { + "epoch": 34.57, + "learning_rate": 0.0002714736842105263, + "loss": 0.264, + "step": 2316 + }, + { + "epoch": 34.58, + "learning_rate": 0.00027143859649122803, + "loss": 0.0819, + "step": 2317 + }, + { + "epoch": 34.59, + "learning_rate": 0.0002714035087719298, + "loss": 0.2622, + "step": 2318 + }, + { + "epoch": 34.61, + "learning_rate": 0.00027136842105263153, + "loss": 0.2915, + "step": 2319 + }, + { + "epoch": 34.62, + "learning_rate": 0.00027133333333333333, + "loss": 0.2508, + "step": 2320 + }, + { + "epoch": 34.64, + "learning_rate": 0.0002712982456140351, + "loss": 0.2723, + "step": 2321 + }, + { + "epoch": 34.65, + "learning_rate": 0.00027126315789473683, + "loss": 0.3011, + "step": 2322 + }, + { + "epoch": 34.67, + "learning_rate": 0.0002712280701754386, + "loss": 0.3078, + "step": 2323 + }, + { + "epoch": 34.68, + "learning_rate": 0.00027119298245614033, + "loss": 0.0938, + "step": 2324 + }, + { + "epoch": 34.7, + "learning_rate": 0.0002711578947368421, + "loss": 0.0643, + "step": 2325 + }, + { + "epoch": 34.71, + "learning_rate": 0.0002711228070175438, + "loss": 0.1048, + "step": 2326 + }, + { + "epoch": 34.73, + "learning_rate": 0.0002710877192982456, + "loss": 0.0173, + "step": 2327 + }, + { + "epoch": 34.74, + "learning_rate": 0.0002710526315789474, + "loss": 0.1349, + "step": 2328 + }, + { + "epoch": 34.76, + "learning_rate": 0.0002710175438596491, + "loss": 0.3766, + "step": 2329 + }, + { + "epoch": 34.77, + "learning_rate": 0.0002709824561403508, + "loss": 0.2583, + "step": 2330 + }, + { + "epoch": 34.79, + "learning_rate": 0.0002709473684210526, + "loss": 0.1862, + "step": 2331 + }, + { + "epoch": 34.8, + "learning_rate": 0.00027091228070175437, + "loss": 0.253, + "step": 2332 + }, + { + "epoch": 34.82, + "learning_rate": 0.0002708771929824561, + "loss": 0.7504, + "step": 2333 + }, + { + "epoch": 34.83, + "learning_rate": 0.00027084210526315787, + "loss": 0.6195, + "step": 2334 + }, + { + "epoch": 34.85, + "learning_rate": 0.0002708070175438596, + "loss": 0.163, + "step": 2335 + }, + { + "epoch": 34.86, + "learning_rate": 0.00027077192982456137, + "loss": 0.0618, + "step": 2336 + }, + { + "epoch": 34.88, + "learning_rate": 0.0002707368421052631, + "loss": 0.3675, + "step": 2337 + }, + { + "epoch": 34.89, + "learning_rate": 0.00027070175438596487, + "loss": 0.2927, + "step": 2338 + }, + { + "epoch": 34.91, + "learning_rate": 0.00027066666666666667, + "loss": 0.7439, + "step": 2339 + }, + { + "epoch": 34.92, + "learning_rate": 0.0002706315789473684, + "loss": 0.2834, + "step": 2340 + }, + { + "epoch": 34.94, + "learning_rate": 0.00027059649122807017, + "loss": 0.6386, + "step": 2341 + }, + { + "epoch": 34.95, + "learning_rate": 0.0002705614035087719, + "loss": 0.2333, + "step": 2342 + }, + { + "epoch": 34.97, + "learning_rate": 0.00027052631578947366, + "loss": 0.5232, + "step": 2343 + }, + { + "epoch": 34.98, + "learning_rate": 0.0002704912280701754, + "loss": 0.2471, + "step": 2344 + }, + { + "epoch": 35.0, + "learning_rate": 0.00027045614035087716, + "loss": 1.1787, + "step": 2345 + }, + { + "epoch": 35.01, + "learning_rate": 0.00027042105263157896, + "loss": 0.2669, + "step": 2346 + }, + { + "epoch": 35.03, + "learning_rate": 0.00027038596491228066, + "loss": 0.7957, + "step": 2347 + }, + { + "epoch": 35.04, + "learning_rate": 0.0002703508771929824, + "loss": 0.4517, + "step": 2348 + }, + { + "epoch": 35.06, + "learning_rate": 0.00027031578947368416, + "loss": 0.299, + "step": 2349 + }, + { + "epoch": 35.07, + "learning_rate": 0.00027028070175438596, + "loss": 0.2013, + "step": 2350 + }, + { + "epoch": 35.09, + "learning_rate": 0.0002702456140350877, + "loss": 0.2649, + "step": 2351 + }, + { + "epoch": 35.1, + "learning_rate": 0.00027021052631578946, + "loss": 0.1592, + "step": 2352 + }, + { + "epoch": 35.12, + "learning_rate": 0.0002701754385964912, + "loss": 0.2795, + "step": 2353 + }, + { + "epoch": 35.13, + "learning_rate": 0.00027014035087719295, + "loss": 0.3779, + "step": 2354 + }, + { + "epoch": 35.15, + "learning_rate": 0.0002701052631578947, + "loss": 0.2906, + "step": 2355 + }, + { + "epoch": 35.16, + "learning_rate": 0.00027007017543859645, + "loss": 0.2444, + "step": 2356 + }, + { + "epoch": 35.18, + "learning_rate": 0.00027003508771929825, + "loss": 0.3235, + "step": 2357 + }, + { + "epoch": 35.19, + "learning_rate": 0.00027, + "loss": 0.248, + "step": 2358 + }, + { + "epoch": 35.21, + "learning_rate": 0.00026996491228070175, + "loss": 0.2753, + "step": 2359 + }, + { + "epoch": 35.22, + "learning_rate": 0.00026992982456140345, + "loss": 0.2824, + "step": 2360 + }, + { + "epoch": 35.24, + "learning_rate": 0.00026989473684210525, + "loss": 0.0507, + "step": 2361 + }, + { + "epoch": 35.25, + "learning_rate": 0.000269859649122807, + "loss": 0.1854, + "step": 2362 + }, + { + "epoch": 35.27, + "learning_rate": 0.00026982456140350875, + "loss": 0.2716, + "step": 2363 + }, + { + "epoch": 35.28, + "learning_rate": 0.0002697894736842105, + "loss": 0.8457, + "step": 2364 + }, + { + "epoch": 35.3, + "learning_rate": 0.00026975438596491224, + "loss": 0.3006, + "step": 2365 + }, + { + "epoch": 35.31, + "learning_rate": 0.000269719298245614, + "loss": 0.1396, + "step": 2366 + }, + { + "epoch": 35.33, + "learning_rate": 0.00026968421052631574, + "loss": 0.3765, + "step": 2367 + }, + { + "epoch": 35.34, + "learning_rate": 0.0002696491228070175, + "loss": 0.1906, + "step": 2368 + }, + { + "epoch": 35.36, + "learning_rate": 0.0002696140350877193, + "loss": 0.5598, + "step": 2369 + }, + { + "epoch": 35.37, + "learning_rate": 0.00026957894736842104, + "loss": 0.3803, + "step": 2370 + }, + { + "epoch": 35.39, + "learning_rate": 0.0002695438596491228, + "loss": 0.2419, + "step": 2371 + }, + { + "epoch": 35.4, + "learning_rate": 0.00026950877192982454, + "loss": 0.0922, + "step": 2372 + }, + { + "epoch": 35.42, + "learning_rate": 0.0002694736842105263, + "loss": 0.261, + "step": 2373 + }, + { + "epoch": 35.43, + "learning_rate": 0.00026943859649122804, + "loss": 0.1247, + "step": 2374 + }, + { + "epoch": 35.45, + "learning_rate": 0.0002694035087719298, + "loss": 0.1853, + "step": 2375 + }, + { + "epoch": 35.46, + "learning_rate": 0.0002693684210526316, + "loss": 0.5109, + "step": 2376 + }, + { + "epoch": 35.48, + "learning_rate": 0.00026933333333333334, + "loss": 0.6572, + "step": 2377 + }, + { + "epoch": 35.49, + "learning_rate": 0.00026929824561403503, + "loss": 0.1821, + "step": 2378 + }, + { + "epoch": 35.51, + "learning_rate": 0.0002692631578947368, + "loss": 0.1235, + "step": 2379 + }, + { + "epoch": 35.52, + "learning_rate": 0.0002692280701754386, + "loss": 0.1746, + "step": 2380 + }, + { + "epoch": 35.54, + "learning_rate": 0.00026919298245614033, + "loss": 0.3027, + "step": 2381 + }, + { + "epoch": 35.55, + "learning_rate": 0.0002691578947368421, + "loss": 0.1361, + "step": 2382 + }, + { + "epoch": 35.57, + "learning_rate": 0.00026912280701754383, + "loss": 0.2752, + "step": 2383 + }, + { + "epoch": 35.58, + "learning_rate": 0.0002690877192982456, + "loss": 0.1813, + "step": 2384 + }, + { + "epoch": 35.59, + "learning_rate": 0.00026905263157894733, + "loss": 0.2419, + "step": 2385 + }, + { + "epoch": 35.61, + "learning_rate": 0.0002690175438596491, + "loss": 0.542, + "step": 2386 + }, + { + "epoch": 35.62, + "learning_rate": 0.0002689824561403509, + "loss": 0.2718, + "step": 2387 + }, + { + "epoch": 35.64, + "learning_rate": 0.00026894736842105263, + "loss": 0.3817, + "step": 2388 + }, + { + "epoch": 35.65, + "learning_rate": 0.0002689122807017544, + "loss": 0.0568, + "step": 2389 + }, + { + "epoch": 35.67, + "learning_rate": 0.0002688771929824561, + "loss": 0.212, + "step": 2390 + }, + { + "epoch": 35.68, + "learning_rate": 0.0002688421052631579, + "loss": 0.0527, + "step": 2391 + }, + { + "epoch": 35.7, + "learning_rate": 0.0002688070175438596, + "loss": 0.2184, + "step": 2392 + }, + { + "epoch": 35.71, + "learning_rate": 0.0002687719298245614, + "loss": 0.0476, + "step": 2393 + }, + { + "epoch": 35.73, + "learning_rate": 0.0002687368421052632, + "loss": 0.138, + "step": 2394 + }, + { + "epoch": 35.74, + "learning_rate": 0.00026870175438596487, + "loss": 0.2552, + "step": 2395 + }, + { + "epoch": 35.76, + "learning_rate": 0.0002686666666666666, + "loss": 0.5772, + "step": 2396 + }, + { + "epoch": 35.77, + "learning_rate": 0.00026863157894736837, + "loss": 0.3532, + "step": 2397 + }, + { + "epoch": 35.79, + "learning_rate": 0.00026859649122807017, + "loss": 0.0265, + "step": 2398 + }, + { + "epoch": 35.8, + "learning_rate": 0.0002685614035087719, + "loss": 0.1063, + "step": 2399 + }, + { + "epoch": 35.82, + "learning_rate": 0.00026852631578947367, + "loss": 0.1412, + "step": 2400 + }, + { + "epoch": 35.82, + "eval_accuracy": 0.8311306901615272, + "eval_f1": 0.8352003849989705, + "eval_loss": 0.7131851315498352, + "eval_runtime": 344.3646, + "eval_samples_per_second": 11.865, + "eval_steps_per_second": 0.743, + "step": 2400 + }, + { + "epoch": 35.83, + "learning_rate": 0.0002684912280701754, + "loss": 0.3079, + "step": 2401 + }, + { + "epoch": 35.85, + "learning_rate": 0.00026845614035087717, + "loss": 0.4798, + "step": 2402 + }, + { + "epoch": 35.86, + "learning_rate": 0.0002684210526315789, + "loss": 0.058, + "step": 2403 + }, + { + "epoch": 35.88, + "learning_rate": 0.00026838596491228066, + "loss": 0.0388, + "step": 2404 + }, + { + "epoch": 35.89, + "learning_rate": 0.0002683508771929824, + "loss": 0.0393, + "step": 2405 + }, + { + "epoch": 35.91, + "learning_rate": 0.0002683157894736842, + "loss": 0.0151, + "step": 2406 + }, + { + "epoch": 35.92, + "learning_rate": 0.00026828070175438596, + "loss": 0.2027, + "step": 2407 + }, + { + "epoch": 35.94, + "learning_rate": 0.00026824561403508766, + "loss": 0.1488, + "step": 2408 + }, + { + "epoch": 35.95, + "learning_rate": 0.00026821052631578946, + "loss": 0.0274, + "step": 2409 + }, + { + "epoch": 35.97, + "learning_rate": 0.0002681754385964912, + "loss": 0.5102, + "step": 2410 + }, + { + "epoch": 35.98, + "learning_rate": 0.00026814035087719296, + "loss": 0.0618, + "step": 2411 + }, + { + "epoch": 36.0, + "learning_rate": 0.0002681052631578947, + "loss": 0.0711, + "step": 2412 + }, + { + "epoch": 36.01, + "learning_rate": 0.00026807017543859646, + "loss": 0.333, + "step": 2413 + }, + { + "epoch": 36.03, + "learning_rate": 0.0002680350877192982, + "loss": 0.2717, + "step": 2414 + }, + { + "epoch": 36.04, + "learning_rate": 0.00026799999999999995, + "loss": 0.027, + "step": 2415 + }, + { + "epoch": 36.06, + "learning_rate": 0.0002679649122807017, + "loss": 0.2816, + "step": 2416 + }, + { + "epoch": 36.07, + "learning_rate": 0.0002679298245614035, + "loss": 0.2222, + "step": 2417 + }, + { + "epoch": 36.09, + "learning_rate": 0.00026789473684210526, + "loss": 0.2245, + "step": 2418 + }, + { + "epoch": 36.1, + "learning_rate": 0.000267859649122807, + "loss": 0.3236, + "step": 2419 + }, + { + "epoch": 36.12, + "learning_rate": 0.00026782456140350875, + "loss": 0.0227, + "step": 2420 + }, + { + "epoch": 36.13, + "learning_rate": 0.0002677894736842105, + "loss": 0.1237, + "step": 2421 + }, + { + "epoch": 36.15, + "learning_rate": 0.00026775438596491225, + "loss": 0.2968, + "step": 2422 + }, + { + "epoch": 36.16, + "learning_rate": 0.000267719298245614, + "loss": 0.4212, + "step": 2423 + }, + { + "epoch": 36.18, + "learning_rate": 0.0002676842105263158, + "loss": 0.5356, + "step": 2424 + }, + { + "epoch": 36.19, + "learning_rate": 0.00026764912280701755, + "loss": 0.1707, + "step": 2425 + }, + { + "epoch": 36.21, + "learning_rate": 0.00026761403508771925, + "loss": 0.0538, + "step": 2426 + }, + { + "epoch": 36.22, + "learning_rate": 0.000267578947368421, + "loss": 0.5187, + "step": 2427 + }, + { + "epoch": 36.24, + "learning_rate": 0.0002675438596491228, + "loss": 0.1425, + "step": 2428 + }, + { + "epoch": 36.25, + "learning_rate": 0.00026750877192982455, + "loss": 0.2559, + "step": 2429 + }, + { + "epoch": 36.27, + "learning_rate": 0.0002674736842105263, + "loss": 0.2514, + "step": 2430 + }, + { + "epoch": 36.28, + "learning_rate": 0.00026743859649122804, + "loss": 0.3183, + "step": 2431 + }, + { + "epoch": 36.3, + "learning_rate": 0.0002674035087719298, + "loss": 0.4393, + "step": 2432 + }, + { + "epoch": 36.31, + "learning_rate": 0.00026736842105263154, + "loss": 0.2093, + "step": 2433 + }, + { + "epoch": 36.33, + "learning_rate": 0.0002673333333333333, + "loss": 0.3006, + "step": 2434 + }, + { + "epoch": 36.34, + "learning_rate": 0.0002672982456140351, + "loss": 0.1366, + "step": 2435 + }, + { + "epoch": 36.36, + "learning_rate": 0.00026726315789473684, + "loss": 0.1511, + "step": 2436 + }, + { + "epoch": 36.37, + "learning_rate": 0.0002672280701754386, + "loss": 0.3088, + "step": 2437 + }, + { + "epoch": 36.39, + "learning_rate": 0.00026719298245614034, + "loss": 0.2611, + "step": 2438 + }, + { + "epoch": 36.4, + "learning_rate": 0.0002671578947368421, + "loss": 0.0553, + "step": 2439 + }, + { + "epoch": 36.42, + "learning_rate": 0.00026712280701754384, + "loss": 0.0143, + "step": 2440 + }, + { + "epoch": 36.43, + "learning_rate": 0.0002670877192982456, + "loss": 0.2214, + "step": 2441 + }, + { + "epoch": 36.45, + "learning_rate": 0.00026705263157894733, + "loss": 0.3594, + "step": 2442 + }, + { + "epoch": 36.46, + "learning_rate": 0.0002670175438596491, + "loss": 0.0609, + "step": 2443 + }, + { + "epoch": 36.48, + "learning_rate": 0.00026698245614035083, + "loss": 0.1486, + "step": 2444 + }, + { + "epoch": 36.49, + "learning_rate": 0.0002669473684210526, + "loss": 0.033, + "step": 2445 + }, + { + "epoch": 36.51, + "learning_rate": 0.0002669122807017544, + "loss": 0.8025, + "step": 2446 + }, + { + "epoch": 36.52, + "learning_rate": 0.00026687719298245613, + "loss": 0.3945, + "step": 2447 + }, + { + "epoch": 36.54, + "learning_rate": 0.0002668421052631579, + "loss": 0.3908, + "step": 2448 + }, + { + "epoch": 36.55, + "learning_rate": 0.00026680701754385963, + "loss": 0.1322, + "step": 2449 + }, + { + "epoch": 36.57, + "learning_rate": 0.0002667719298245614, + "loss": 0.3136, + "step": 2450 + }, + { + "epoch": 36.58, + "learning_rate": 0.00026673684210526313, + "loss": 0.1262, + "step": 2451 + }, + { + "epoch": 36.59, + "learning_rate": 0.0002667017543859649, + "loss": 0.4639, + "step": 2452 + }, + { + "epoch": 36.61, + "learning_rate": 0.0002666666666666666, + "loss": 0.1867, + "step": 2453 + }, + { + "epoch": 36.62, + "learning_rate": 0.00026663157894736843, + "loss": 0.0463, + "step": 2454 + }, + { + "epoch": 36.64, + "learning_rate": 0.0002665964912280702, + "loss": 0.2519, + "step": 2455 + }, + { + "epoch": 36.65, + "learning_rate": 0.00026656140350877187, + "loss": 0.1951, + "step": 2456 + }, + { + "epoch": 36.67, + "learning_rate": 0.0002665263157894737, + "loss": 0.0287, + "step": 2457 + }, + { + "epoch": 36.68, + "learning_rate": 0.0002664912280701754, + "loss": 0.7845, + "step": 2458 + }, + { + "epoch": 36.7, + "learning_rate": 0.00026645614035087717, + "loss": 0.1309, + "step": 2459 + }, + { + "epoch": 36.71, + "learning_rate": 0.0002664210526315789, + "loss": 0.1365, + "step": 2460 + }, + { + "epoch": 36.73, + "learning_rate": 0.00026638596491228067, + "loss": 0.0312, + "step": 2461 + }, + { + "epoch": 36.74, + "learning_rate": 0.0002663508771929824, + "loss": 0.1098, + "step": 2462 + }, + { + "epoch": 36.76, + "learning_rate": 0.00026631578947368417, + "loss": 0.4963, + "step": 2463 + }, + { + "epoch": 36.77, + "learning_rate": 0.0002662807017543859, + "loss": 0.1178, + "step": 2464 + }, + { + "epoch": 36.79, + "learning_rate": 0.0002662456140350877, + "loss": 0.1513, + "step": 2465 + }, + { + "epoch": 36.8, + "learning_rate": 0.00026621052631578947, + "loss": 0.2865, + "step": 2466 + }, + { + "epoch": 36.82, + "learning_rate": 0.0002661754385964912, + "loss": 0.3721, + "step": 2467 + }, + { + "epoch": 36.83, + "learning_rate": 0.00026614035087719297, + "loss": 0.1291, + "step": 2468 + }, + { + "epoch": 36.85, + "learning_rate": 0.0002661052631578947, + "loss": 0.2043, + "step": 2469 + }, + { + "epoch": 36.86, + "learning_rate": 0.00026607017543859646, + "loss": 0.1493, + "step": 2470 + }, + { + "epoch": 36.88, + "learning_rate": 0.0002660350877192982, + "loss": 0.311, + "step": 2471 + }, + { + "epoch": 36.89, + "learning_rate": 0.000266, + "loss": 0.3776, + "step": 2472 + }, + { + "epoch": 36.91, + "learning_rate": 0.0002659649122807017, + "loss": 0.0136, + "step": 2473 + }, + { + "epoch": 36.92, + "learning_rate": 0.00026592982456140346, + "loss": 0.3089, + "step": 2474 + }, + { + "epoch": 36.94, + "learning_rate": 0.0002658947368421052, + "loss": 0.1018, + "step": 2475 + }, + { + "epoch": 36.95, + "learning_rate": 0.000265859649122807, + "loss": 0.2423, + "step": 2476 + }, + { + "epoch": 36.97, + "learning_rate": 0.00026582456140350876, + "loss": 0.3071, + "step": 2477 + }, + { + "epoch": 36.98, + "learning_rate": 0.0002657894736842105, + "loss": 0.2843, + "step": 2478 + }, + { + "epoch": 37.0, + "learning_rate": 0.00026575438596491226, + "loss": 0.4304, + "step": 2479 + }, + { + "epoch": 37.01, + "learning_rate": 0.000265719298245614, + "loss": 0.2623, + "step": 2480 + }, + { + "epoch": 37.03, + "learning_rate": 0.00026568421052631575, + "loss": 0.5204, + "step": 2481 + }, + { + "epoch": 37.04, + "learning_rate": 0.0002656491228070175, + "loss": 0.181, + "step": 2482 + }, + { + "epoch": 37.06, + "learning_rate": 0.0002656140350877193, + "loss": 0.2256, + "step": 2483 + }, + { + "epoch": 37.07, + "learning_rate": 0.00026557894736842105, + "loss": 0.6583, + "step": 2484 + }, + { + "epoch": 37.09, + "learning_rate": 0.0002655438596491228, + "loss": 0.3567, + "step": 2485 + }, + { + "epoch": 37.1, + "learning_rate": 0.00026550877192982455, + "loss": 0.2477, + "step": 2486 + }, + { + "epoch": 37.12, + "learning_rate": 0.0002654736842105263, + "loss": 0.4258, + "step": 2487 + }, + { + "epoch": 37.13, + "learning_rate": 0.00026543859649122805, + "loss": 0.0732, + "step": 2488 + }, + { + "epoch": 37.15, + "learning_rate": 0.0002654035087719298, + "loss": 0.0374, + "step": 2489 + }, + { + "epoch": 37.16, + "learning_rate": 0.00026536842105263155, + "loss": 0.1289, + "step": 2490 + }, + { + "epoch": 37.18, + "learning_rate": 0.0002653333333333333, + "loss": 0.0477, + "step": 2491 + }, + { + "epoch": 37.19, + "learning_rate": 0.00026529824561403504, + "loss": 0.1553, + "step": 2492 + }, + { + "epoch": 37.21, + "learning_rate": 0.0002652631578947368, + "loss": 0.1792, + "step": 2493 + }, + { + "epoch": 37.22, + "learning_rate": 0.0002652280701754386, + "loss": 0.0681, + "step": 2494 + }, + { + "epoch": 37.24, + "learning_rate": 0.00026519298245614034, + "loss": 0.3027, + "step": 2495 + }, + { + "epoch": 37.25, + "learning_rate": 0.0002651578947368421, + "loss": 0.2969, + "step": 2496 + }, + { + "epoch": 37.27, + "learning_rate": 0.00026512280701754384, + "loss": 0.4292, + "step": 2497 + }, + { + "epoch": 37.28, + "learning_rate": 0.0002650877192982456, + "loss": 0.285, + "step": 2498 + }, + { + "epoch": 37.3, + "learning_rate": 0.00026505263157894734, + "loss": 1.0064, + "step": 2499 + }, + { + "epoch": 37.31, + "learning_rate": 0.0002650175438596491, + "loss": 0.2859, + "step": 2500 + }, + { + "epoch": 37.33, + "learning_rate": 0.00026498245614035084, + "loss": 0.131, + "step": 2501 + }, + { + "epoch": 37.34, + "learning_rate": 0.00026494736842105264, + "loss": 0.3665, + "step": 2502 + }, + { + "epoch": 37.36, + "learning_rate": 0.0002649122807017544, + "loss": 0.4487, + "step": 2503 + }, + { + "epoch": 37.37, + "learning_rate": 0.0002648771929824561, + "loss": 0.722, + "step": 2504 + }, + { + "epoch": 37.39, + "learning_rate": 0.00026484210526315783, + "loss": 1.3793, + "step": 2505 + }, + { + "epoch": 37.4, + "learning_rate": 0.00026480701754385964, + "loss": 0.2841, + "step": 2506 + }, + { + "epoch": 37.42, + "learning_rate": 0.0002647719298245614, + "loss": 0.6317, + "step": 2507 + }, + { + "epoch": 37.43, + "learning_rate": 0.00026473684210526313, + "loss": 0.4639, + "step": 2508 + }, + { + "epoch": 37.45, + "learning_rate": 0.0002647017543859649, + "loss": 0.2349, + "step": 2509 + }, + { + "epoch": 37.46, + "learning_rate": 0.00026466666666666663, + "loss": 0.252, + "step": 2510 + }, + { + "epoch": 37.48, + "learning_rate": 0.0002646315789473684, + "loss": 0.2653, + "step": 2511 + }, + { + "epoch": 37.49, + "learning_rate": 0.00026459649122807013, + "loss": 0.1756, + "step": 2512 + }, + { + "epoch": 37.51, + "learning_rate": 0.00026456140350877193, + "loss": 0.6025, + "step": 2513 + }, + { + "epoch": 37.52, + "learning_rate": 0.0002645263157894737, + "loss": 0.1538, + "step": 2514 + }, + { + "epoch": 37.54, + "learning_rate": 0.00026449122807017543, + "loss": 0.7887, + "step": 2515 + }, + { + "epoch": 37.55, + "learning_rate": 0.0002644561403508772, + "loss": 0.1094, + "step": 2516 + }, + { + "epoch": 37.57, + "learning_rate": 0.0002644210526315789, + "loss": 0.1136, + "step": 2517 + }, + { + "epoch": 37.58, + "learning_rate": 0.0002643859649122807, + "loss": 0.054, + "step": 2518 + }, + { + "epoch": 37.59, + "learning_rate": 0.0002643508771929824, + "loss": 0.4868, + "step": 2519 + }, + { + "epoch": 37.61, + "learning_rate": 0.0002643157894736842, + "loss": 0.2704, + "step": 2520 + }, + { + "epoch": 37.62, + "learning_rate": 0.0002642807017543859, + "loss": 0.0936, + "step": 2521 + }, + { + "epoch": 37.64, + "learning_rate": 0.00026424561403508767, + "loss": 0.7214, + "step": 2522 + }, + { + "epoch": 37.65, + "learning_rate": 0.0002642105263157894, + "loss": 0.0592, + "step": 2523 + }, + { + "epoch": 37.67, + "learning_rate": 0.0002641754385964912, + "loss": 0.0877, + "step": 2524 + }, + { + "epoch": 37.68, + "learning_rate": 0.00026414035087719297, + "loss": 0.2035, + "step": 2525 + }, + { + "epoch": 37.7, + "learning_rate": 0.0002641052631578947, + "loss": 0.0444, + "step": 2526 + }, + { + "epoch": 37.71, + "learning_rate": 0.00026407017543859647, + "loss": 0.1678, + "step": 2527 + }, + { + "epoch": 37.73, + "learning_rate": 0.0002640350877192982, + "loss": 0.037, + "step": 2528 + }, + { + "epoch": 37.74, + "learning_rate": 0.00026399999999999997, + "loss": 0.0603, + "step": 2529 + }, + { + "epoch": 37.76, + "learning_rate": 0.0002639649122807017, + "loss": 0.2909, + "step": 2530 + }, + { + "epoch": 37.77, + "learning_rate": 0.00026392982456140346, + "loss": 0.1873, + "step": 2531 + }, + { + "epoch": 37.79, + "learning_rate": 0.00026389473684210527, + "loss": 0.0805, + "step": 2532 + }, + { + "epoch": 37.8, + "learning_rate": 0.000263859649122807, + "loss": 0.1528, + "step": 2533 + }, + { + "epoch": 37.82, + "learning_rate": 0.00026382456140350876, + "loss": 0.1128, + "step": 2534 + }, + { + "epoch": 37.83, + "learning_rate": 0.0002637894736842105, + "loss": 0.0441, + "step": 2535 + }, + { + "epoch": 37.85, + "learning_rate": 0.00026375438596491226, + "loss": 0.3716, + "step": 2536 + }, + { + "epoch": 37.86, + "learning_rate": 0.000263719298245614, + "loss": 0.2316, + "step": 2537 + }, + { + "epoch": 37.88, + "learning_rate": 0.00026368421052631576, + "loss": 0.397, + "step": 2538 + }, + { + "epoch": 37.89, + "learning_rate": 0.0002636491228070175, + "loss": 0.0878, + "step": 2539 + }, + { + "epoch": 37.91, + "learning_rate": 0.00026361403508771926, + "loss": 0.3564, + "step": 2540 + }, + { + "epoch": 37.92, + "learning_rate": 0.000263578947368421, + "loss": 0.2722, + "step": 2541 + }, + { + "epoch": 37.94, + "learning_rate": 0.00026354385964912275, + "loss": 0.2298, + "step": 2542 + }, + { + "epoch": 37.95, + "learning_rate": 0.00026350877192982456, + "loss": 0.2963, + "step": 2543 + }, + { + "epoch": 37.97, + "learning_rate": 0.0002634736842105263, + "loss": 0.2939, + "step": 2544 + }, + { + "epoch": 37.98, + "learning_rate": 0.00026343859649122805, + "loss": 0.1811, + "step": 2545 + }, + { + "epoch": 38.0, + "learning_rate": 0.0002634035087719298, + "loss": 0.2587, + "step": 2546 + }, + { + "epoch": 38.01, + "learning_rate": 0.00026336842105263155, + "loss": 0.335, + "step": 2547 + }, + { + "epoch": 38.03, + "learning_rate": 0.0002633333333333333, + "loss": 0.3061, + "step": 2548 + }, + { + "epoch": 38.04, + "learning_rate": 0.00026329824561403505, + "loss": 0.2655, + "step": 2549 + }, + { + "epoch": 38.06, + "learning_rate": 0.00026326315789473685, + "loss": 0.2371, + "step": 2550 + }, + { + "epoch": 38.07, + "learning_rate": 0.0002632280701754386, + "loss": 0.5286, + "step": 2551 + }, + { + "epoch": 38.09, + "learning_rate": 0.0002631929824561403, + "loss": 0.3128, + "step": 2552 + }, + { + "epoch": 38.1, + "learning_rate": 0.00026315789473684205, + "loss": 0.0212, + "step": 2553 + }, + { + "epoch": 38.12, + "learning_rate": 0.00026312280701754385, + "loss": 0.5485, + "step": 2554 + }, + { + "epoch": 38.13, + "learning_rate": 0.0002630877192982456, + "loss": 0.2955, + "step": 2555 + }, + { + "epoch": 38.15, + "learning_rate": 0.00026305263157894735, + "loss": 0.0294, + "step": 2556 + }, + { + "epoch": 38.16, + "learning_rate": 0.0002630175438596491, + "loss": 0.2217, + "step": 2557 + }, + { + "epoch": 38.18, + "learning_rate": 0.00026298245614035084, + "loss": 0.2246, + "step": 2558 + }, + { + "epoch": 38.19, + "learning_rate": 0.0002629473684210526, + "loss": 0.4677, + "step": 2559 + }, + { + "epoch": 38.21, + "learning_rate": 0.00026291228070175434, + "loss": 0.1354, + "step": 2560 + }, + { + "epoch": 38.22, + "learning_rate": 0.00026287719298245614, + "loss": 0.0242, + "step": 2561 + }, + { + "epoch": 38.24, + "learning_rate": 0.0002628421052631579, + "loss": 0.1915, + "step": 2562 + }, + { + "epoch": 38.25, + "learning_rate": 0.00026280701754385964, + "loss": 0.494, + "step": 2563 + }, + { + "epoch": 38.27, + "learning_rate": 0.0002627719298245614, + "loss": 0.1895, + "step": 2564 + }, + { + "epoch": 38.28, + "learning_rate": 0.00026273684210526314, + "loss": 0.0574, + "step": 2565 + }, + { + "epoch": 38.3, + "learning_rate": 0.0002627017543859649, + "loss": 0.085, + "step": 2566 + }, + { + "epoch": 38.31, + "learning_rate": 0.00026266666666666664, + "loss": 0.317, + "step": 2567 + }, + { + "epoch": 38.33, + "learning_rate": 0.0002626315789473684, + "loss": 0.1981, + "step": 2568 + }, + { + "epoch": 38.34, + "learning_rate": 0.00026259649122807013, + "loss": 0.2723, + "step": 2569 + }, + { + "epoch": 38.36, + "learning_rate": 0.0002625614035087719, + "loss": 0.4135, + "step": 2570 + }, + { + "epoch": 38.37, + "learning_rate": 0.00026252631578947363, + "loss": 0.3537, + "step": 2571 + }, + { + "epoch": 38.39, + "learning_rate": 0.00026249122807017543, + "loss": 0.223, + "step": 2572 + }, + { + "epoch": 38.4, + "learning_rate": 0.0002624561403508772, + "loss": 0.0888, + "step": 2573 + }, + { + "epoch": 38.42, + "learning_rate": 0.00026242105263157893, + "loss": 0.0888, + "step": 2574 + }, + { + "epoch": 38.43, + "learning_rate": 0.0002623859649122807, + "loss": 0.2686, + "step": 2575 + }, + { + "epoch": 38.45, + "learning_rate": 0.00026235087719298243, + "loss": 0.291, + "step": 2576 + }, + { + "epoch": 38.46, + "learning_rate": 0.0002623157894736842, + "loss": 0.0836, + "step": 2577 + }, + { + "epoch": 38.48, + "learning_rate": 0.00026228070175438593, + "loss": 0.2366, + "step": 2578 + }, + { + "epoch": 38.49, + "learning_rate": 0.0002622456140350877, + "loss": 0.0858, + "step": 2579 + }, + { + "epoch": 38.51, + "learning_rate": 0.0002622105263157895, + "loss": 0.5286, + "step": 2580 + }, + { + "epoch": 38.52, + "learning_rate": 0.00026217543859649123, + "loss": 0.5125, + "step": 2581 + }, + { + "epoch": 38.54, + "learning_rate": 0.000262140350877193, + "loss": 0.0903, + "step": 2582 + }, + { + "epoch": 38.55, + "learning_rate": 0.0002621052631578947, + "loss": 0.457, + "step": 2583 + }, + { + "epoch": 38.57, + "learning_rate": 0.0002620701754385965, + "loss": 0.6476, + "step": 2584 + }, + { + "epoch": 38.58, + "learning_rate": 0.0002620350877192982, + "loss": 0.0249, + "step": 2585 + }, + { + "epoch": 38.59, + "learning_rate": 0.00026199999999999997, + "loss": 0.2783, + "step": 2586 + }, + { + "epoch": 38.61, + "learning_rate": 0.0002619649122807017, + "loss": 0.2174, + "step": 2587 + }, + { + "epoch": 38.62, + "learning_rate": 0.00026192982456140347, + "loss": 0.2787, + "step": 2588 + }, + { + "epoch": 38.64, + "learning_rate": 0.0002618947368421052, + "loss": 0.073, + "step": 2589 + }, + { + "epoch": 38.65, + "learning_rate": 0.00026185964912280697, + "loss": 0.0238, + "step": 2590 + }, + { + "epoch": 38.67, + "learning_rate": 0.00026182456140350877, + "loss": 0.0319, + "step": 2591 + }, + { + "epoch": 38.68, + "learning_rate": 0.0002617894736842105, + "loss": 0.1248, + "step": 2592 + }, + { + "epoch": 38.7, + "learning_rate": 0.00026175438596491227, + "loss": 0.049, + "step": 2593 + }, + { + "epoch": 38.71, + "learning_rate": 0.000261719298245614, + "loss": 0.3386, + "step": 2594 + }, + { + "epoch": 38.73, + "learning_rate": 0.00026168421052631576, + "loss": 0.3199, + "step": 2595 + }, + { + "epoch": 38.74, + "learning_rate": 0.0002616491228070175, + "loss": 0.1528, + "step": 2596 + }, + { + "epoch": 38.76, + "learning_rate": 0.00026161403508771926, + "loss": 0.1884, + "step": 2597 + }, + { + "epoch": 38.77, + "learning_rate": 0.00026157894736842107, + "loss": 0.3862, + "step": 2598 + }, + { + "epoch": 38.79, + "learning_rate": 0.0002615438596491228, + "loss": 0.1399, + "step": 2599 + }, + { + "epoch": 38.8, + "learning_rate": 0.0002615087719298245, + "loss": 0.2141, + "step": 2600 + }, + { + "epoch": 38.8, + "eval_accuracy": 0.8262359275575134, + "eval_f1": 0.8275994930387057, + "eval_loss": 0.7550894618034363, + "eval_runtime": 345.2152, + "eval_samples_per_second": 11.836, + "eval_steps_per_second": 0.742, + "step": 2600 + }, + { + "epoch": 38.82, + "learning_rate": 0.00026147368421052626, + "loss": 0.0499, + "step": 2601 + }, + { + "epoch": 38.83, + "learning_rate": 0.00026143859649122806, + "loss": 0.3111, + "step": 2602 + }, + { + "epoch": 38.85, + "learning_rate": 0.0002614035087719298, + "loss": 0.1958, + "step": 2603 + }, + { + "epoch": 38.86, + "learning_rate": 0.00026136842105263156, + "loss": 0.0505, + "step": 2604 + }, + { + "epoch": 38.88, + "learning_rate": 0.0002613333333333333, + "loss": 0.1559, + "step": 2605 + }, + { + "epoch": 38.89, + "learning_rate": 0.00026129824561403506, + "loss": 0.0148, + "step": 2606 + }, + { + "epoch": 38.91, + "learning_rate": 0.0002612631578947368, + "loss": 0.3198, + "step": 2607 + }, + { + "epoch": 38.92, + "learning_rate": 0.00026122807017543855, + "loss": 0.5153, + "step": 2608 + }, + { + "epoch": 38.94, + "learning_rate": 0.00026119298245614036, + "loss": 0.2361, + "step": 2609 + }, + { + "epoch": 38.95, + "learning_rate": 0.0002611578947368421, + "loss": 0.3028, + "step": 2610 + }, + { + "epoch": 38.97, + "learning_rate": 0.00026112280701754385, + "loss": 0.3361, + "step": 2611 + }, + { + "epoch": 38.98, + "learning_rate": 0.0002610877192982456, + "loss": 0.0152, + "step": 2612 + }, + { + "epoch": 39.0, + "learning_rate": 0.00026105263157894735, + "loss": 0.0569, + "step": 2613 + }, + { + "epoch": 39.01, + "learning_rate": 0.0002610175438596491, + "loss": 0.2744, + "step": 2614 + }, + { + "epoch": 39.03, + "learning_rate": 0.00026098245614035085, + "loss": 0.2632, + "step": 2615 + }, + { + "epoch": 39.04, + "learning_rate": 0.0002609473684210526, + "loss": 0.0301, + "step": 2616 + }, + { + "epoch": 39.06, + "learning_rate": 0.00026091228070175435, + "loss": 0.2426, + "step": 2617 + }, + { + "epoch": 39.07, + "learning_rate": 0.0002608771929824561, + "loss": 0.4076, + "step": 2618 + }, + { + "epoch": 39.09, + "learning_rate": 0.00026084210526315784, + "loss": 0.2153, + "step": 2619 + }, + { + "epoch": 39.1, + "learning_rate": 0.00026080701754385965, + "loss": 0.0533, + "step": 2620 + }, + { + "epoch": 39.12, + "learning_rate": 0.0002607719298245614, + "loss": 0.4746, + "step": 2621 + }, + { + "epoch": 39.13, + "learning_rate": 0.00026073684210526314, + "loss": 0.71, + "step": 2622 + }, + { + "epoch": 39.15, + "learning_rate": 0.0002607017543859649, + "loss": 0.2725, + "step": 2623 + }, + { + "epoch": 39.16, + "learning_rate": 0.00026066666666666664, + "loss": 0.1129, + "step": 2624 + }, + { + "epoch": 39.18, + "learning_rate": 0.0002606315789473684, + "loss": 0.2319, + "step": 2625 + }, + { + "epoch": 39.19, + "learning_rate": 0.00026059649122807014, + "loss": 0.1732, + "step": 2626 + }, + { + "epoch": 39.21, + "learning_rate": 0.0002605614035087719, + "loss": 0.3672, + "step": 2627 + }, + { + "epoch": 39.22, + "learning_rate": 0.0002605263157894737, + "loss": 0.2615, + "step": 2628 + }, + { + "epoch": 39.24, + "learning_rate": 0.00026049122807017544, + "loss": 0.2643, + "step": 2629 + }, + { + "epoch": 39.25, + "learning_rate": 0.0002604561403508772, + "loss": 0.3267, + "step": 2630 + }, + { + "epoch": 39.27, + "learning_rate": 0.0002604210526315789, + "loss": 0.5538, + "step": 2631 + }, + { + "epoch": 39.28, + "learning_rate": 0.0002603859649122807, + "loss": 0.1607, + "step": 2632 + }, + { + "epoch": 39.3, + "learning_rate": 0.00026035087719298244, + "loss": 0.3511, + "step": 2633 + }, + { + "epoch": 39.31, + "learning_rate": 0.0002603157894736842, + "loss": 0.0744, + "step": 2634 + }, + { + "epoch": 39.33, + "learning_rate": 0.00026028070175438593, + "loss": 0.4115, + "step": 2635 + }, + { + "epoch": 39.34, + "learning_rate": 0.0002602456140350877, + "loss": 0.189, + "step": 2636 + }, + { + "epoch": 39.36, + "learning_rate": 0.00026021052631578943, + "loss": 0.437, + "step": 2637 + }, + { + "epoch": 39.37, + "learning_rate": 0.0002601754385964912, + "loss": 0.1349, + "step": 2638 + }, + { + "epoch": 39.39, + "learning_rate": 0.000260140350877193, + "loss": 0.1645, + "step": 2639 + }, + { + "epoch": 39.4, + "learning_rate": 0.00026010526315789473, + "loss": 0.7744, + "step": 2640 + }, + { + "epoch": 39.42, + "learning_rate": 0.0002600701754385965, + "loss": 0.0679, + "step": 2641 + }, + { + "epoch": 39.43, + "learning_rate": 0.00026003508771929823, + "loss": 0.1496, + "step": 2642 + }, + { + "epoch": 39.45, + "learning_rate": 0.00026, + "loss": 0.056, + "step": 2643 + }, + { + "epoch": 39.46, + "learning_rate": 0.0002599649122807017, + "loss": 0.3514, + "step": 2644 + }, + { + "epoch": 39.48, + "learning_rate": 0.0002599298245614035, + "loss": 0.112, + "step": 2645 + }, + { + "epoch": 39.49, + "learning_rate": 0.0002598947368421053, + "loss": 0.0599, + "step": 2646 + }, + { + "epoch": 39.51, + "learning_rate": 0.000259859649122807, + "loss": 0.4294, + "step": 2647 + }, + { + "epoch": 39.52, + "learning_rate": 0.0002598245614035087, + "loss": 0.1908, + "step": 2648 + }, + { + "epoch": 39.54, + "learning_rate": 0.00025978947368421047, + "loss": 0.4259, + "step": 2649 + }, + { + "epoch": 39.55, + "learning_rate": 0.00025975438596491227, + "loss": 0.2764, + "step": 2650 + }, + { + "epoch": 39.57, + "learning_rate": 0.000259719298245614, + "loss": 0.2793, + "step": 2651 + }, + { + "epoch": 39.58, + "learning_rate": 0.00025968421052631577, + "loss": 0.2748, + "step": 2652 + }, + { + "epoch": 39.59, + "learning_rate": 0.0002596491228070175, + "loss": 0.0622, + "step": 2653 + }, + { + "epoch": 39.61, + "learning_rate": 0.00025961403508771927, + "loss": 0.2978, + "step": 2654 + }, + { + "epoch": 39.62, + "learning_rate": 0.000259578947368421, + "loss": 0.0851, + "step": 2655 + }, + { + "epoch": 39.64, + "learning_rate": 0.00025954385964912277, + "loss": 0.2829, + "step": 2656 + }, + { + "epoch": 39.65, + "learning_rate": 0.00025950877192982457, + "loss": 0.1534, + "step": 2657 + }, + { + "epoch": 39.67, + "learning_rate": 0.0002594736842105263, + "loss": 0.4885, + "step": 2658 + }, + { + "epoch": 39.68, + "learning_rate": 0.00025943859649122807, + "loss": 0.3212, + "step": 2659 + }, + { + "epoch": 39.7, + "learning_rate": 0.0002594035087719298, + "loss": 0.054, + "step": 2660 + }, + { + "epoch": 39.71, + "learning_rate": 0.00025936842105263156, + "loss": 0.1702, + "step": 2661 + }, + { + "epoch": 39.73, + "learning_rate": 0.0002593333333333333, + "loss": 0.068, + "step": 2662 + }, + { + "epoch": 39.74, + "learning_rate": 0.00025929824561403506, + "loss": 0.4455, + "step": 2663 + }, + { + "epoch": 39.76, + "learning_rate": 0.0002592631578947368, + "loss": 0.5979, + "step": 2664 + }, + { + "epoch": 39.77, + "learning_rate": 0.00025922807017543856, + "loss": 0.1285, + "step": 2665 + }, + { + "epoch": 39.79, + "learning_rate": 0.0002591929824561403, + "loss": 0.259, + "step": 2666 + }, + { + "epoch": 39.8, + "learning_rate": 0.00025915789473684206, + "loss": 0.2704, + "step": 2667 + }, + { + "epoch": 39.82, + "learning_rate": 0.0002591228070175438, + "loss": 0.5382, + "step": 2668 + }, + { + "epoch": 39.83, + "learning_rate": 0.0002590877192982456, + "loss": 0.1596, + "step": 2669 + }, + { + "epoch": 39.85, + "learning_rate": 0.00025905263157894736, + "loss": 0.1808, + "step": 2670 + }, + { + "epoch": 39.86, + "learning_rate": 0.0002590175438596491, + "loss": 0.1089, + "step": 2671 + }, + { + "epoch": 39.88, + "learning_rate": 0.00025898245614035085, + "loss": 0.1954, + "step": 2672 + }, + { + "epoch": 39.89, + "learning_rate": 0.0002589473684210526, + "loss": 0.1062, + "step": 2673 + }, + { + "epoch": 39.91, + "learning_rate": 0.00025891228070175435, + "loss": 0.032, + "step": 2674 + }, + { + "epoch": 39.92, + "learning_rate": 0.0002588771929824561, + "loss": 0.2953, + "step": 2675 + }, + { + "epoch": 39.94, + "learning_rate": 0.0002588421052631579, + "loss": 0.2644, + "step": 2676 + }, + { + "epoch": 39.95, + "learning_rate": 0.00025880701754385965, + "loss": 0.0182, + "step": 2677 + }, + { + "epoch": 39.97, + "learning_rate": 0.0002587719298245614, + "loss": 0.0144, + "step": 2678 + }, + { + "epoch": 39.98, + "learning_rate": 0.0002587368421052631, + "loss": 0.0845, + "step": 2679 + }, + { + "epoch": 40.0, + "learning_rate": 0.0002587017543859649, + "loss": 0.3278, + "step": 2680 + }, + { + "epoch": 40.01, + "learning_rate": 0.00025866666666666665, + "loss": 0.1518, + "step": 2681 + }, + { + "epoch": 40.03, + "learning_rate": 0.0002586315789473684, + "loss": 0.3586, + "step": 2682 + }, + { + "epoch": 40.04, + "learning_rate": 0.00025859649122807015, + "loss": 0.0538, + "step": 2683 + }, + { + "epoch": 40.06, + "learning_rate": 0.0002585614035087719, + "loss": 0.211, + "step": 2684 + }, + { + "epoch": 40.07, + "learning_rate": 0.00025852631578947364, + "loss": 0.0149, + "step": 2685 + }, + { + "epoch": 40.09, + "learning_rate": 0.0002584912280701754, + "loss": 0.0996, + "step": 2686 + }, + { + "epoch": 40.1, + "learning_rate": 0.0002584561403508772, + "loss": 0.0451, + "step": 2687 + }, + { + "epoch": 40.12, + "learning_rate": 0.00025842105263157894, + "loss": 0.1934, + "step": 2688 + }, + { + "epoch": 40.13, + "learning_rate": 0.0002583859649122807, + "loss": 0.1372, + "step": 2689 + }, + { + "epoch": 40.15, + "learning_rate": 0.00025835087719298244, + "loss": 0.3718, + "step": 2690 + }, + { + "epoch": 40.16, + "learning_rate": 0.0002583157894736842, + "loss": 0.0081, + "step": 2691 + }, + { + "epoch": 40.18, + "learning_rate": 0.00025828070175438594, + "loss": 0.4675, + "step": 2692 + }, + { + "epoch": 40.19, + "learning_rate": 0.0002582456140350877, + "loss": 0.0657, + "step": 2693 + }, + { + "epoch": 40.21, + "learning_rate": 0.0002582105263157895, + "loss": 0.0938, + "step": 2694 + }, + { + "epoch": 40.22, + "learning_rate": 0.00025817543859649124, + "loss": 0.3227, + "step": 2695 + }, + { + "epoch": 40.24, + "learning_rate": 0.00025814035087719293, + "loss": 0.4954, + "step": 2696 + }, + { + "epoch": 40.25, + "learning_rate": 0.0002581052631578947, + "loss": 0.0188, + "step": 2697 + }, + { + "epoch": 40.27, + "learning_rate": 0.0002580701754385965, + "loss": 0.1594, + "step": 2698 + }, + { + "epoch": 40.28, + "learning_rate": 0.00025803508771929823, + "loss": 0.2826, + "step": 2699 + }, + { + "epoch": 40.3, + "learning_rate": 0.000258, + "loss": 0.135, + "step": 2700 + }, + { + "epoch": 40.31, + "learning_rate": 0.00025796491228070173, + "loss": 0.038, + "step": 2701 + }, + { + "epoch": 40.33, + "learning_rate": 0.0002579298245614035, + "loss": 0.2259, + "step": 2702 + }, + { + "epoch": 40.34, + "learning_rate": 0.00025789473684210523, + "loss": 0.0391, + "step": 2703 + }, + { + "epoch": 40.36, + "learning_rate": 0.000257859649122807, + "loss": 0.2152, + "step": 2704 + }, + { + "epoch": 40.37, + "learning_rate": 0.0002578245614035087, + "loss": 0.1713, + "step": 2705 + }, + { + "epoch": 40.39, + "learning_rate": 0.00025778947368421053, + "loss": 0.3175, + "step": 2706 + }, + { + "epoch": 40.4, + "learning_rate": 0.0002577543859649123, + "loss": 0.1026, + "step": 2707 + }, + { + "epoch": 40.42, + "learning_rate": 0.00025771929824561403, + "loss": 0.0078, + "step": 2708 + }, + { + "epoch": 40.43, + "learning_rate": 0.0002576842105263158, + "loss": 0.1131, + "step": 2709 + }, + { + "epoch": 40.45, + "learning_rate": 0.0002576491228070175, + "loss": 0.209, + "step": 2710 + }, + { + "epoch": 40.46, + "learning_rate": 0.0002576140350877193, + "loss": 0.161, + "step": 2711 + }, + { + "epoch": 40.48, + "learning_rate": 0.000257578947368421, + "loss": 0.096, + "step": 2712 + }, + { + "epoch": 40.49, + "learning_rate": 0.00025754385964912277, + "loss": 0.1569, + "step": 2713 + }, + { + "epoch": 40.51, + "learning_rate": 0.0002575087719298245, + "loss": 0.1204, + "step": 2714 + }, + { + "epoch": 40.52, + "learning_rate": 0.00025747368421052627, + "loss": 0.3505, + "step": 2715 + }, + { + "epoch": 40.54, + "learning_rate": 0.000257438596491228, + "loss": 0.172, + "step": 2716 + }, + { + "epoch": 40.55, + "learning_rate": 0.0002574035087719298, + "loss": 0.0452, + "step": 2717 + }, + { + "epoch": 40.57, + "learning_rate": 0.00025736842105263157, + "loss": 0.2946, + "step": 2718 + }, + { + "epoch": 40.58, + "learning_rate": 0.0002573333333333333, + "loss": 0.0231, + "step": 2719 + }, + { + "epoch": 40.59, + "learning_rate": 0.00025729824561403507, + "loss": 0.1766, + "step": 2720 + }, + { + "epoch": 40.61, + "learning_rate": 0.0002572631578947368, + "loss": 0.2658, + "step": 2721 + }, + { + "epoch": 40.62, + "learning_rate": 0.00025722807017543856, + "loss": 0.2161, + "step": 2722 + }, + { + "epoch": 40.64, + "learning_rate": 0.0002571929824561403, + "loss": 0.0936, + "step": 2723 + }, + { + "epoch": 40.65, + "learning_rate": 0.0002571578947368421, + "loss": 0.1312, + "step": 2724 + }, + { + "epoch": 40.67, + "learning_rate": 0.00025712280701754386, + "loss": 0.1671, + "step": 2725 + }, + { + "epoch": 40.68, + "learning_rate": 0.0002570877192982456, + "loss": 0.1141, + "step": 2726 + }, + { + "epoch": 40.7, + "learning_rate": 0.0002570526315789473, + "loss": 0.1463, + "step": 2727 + }, + { + "epoch": 40.71, + "learning_rate": 0.0002570175438596491, + "loss": 0.2518, + "step": 2728 + }, + { + "epoch": 40.73, + "learning_rate": 0.00025698245614035086, + "loss": 0.0229, + "step": 2729 + }, + { + "epoch": 40.74, + "learning_rate": 0.0002569473684210526, + "loss": 0.0613, + "step": 2730 + }, + { + "epoch": 40.76, + "learning_rate": 0.00025691228070175436, + "loss": 0.1352, + "step": 2731 + }, + { + "epoch": 40.77, + "learning_rate": 0.0002568771929824561, + "loss": 0.1684, + "step": 2732 + }, + { + "epoch": 40.79, + "learning_rate": 0.00025684210526315786, + "loss": 0.2984, + "step": 2733 + }, + { + "epoch": 40.8, + "learning_rate": 0.0002568070175438596, + "loss": 0.2261, + "step": 2734 + }, + { + "epoch": 40.82, + "learning_rate": 0.0002567719298245614, + "loss": 0.0484, + "step": 2735 + }, + { + "epoch": 40.83, + "learning_rate": 0.00025673684210526316, + "loss": 0.4353, + "step": 2736 + }, + { + "epoch": 40.85, + "learning_rate": 0.0002567017543859649, + "loss": 0.1866, + "step": 2737 + }, + { + "epoch": 40.86, + "learning_rate": 0.00025666666666666665, + "loss": 0.2467, + "step": 2738 + }, + { + "epoch": 40.88, + "learning_rate": 0.0002566315789473684, + "loss": 0.0329, + "step": 2739 + }, + { + "epoch": 40.89, + "learning_rate": 0.00025659649122807015, + "loss": 0.108, + "step": 2740 + }, + { + "epoch": 40.91, + "learning_rate": 0.0002565614035087719, + "loss": 0.025, + "step": 2741 + }, + { + "epoch": 40.92, + "learning_rate": 0.00025652631578947365, + "loss": 0.1324, + "step": 2742 + }, + { + "epoch": 40.94, + "learning_rate": 0.00025649122807017545, + "loss": 0.2477, + "step": 2743 + }, + { + "epoch": 40.95, + "learning_rate": 0.00025645614035087715, + "loss": 0.0161, + "step": 2744 + }, + { + "epoch": 40.97, + "learning_rate": 0.0002564210526315789, + "loss": 0.1783, + "step": 2745 + }, + { + "epoch": 40.98, + "learning_rate": 0.0002563859649122807, + "loss": 0.0191, + "step": 2746 + }, + { + "epoch": 41.0, + "learning_rate": 0.00025635087719298245, + "loss": 0.1988, + "step": 2747 + }, + { + "epoch": 41.01, + "learning_rate": 0.0002563157894736842, + "loss": 0.9461, + "step": 2748 + }, + { + "epoch": 41.03, + "learning_rate": 0.00025628070175438594, + "loss": 0.6253, + "step": 2749 + }, + { + "epoch": 41.04, + "learning_rate": 0.0002562456140350877, + "loss": 0.3213, + "step": 2750 + }, + { + "epoch": 41.06, + "learning_rate": 0.00025621052631578944, + "loss": 0.3665, + "step": 2751 + }, + { + "epoch": 41.07, + "learning_rate": 0.0002561754385964912, + "loss": 0.1163, + "step": 2752 + }, + { + "epoch": 41.09, + "learning_rate": 0.00025614035087719294, + "loss": 0.0786, + "step": 2753 + }, + { + "epoch": 41.1, + "learning_rate": 0.00025610526315789474, + "loss": 0.2727, + "step": 2754 + }, + { + "epoch": 41.12, + "learning_rate": 0.0002560701754385965, + "loss": 0.1635, + "step": 2755 + }, + { + "epoch": 41.13, + "learning_rate": 0.00025603508771929824, + "loss": 0.029, + "step": 2756 + }, + { + "epoch": 41.15, + "learning_rate": 0.000256, + "loss": 0.0872, + "step": 2757 + }, + { + "epoch": 41.16, + "learning_rate": 0.00025596491228070174, + "loss": 0.3625, + "step": 2758 + }, + { + "epoch": 41.18, + "learning_rate": 0.0002559298245614035, + "loss": 0.0278, + "step": 2759 + }, + { + "epoch": 41.19, + "learning_rate": 0.00025589473684210523, + "loss": 0.0368, + "step": 2760 + }, + { + "epoch": 41.21, + "learning_rate": 0.000255859649122807, + "loss": 0.6355, + "step": 2761 + }, + { + "epoch": 41.22, + "learning_rate": 0.00025582456140350873, + "loss": 0.0689, + "step": 2762 + }, + { + "epoch": 41.24, + "learning_rate": 0.0002557894736842105, + "loss": 0.0832, + "step": 2763 + }, + { + "epoch": 41.25, + "learning_rate": 0.00025575438596491223, + "loss": 0.3266, + "step": 2764 + }, + { + "epoch": 41.27, + "learning_rate": 0.00025571929824561403, + "loss": 0.0166, + "step": 2765 + }, + { + "epoch": 41.28, + "learning_rate": 0.0002556842105263158, + "loss": 0.3294, + "step": 2766 + }, + { + "epoch": 41.3, + "learning_rate": 0.00025564912280701753, + "loss": 0.3561, + "step": 2767 + }, + { + "epoch": 41.31, + "learning_rate": 0.0002556140350877193, + "loss": 0.4908, + "step": 2768 + }, + { + "epoch": 41.33, + "learning_rate": 0.00025557894736842103, + "loss": 0.3531, + "step": 2769 + }, + { + "epoch": 41.34, + "learning_rate": 0.0002555438596491228, + "loss": 0.3064, + "step": 2770 + }, + { + "epoch": 41.36, + "learning_rate": 0.0002555087719298245, + "loss": 0.2952, + "step": 2771 + }, + { + "epoch": 41.37, + "learning_rate": 0.00025547368421052633, + "loss": 0.0237, + "step": 2772 + }, + { + "epoch": 41.39, + "learning_rate": 0.0002554385964912281, + "loss": 0.0403, + "step": 2773 + }, + { + "epoch": 41.4, + "learning_rate": 0.0002554035087719298, + "loss": 0.0116, + "step": 2774 + }, + { + "epoch": 41.42, + "learning_rate": 0.0002553684210526315, + "loss": 0.0635, + "step": 2775 + }, + { + "epoch": 41.43, + "learning_rate": 0.0002553333333333333, + "loss": 0.0113, + "step": 2776 + }, + { + "epoch": 41.45, + "learning_rate": 0.00025529824561403507, + "loss": 0.0259, + "step": 2777 + }, + { + "epoch": 41.46, + "learning_rate": 0.0002552631578947368, + "loss": 0.025, + "step": 2778 + }, + { + "epoch": 41.48, + "learning_rate": 0.00025522807017543857, + "loss": 0.1297, + "step": 2779 + }, + { + "epoch": 41.49, + "learning_rate": 0.0002551929824561403, + "loss": 0.4284, + "step": 2780 + }, + { + "epoch": 41.51, + "learning_rate": 0.00025515789473684207, + "loss": 0.2254, + "step": 2781 + }, + { + "epoch": 41.52, + "learning_rate": 0.0002551228070175438, + "loss": 0.246, + "step": 2782 + }, + { + "epoch": 41.54, + "learning_rate": 0.0002550877192982456, + "loss": 0.1542, + "step": 2783 + }, + { + "epoch": 41.55, + "learning_rate": 0.00025505263157894737, + "loss": 0.0179, + "step": 2784 + }, + { + "epoch": 41.57, + "learning_rate": 0.0002550175438596491, + "loss": 0.1614, + "step": 2785 + }, + { + "epoch": 41.58, + "learning_rate": 0.00025498245614035087, + "loss": 0.4609, + "step": 2786 + }, + { + "epoch": 41.59, + "learning_rate": 0.0002549473684210526, + "loss": 0.24, + "step": 2787 + }, + { + "epoch": 41.61, + "learning_rate": 0.00025491228070175436, + "loss": 0.4677, + "step": 2788 + }, + { + "epoch": 41.62, + "learning_rate": 0.0002548771929824561, + "loss": 0.2245, + "step": 2789 + }, + { + "epoch": 41.64, + "learning_rate": 0.00025484210526315786, + "loss": 0.1657, + "step": 2790 + }, + { + "epoch": 41.65, + "learning_rate": 0.00025480701754385966, + "loss": 0.0554, + "step": 2791 + }, + { + "epoch": 41.67, + "learning_rate": 0.00025477192982456136, + "loss": 0.0357, + "step": 2792 + }, + { + "epoch": 41.68, + "learning_rate": 0.0002547368421052631, + "loss": 0.0624, + "step": 2793 + }, + { + "epoch": 41.7, + "learning_rate": 0.00025470175438596486, + "loss": 0.0994, + "step": 2794 + }, + { + "epoch": 41.71, + "learning_rate": 0.00025466666666666666, + "loss": 0.1196, + "step": 2795 + }, + { + "epoch": 41.73, + "learning_rate": 0.0002546315789473684, + "loss": 0.2126, + "step": 2796 + }, + { + "epoch": 41.74, + "learning_rate": 0.00025459649122807016, + "loss": 0.2458, + "step": 2797 + }, + { + "epoch": 41.76, + "learning_rate": 0.0002545614035087719, + "loss": 0.0517, + "step": 2798 + }, + { + "epoch": 41.77, + "learning_rate": 0.00025452631578947365, + "loss": 0.0656, + "step": 2799 + }, + { + "epoch": 41.79, + "learning_rate": 0.0002544912280701754, + "loss": 0.2169, + "step": 2800 + }, + { + "epoch": 41.79, + "eval_accuracy": 0.8159569260890847, + "eval_f1": 0.8147649383825286, + "eval_loss": 0.789986789226532, + "eval_runtime": 344.2728, + "eval_samples_per_second": 11.868, + "eval_steps_per_second": 0.744, + "step": 2800 + }, + { + "epoch": 41.8, + "learning_rate": 0.00025445614035087715, + "loss": 0.472, + "step": 2801 + }, + { + "epoch": 41.82, + "learning_rate": 0.00025442105263157895, + "loss": 0.3, + "step": 2802 + }, + { + "epoch": 41.83, + "learning_rate": 0.0002543859649122807, + "loss": 0.2579, + "step": 2803 + }, + { + "epoch": 41.85, + "learning_rate": 0.00025435087719298245, + "loss": 0.0847, + "step": 2804 + }, + { + "epoch": 41.86, + "learning_rate": 0.00025431578947368415, + "loss": 0.014, + "step": 2805 + }, + { + "epoch": 41.88, + "learning_rate": 0.00025428070175438595, + "loss": 0.0145, + "step": 2806 + }, + { + "epoch": 41.89, + "learning_rate": 0.0002542456140350877, + "loss": 0.1619, + "step": 2807 + }, + { + "epoch": 41.91, + "learning_rate": 0.00025421052631578945, + "loss": 0.1608, + "step": 2808 + }, + { + "epoch": 41.92, + "learning_rate": 0.0002541754385964912, + "loss": 0.0117, + "step": 2809 + }, + { + "epoch": 41.94, + "learning_rate": 0.00025414035087719294, + "loss": 0.3642, + "step": 2810 + }, + { + "epoch": 41.95, + "learning_rate": 0.0002541052631578947, + "loss": 0.0423, + "step": 2811 + }, + { + "epoch": 41.97, + "learning_rate": 0.00025407017543859644, + "loss": 0.0056, + "step": 2812 + }, + { + "epoch": 41.98, + "learning_rate": 0.00025403508771929825, + "loss": 0.0841, + "step": 2813 + }, + { + "epoch": 42.0, + "learning_rate": 0.000254, + "loss": 0.2236, + "step": 2814 + }, + { + "epoch": 42.01, + "learning_rate": 0.00025396491228070174, + "loss": 0.0366, + "step": 2815 + }, + { + "epoch": 42.03, + "learning_rate": 0.0002539298245614035, + "loss": 0.1367, + "step": 2816 + }, + { + "epoch": 42.04, + "learning_rate": 0.00025389473684210524, + "loss": 0.0367, + "step": 2817 + }, + { + "epoch": 42.06, + "learning_rate": 0.000253859649122807, + "loss": 0.0964, + "step": 2818 + }, + { + "epoch": 42.07, + "learning_rate": 0.00025382456140350874, + "loss": 0.3533, + "step": 2819 + }, + { + "epoch": 42.09, + "learning_rate": 0.00025378947368421054, + "loss": 0.1794, + "step": 2820 + }, + { + "epoch": 42.1, + "learning_rate": 0.0002537543859649123, + "loss": 0.3442, + "step": 2821 + }, + { + "epoch": 42.12, + "learning_rate": 0.000253719298245614, + "loss": 0.0575, + "step": 2822 + }, + { + "epoch": 42.13, + "learning_rate": 0.00025368421052631573, + "loss": 0.2326, + "step": 2823 + }, + { + "epoch": 42.15, + "learning_rate": 0.00025364912280701754, + "loss": 0.4745, + "step": 2824 + }, + { + "epoch": 42.16, + "learning_rate": 0.0002536140350877193, + "loss": 0.0171, + "step": 2825 + }, + { + "epoch": 42.18, + "learning_rate": 0.00025357894736842103, + "loss": 0.1074, + "step": 2826 + }, + { + "epoch": 42.19, + "learning_rate": 0.0002535438596491228, + "loss": 0.2265, + "step": 2827 + }, + { + "epoch": 42.21, + "learning_rate": 0.00025350877192982453, + "loss": 0.0297, + "step": 2828 + }, + { + "epoch": 42.22, + "learning_rate": 0.0002534736842105263, + "loss": 0.0187, + "step": 2829 + }, + { + "epoch": 42.24, + "learning_rate": 0.00025343859649122803, + "loss": 0.0116, + "step": 2830 + }, + { + "epoch": 42.25, + "learning_rate": 0.0002534035087719298, + "loss": 0.3781, + "step": 2831 + }, + { + "epoch": 42.27, + "learning_rate": 0.0002533684210526316, + "loss": 0.5599, + "step": 2832 + }, + { + "epoch": 42.28, + "learning_rate": 0.00025333333333333333, + "loss": 0.0833, + "step": 2833 + }, + { + "epoch": 42.3, + "learning_rate": 0.0002532982456140351, + "loss": 0.0891, + "step": 2834 + }, + { + "epoch": 42.31, + "learning_rate": 0.0002532631578947368, + "loss": 0.1705, + "step": 2835 + }, + { + "epoch": 42.33, + "learning_rate": 0.0002532280701754386, + "loss": 0.5718, + "step": 2836 + }, + { + "epoch": 42.34, + "learning_rate": 0.0002531929824561403, + "loss": 0.2741, + "step": 2837 + }, + { + "epoch": 42.36, + "learning_rate": 0.0002531578947368421, + "loss": 0.1348, + "step": 2838 + }, + { + "epoch": 42.37, + "learning_rate": 0.0002531228070175439, + "loss": 0.0928, + "step": 2839 + }, + { + "epoch": 42.39, + "learning_rate": 0.00025308771929824557, + "loss": 0.1806, + "step": 2840 + }, + { + "epoch": 42.4, + "learning_rate": 0.0002530526315789473, + "loss": 0.1709, + "step": 2841 + }, + { + "epoch": 42.42, + "learning_rate": 0.00025301754385964907, + "loss": 0.2734, + "step": 2842 + }, + { + "epoch": 42.43, + "learning_rate": 0.00025298245614035087, + "loss": 0.3118, + "step": 2843 + }, + { + "epoch": 42.45, + "learning_rate": 0.0002529473684210526, + "loss": 0.0629, + "step": 2844 + }, + { + "epoch": 42.46, + "learning_rate": 0.00025291228070175437, + "loss": 0.6791, + "step": 2845 + }, + { + "epoch": 42.48, + "learning_rate": 0.0002528771929824561, + "loss": 0.2058, + "step": 2846 + }, + { + "epoch": 42.49, + "learning_rate": 0.00025284210526315787, + "loss": 0.0604, + "step": 2847 + }, + { + "epoch": 42.51, + "learning_rate": 0.0002528070175438596, + "loss": 0.1178, + "step": 2848 + }, + { + "epoch": 42.52, + "learning_rate": 0.00025277192982456136, + "loss": 0.1147, + "step": 2849 + }, + { + "epoch": 42.54, + "learning_rate": 0.00025273684210526317, + "loss": 0.126, + "step": 2850 + }, + { + "epoch": 42.55, + "learning_rate": 0.0002527017543859649, + "loss": 0.0091, + "step": 2851 + }, + { + "epoch": 42.57, + "learning_rate": 0.00025266666666666666, + "loss": 0.1277, + "step": 2852 + }, + { + "epoch": 42.58, + "learning_rate": 0.00025263157894736836, + "loss": 0.5943, + "step": 2853 + }, + { + "epoch": 42.59, + "learning_rate": 0.00025259649122807016, + "loss": 0.1464, + "step": 2854 + }, + { + "epoch": 42.61, + "learning_rate": 0.0002525614035087719, + "loss": 0.6232, + "step": 2855 + }, + { + "epoch": 42.62, + "learning_rate": 0.00025252631578947366, + "loss": 0.2973, + "step": 2856 + }, + { + "epoch": 42.64, + "learning_rate": 0.0002524912280701754, + "loss": 0.1586, + "step": 2857 + }, + { + "epoch": 42.65, + "learning_rate": 0.00025245614035087716, + "loss": 0.0124, + "step": 2858 + }, + { + "epoch": 42.67, + "learning_rate": 0.0002524210526315789, + "loss": 0.0061, + "step": 2859 + }, + { + "epoch": 42.68, + "learning_rate": 0.00025238596491228065, + "loss": 0.015, + "step": 2860 + }, + { + "epoch": 42.7, + "learning_rate": 0.00025235087719298246, + "loss": 0.337, + "step": 2861 + }, + { + "epoch": 42.71, + "learning_rate": 0.0002523157894736842, + "loss": 0.1753, + "step": 2862 + }, + { + "epoch": 42.73, + "learning_rate": 0.00025228070175438596, + "loss": 0.0205, + "step": 2863 + }, + { + "epoch": 42.74, + "learning_rate": 0.0002522456140350877, + "loss": 0.1048, + "step": 2864 + }, + { + "epoch": 42.76, + "learning_rate": 0.00025221052631578945, + "loss": 0.6739, + "step": 2865 + }, + { + "epoch": 42.77, + "learning_rate": 0.0002521754385964912, + "loss": 0.0291, + "step": 2866 + }, + { + "epoch": 42.79, + "learning_rate": 0.00025214035087719295, + "loss": 0.0214, + "step": 2867 + }, + { + "epoch": 42.8, + "learning_rate": 0.0002521052631578947, + "loss": 0.2474, + "step": 2868 + }, + { + "epoch": 42.82, + "learning_rate": 0.0002520701754385965, + "loss": 0.3674, + "step": 2869 + }, + { + "epoch": 42.83, + "learning_rate": 0.0002520350877192982, + "loss": 0.2382, + "step": 2870 + }, + { + "epoch": 42.85, + "learning_rate": 0.00025199999999999995, + "loss": 0.2272, + "step": 2871 + }, + { + "epoch": 42.86, + "learning_rate": 0.00025196491228070175, + "loss": 0.0438, + "step": 2872 + }, + { + "epoch": 42.88, + "learning_rate": 0.0002519298245614035, + "loss": 0.2005, + "step": 2873 + }, + { + "epoch": 42.89, + "learning_rate": 0.00025189473684210525, + "loss": 0.1324, + "step": 2874 + }, + { + "epoch": 42.91, + "learning_rate": 0.000251859649122807, + "loss": 0.1544, + "step": 2875 + }, + { + "epoch": 42.92, + "learning_rate": 0.00025182456140350874, + "loss": 0.1731, + "step": 2876 + }, + { + "epoch": 42.94, + "learning_rate": 0.0002517894736842105, + "loss": 0.0689, + "step": 2877 + }, + { + "epoch": 42.95, + "learning_rate": 0.00025175438596491224, + "loss": 0.114, + "step": 2878 + }, + { + "epoch": 42.97, + "learning_rate": 0.000251719298245614, + "loss": 0.0568, + "step": 2879 + }, + { + "epoch": 42.98, + "learning_rate": 0.0002516842105263158, + "loss": 0.3209, + "step": 2880 + }, + { + "epoch": 43.0, + "learning_rate": 0.00025164912280701754, + "loss": 0.2897, + "step": 2881 + }, + { + "epoch": 43.01, + "learning_rate": 0.0002516140350877193, + "loss": 0.2322, + "step": 2882 + }, + { + "epoch": 43.03, + "learning_rate": 0.00025157894736842104, + "loss": 0.1146, + "step": 2883 + }, + { + "epoch": 43.04, + "learning_rate": 0.0002515438596491228, + "loss": 0.0472, + "step": 2884 + }, + { + "epoch": 43.06, + "learning_rate": 0.00025150877192982454, + "loss": 0.0171, + "step": 2885 + }, + { + "epoch": 43.07, + "learning_rate": 0.0002514736842105263, + "loss": 0.2853, + "step": 2886 + }, + { + "epoch": 43.09, + "learning_rate": 0.0002514385964912281, + "loss": 0.1367, + "step": 2887 + }, + { + "epoch": 43.1, + "learning_rate": 0.0002514035087719298, + "loss": 0.5374, + "step": 2888 + }, + { + "epoch": 43.12, + "learning_rate": 0.00025136842105263153, + "loss": 0.0226, + "step": 2889 + }, + { + "epoch": 43.13, + "learning_rate": 0.0002513333333333333, + "loss": 0.3714, + "step": 2890 + }, + { + "epoch": 43.15, + "learning_rate": 0.0002512982456140351, + "loss": 0.0909, + "step": 2891 + }, + { + "epoch": 43.16, + "learning_rate": 0.00025126315789473683, + "loss": 0.5632, + "step": 2892 + }, + { + "epoch": 43.18, + "learning_rate": 0.0002512280701754386, + "loss": 0.1337, + "step": 2893 + }, + { + "epoch": 43.19, + "learning_rate": 0.00025119298245614033, + "loss": 0.0837, + "step": 2894 + }, + { + "epoch": 43.21, + "learning_rate": 0.0002511578947368421, + "loss": 0.0137, + "step": 2895 + }, + { + "epoch": 43.22, + "learning_rate": 0.00025112280701754383, + "loss": 0.0378, + "step": 2896 + }, + { + "epoch": 43.24, + "learning_rate": 0.0002510877192982456, + "loss": 0.0816, + "step": 2897 + }, + { + "epoch": 43.25, + "learning_rate": 0.0002510526315789474, + "loss": 0.039, + "step": 2898 + }, + { + "epoch": 43.27, + "learning_rate": 0.00025101754385964913, + "loss": 0.4815, + "step": 2899 + }, + { + "epoch": 43.28, + "learning_rate": 0.0002509824561403509, + "loss": 0.0348, + "step": 2900 + }, + { + "epoch": 43.3, + "learning_rate": 0.00025094736842105257, + "loss": 0.3811, + "step": 2901 + }, + { + "epoch": 43.31, + "learning_rate": 0.0002509122807017544, + "loss": 0.1087, + "step": 2902 + }, + { + "epoch": 43.33, + "learning_rate": 0.0002508771929824561, + "loss": 0.0497, + "step": 2903 + }, + { + "epoch": 43.34, + "learning_rate": 0.00025084210526315787, + "loss": 0.1322, + "step": 2904 + }, + { + "epoch": 43.36, + "learning_rate": 0.0002508070175438596, + "loss": 0.2186, + "step": 2905 + }, + { + "epoch": 43.37, + "learning_rate": 0.00025077192982456137, + "loss": 0.1789, + "step": 2906 + }, + { + "epoch": 43.39, + "learning_rate": 0.0002507368421052631, + "loss": 0.3261, + "step": 2907 + }, + { + "epoch": 43.4, + "learning_rate": 0.00025070175438596487, + "loss": 0.0251, + "step": 2908 + }, + { + "epoch": 43.42, + "learning_rate": 0.00025066666666666667, + "loss": 0.3344, + "step": 2909 + }, + { + "epoch": 43.43, + "learning_rate": 0.0002506315789473684, + "loss": 0.2145, + "step": 2910 + }, + { + "epoch": 43.45, + "learning_rate": 0.00025059649122807017, + "loss": 0.2443, + "step": 2911 + }, + { + "epoch": 43.46, + "learning_rate": 0.0002505614035087719, + "loss": 0.1077, + "step": 2912 + }, + { + "epoch": 43.48, + "learning_rate": 0.00025052631578947367, + "loss": 0.0922, + "step": 2913 + }, + { + "epoch": 43.49, + "learning_rate": 0.0002504912280701754, + "loss": 0.2037, + "step": 2914 + }, + { + "epoch": 43.51, + "learning_rate": 0.00025045614035087716, + "loss": 0.1559, + "step": 2915 + }, + { + "epoch": 43.52, + "learning_rate": 0.0002504210526315789, + "loss": 0.1043, + "step": 2916 + }, + { + "epoch": 43.54, + "learning_rate": 0.0002503859649122807, + "loss": 0.2706, + "step": 2917 + }, + { + "epoch": 43.55, + "learning_rate": 0.0002503508771929824, + "loss": 0.0522, + "step": 2918 + }, + { + "epoch": 43.57, + "learning_rate": 0.00025031578947368416, + "loss": 0.0158, + "step": 2919 + }, + { + "epoch": 43.58, + "learning_rate": 0.00025028070175438596, + "loss": 0.3399, + "step": 2920 + }, + { + "epoch": 43.59, + "learning_rate": 0.0002502456140350877, + "loss": 0.1641, + "step": 2921 + }, + { + "epoch": 43.61, + "learning_rate": 0.00025021052631578946, + "loss": 0.3141, + "step": 2922 + }, + { + "epoch": 43.62, + "learning_rate": 0.0002501754385964912, + "loss": 0.0162, + "step": 2923 + }, + { + "epoch": 43.64, + "learning_rate": 0.00025014035087719296, + "loss": 0.561, + "step": 2924 + }, + { + "epoch": 43.65, + "learning_rate": 0.0002501052631578947, + "loss": 0.1644, + "step": 2925 + }, + { + "epoch": 43.67, + "learning_rate": 0.00025007017543859645, + "loss": 0.2219, + "step": 2926 + }, + { + "epoch": 43.68, + "learning_rate": 0.0002500350877192982, + "loss": 0.1341, + "step": 2927 + }, + { + "epoch": 43.7, + "learning_rate": 0.00025, + "loss": 0.33, + "step": 2928 + }, + { + "epoch": 43.71, + "learning_rate": 0.00024996491228070175, + "loss": 0.1807, + "step": 2929 + }, + { + "epoch": 43.73, + "learning_rate": 0.0002499298245614035, + "loss": 0.1551, + "step": 2930 + }, + { + "epoch": 43.74, + "learning_rate": 0.00024989473684210525, + "loss": 0.1855, + "step": 2931 + }, + { + "epoch": 43.76, + "learning_rate": 0.000249859649122807, + "loss": 0.0575, + "step": 2932 + }, + { + "epoch": 43.77, + "learning_rate": 0.00024982456140350875, + "loss": 0.2949, + "step": 2933 + }, + { + "epoch": 43.79, + "learning_rate": 0.0002497894736842105, + "loss": 0.1351, + "step": 2934 + }, + { + "epoch": 43.8, + "learning_rate": 0.00024975438596491225, + "loss": 0.1623, + "step": 2935 + }, + { + "epoch": 43.82, + "learning_rate": 0.000249719298245614, + "loss": 0.3443, + "step": 2936 + }, + { + "epoch": 43.83, + "learning_rate": 0.00024968421052631574, + "loss": 0.1855, + "step": 2937 + }, + { + "epoch": 43.85, + "learning_rate": 0.0002496491228070175, + "loss": 0.498, + "step": 2938 + }, + { + "epoch": 43.86, + "learning_rate": 0.0002496140350877193, + "loss": 0.1217, + "step": 2939 + }, + { + "epoch": 43.88, + "learning_rate": 0.00024957894736842104, + "loss": 0.021, + "step": 2940 + }, + { + "epoch": 43.89, + "learning_rate": 0.0002495438596491228, + "loss": 0.0545, + "step": 2941 + }, + { + "epoch": 43.91, + "learning_rate": 0.00024950877192982454, + "loss": 0.0702, + "step": 2942 + }, + { + "epoch": 43.92, + "learning_rate": 0.0002494736842105263, + "loss": 0.2528, + "step": 2943 + }, + { + "epoch": 43.94, + "learning_rate": 0.00024943859649122804, + "loss": 0.0393, + "step": 2944 + }, + { + "epoch": 43.95, + "learning_rate": 0.0002494035087719298, + "loss": 0.2664, + "step": 2945 + }, + { + "epoch": 43.97, + "learning_rate": 0.0002493684210526316, + "loss": 0.3916, + "step": 2946 + }, + { + "epoch": 43.98, + "learning_rate": 0.00024933333333333334, + "loss": 0.08, + "step": 2947 + }, + { + "epoch": 44.0, + "learning_rate": 0.0002492982456140351, + "loss": 0.07, + "step": 2948 + }, + { + "epoch": 44.01, + "learning_rate": 0.0002492631578947368, + "loss": 0.4027, + "step": 2949 + }, + { + "epoch": 44.03, + "learning_rate": 0.0002492280701754386, + "loss": 0.1744, + "step": 2950 + }, + { + "epoch": 44.04, + "learning_rate": 0.00024919298245614034, + "loss": 0.1237, + "step": 2951 + }, + { + "epoch": 44.06, + "learning_rate": 0.0002491578947368421, + "loss": 0.1386, + "step": 2952 + }, + { + "epoch": 44.07, + "learning_rate": 0.00024912280701754383, + "loss": 0.1239, + "step": 2953 + }, + { + "epoch": 44.09, + "learning_rate": 0.0002490877192982456, + "loss": 0.3292, + "step": 2954 + }, + { + "epoch": 44.1, + "learning_rate": 0.00024905263157894733, + "loss": 0.1744, + "step": 2955 + }, + { + "epoch": 44.12, + "learning_rate": 0.0002490175438596491, + "loss": 0.2993, + "step": 2956 + }, + { + "epoch": 44.13, + "learning_rate": 0.00024898245614035083, + "loss": 0.0331, + "step": 2957 + }, + { + "epoch": 44.15, + "learning_rate": 0.00024894736842105263, + "loss": 0.1431, + "step": 2958 + }, + { + "epoch": 44.16, + "learning_rate": 0.0002489122807017544, + "loss": 0.0802, + "step": 2959 + }, + { + "epoch": 44.18, + "learning_rate": 0.00024887719298245613, + "loss": 0.131, + "step": 2960 + }, + { + "epoch": 44.19, + "learning_rate": 0.0002488421052631579, + "loss": 0.098, + "step": 2961 + }, + { + "epoch": 44.21, + "learning_rate": 0.0002488070175438596, + "loss": 0.0511, + "step": 2962 + }, + { + "epoch": 44.22, + "learning_rate": 0.0002487719298245614, + "loss": 0.2082, + "step": 2963 + }, + { + "epoch": 44.24, + "learning_rate": 0.0002487368421052631, + "loss": 0.0368, + "step": 2964 + }, + { + "epoch": 44.25, + "learning_rate": 0.0002487017543859649, + "loss": 0.0232, + "step": 2965 + }, + { + "epoch": 44.27, + "learning_rate": 0.0002486666666666666, + "loss": 0.1291, + "step": 2966 + }, + { + "epoch": 44.28, + "learning_rate": 0.00024863157894736837, + "loss": 0.015, + "step": 2967 + }, + { + "epoch": 44.3, + "learning_rate": 0.0002485964912280701, + "loss": 0.3891, + "step": 2968 + }, + { + "epoch": 44.31, + "learning_rate": 0.0002485614035087719, + "loss": 0.0984, + "step": 2969 + }, + { + "epoch": 44.33, + "learning_rate": 0.00024852631578947367, + "loss": 0.3002, + "step": 2970 + }, + { + "epoch": 44.34, + "learning_rate": 0.0002484912280701754, + "loss": 0.0087, + "step": 2971 + }, + { + "epoch": 44.36, + "learning_rate": 0.00024845614035087717, + "loss": 0.2016, + "step": 2972 + }, + { + "epoch": 44.37, + "learning_rate": 0.0002484210526315789, + "loss": 0.0873, + "step": 2973 + }, + { + "epoch": 44.39, + "learning_rate": 0.00024838596491228067, + "loss": 0.0506, + "step": 2974 + }, + { + "epoch": 44.4, + "learning_rate": 0.0002483508771929824, + "loss": 0.2401, + "step": 2975 + }, + { + "epoch": 44.42, + "learning_rate": 0.0002483157894736842, + "loss": 0.0794, + "step": 2976 + }, + { + "epoch": 44.43, + "learning_rate": 0.00024828070175438597, + "loss": 0.0242, + "step": 2977 + }, + { + "epoch": 44.45, + "learning_rate": 0.0002482456140350877, + "loss": 0.0321, + "step": 2978 + }, + { + "epoch": 44.46, + "learning_rate": 0.00024821052631578946, + "loss": 0.1454, + "step": 2979 + }, + { + "epoch": 44.48, + "learning_rate": 0.0002481754385964912, + "loss": 0.0272, + "step": 2980 + }, + { + "epoch": 44.49, + "learning_rate": 0.00024814035087719296, + "loss": 0.0047, + "step": 2981 + }, + { + "epoch": 44.51, + "learning_rate": 0.0002481052631578947, + "loss": 0.0876, + "step": 2982 + }, + { + "epoch": 44.52, + "learning_rate": 0.00024807017543859646, + "loss": 0.053, + "step": 2983 + }, + { + "epoch": 44.54, + "learning_rate": 0.0002480350877192982, + "loss": 0.0717, + "step": 2984 + }, + { + "epoch": 44.55, + "learning_rate": 0.00024799999999999996, + "loss": 0.5333, + "step": 2985 + }, + { + "epoch": 44.57, + "learning_rate": 0.0002479649122807017, + "loss": 0.0326, + "step": 2986 + }, + { + "epoch": 44.58, + "learning_rate": 0.0002479298245614035, + "loss": 0.1626, + "step": 2987 + }, + { + "epoch": 44.59, + "learning_rate": 0.00024789473684210526, + "loss": 0.1837, + "step": 2988 + }, + { + "epoch": 44.61, + "learning_rate": 0.000247859649122807, + "loss": 0.3757, + "step": 2989 + }, + { + "epoch": 44.62, + "learning_rate": 0.00024782456140350875, + "loss": 0.065, + "step": 2990 + }, + { + "epoch": 44.64, + "learning_rate": 0.0002477894736842105, + "loss": 0.21, + "step": 2991 + }, + { + "epoch": 44.65, + "learning_rate": 0.00024775438596491225, + "loss": 0.0765, + "step": 2992 + }, + { + "epoch": 44.67, + "learning_rate": 0.000247719298245614, + "loss": 0.1718, + "step": 2993 + }, + { + "epoch": 44.68, + "learning_rate": 0.00024768421052631575, + "loss": 0.0413, + "step": 2994 + }, + { + "epoch": 44.7, + "learning_rate": 0.00024764912280701755, + "loss": 0.0597, + "step": 2995 + }, + { + "epoch": 44.71, + "learning_rate": 0.0002476140350877193, + "loss": 0.2591, + "step": 2996 + }, + { + "epoch": 44.73, + "learning_rate": 0.000247578947368421, + "loss": 0.0086, + "step": 2997 + }, + { + "epoch": 44.74, + "learning_rate": 0.0002475438596491228, + "loss": 0.2637, + "step": 2998 + }, + { + "epoch": 44.76, + "learning_rate": 0.00024750877192982455, + "loss": 0.7918, + "step": 2999 + }, + { + "epoch": 44.77, + "learning_rate": 0.0002474736842105263, + "loss": 0.3942, + "step": 3000 + }, + { + "epoch": 44.77, + "eval_accuracy": 0.8042094958394518, + "eval_f1": 0.8129959248685728, + "eval_loss": 0.8620648384094238, + "eval_runtime": 344.2906, + "eval_samples_per_second": 11.868, + "eval_steps_per_second": 0.744, + "step": 3000 + }, + { + "epoch": 44.79, + "learning_rate": 0.00024743859649122805, + "loss": 0.4602, + "step": 3001 + }, + { + "epoch": 44.8, + "learning_rate": 0.0002474035087719298, + "loss": 0.1656, + "step": 3002 + }, + { + "epoch": 44.82, + "learning_rate": 0.00024736842105263154, + "loss": 0.3406, + "step": 3003 + }, + { + "epoch": 44.83, + "learning_rate": 0.0002473333333333333, + "loss": 0.2357, + "step": 3004 + }, + { + "epoch": 44.85, + "learning_rate": 0.00024729824561403504, + "loss": 0.5817, + "step": 3005 + }, + { + "epoch": 44.86, + "learning_rate": 0.00024726315789473684, + "loss": 0.0355, + "step": 3006 + }, + { + "epoch": 44.88, + "learning_rate": 0.0002472280701754386, + "loss": 0.332, + "step": 3007 + }, + { + "epoch": 44.89, + "learning_rate": 0.00024719298245614034, + "loss": 0.0443, + "step": 3008 + }, + { + "epoch": 44.91, + "learning_rate": 0.0002471578947368421, + "loss": 0.3544, + "step": 3009 + }, + { + "epoch": 44.92, + "learning_rate": 0.00024712280701754384, + "loss": 0.2409, + "step": 3010 + }, + { + "epoch": 44.94, + "learning_rate": 0.0002470877192982456, + "loss": 0.025, + "step": 3011 + }, + { + "epoch": 44.95, + "learning_rate": 0.00024705263157894734, + "loss": 0.1748, + "step": 3012 + }, + { + "epoch": 44.97, + "learning_rate": 0.00024701754385964914, + "loss": 0.0428, + "step": 3013 + }, + { + "epoch": 44.98, + "learning_rate": 0.00024698245614035083, + "loss": 0.1191, + "step": 3014 + }, + { + "epoch": 45.0, + "learning_rate": 0.0002469473684210526, + "loss": 0.2148, + "step": 3015 + }, + { + "epoch": 45.01, + "learning_rate": 0.00024691228070175433, + "loss": 0.3834, + "step": 3016 + }, + { + "epoch": 45.03, + "learning_rate": 0.00024687719298245613, + "loss": 0.2043, + "step": 3017 + }, + { + "epoch": 45.04, + "learning_rate": 0.0002468421052631579, + "loss": 0.0231, + "step": 3018 + }, + { + "epoch": 45.06, + "learning_rate": 0.00024680701754385963, + "loss": 0.17, + "step": 3019 + }, + { + "epoch": 45.07, + "learning_rate": 0.0002467719298245614, + "loss": 0.2151, + "step": 3020 + }, + { + "epoch": 45.09, + "learning_rate": 0.00024673684210526313, + "loss": 0.0963, + "step": 3021 + }, + { + "epoch": 45.1, + "learning_rate": 0.0002467017543859649, + "loss": 0.0191, + "step": 3022 + }, + { + "epoch": 45.12, + "learning_rate": 0.0002466666666666666, + "loss": 0.1489, + "step": 3023 + }, + { + "epoch": 45.13, + "learning_rate": 0.00024663157894736843, + "loss": 0.3331, + "step": 3024 + }, + { + "epoch": 45.15, + "learning_rate": 0.0002465964912280702, + "loss": 0.3295, + "step": 3025 + }, + { + "epoch": 45.16, + "learning_rate": 0.00024656140350877193, + "loss": 0.2693, + "step": 3026 + }, + { + "epoch": 45.18, + "learning_rate": 0.0002465263157894737, + "loss": 0.4132, + "step": 3027 + }, + { + "epoch": 45.19, + "learning_rate": 0.0002464912280701754, + "loss": 0.0197, + "step": 3028 + }, + { + "epoch": 45.21, + "learning_rate": 0.0002464561403508772, + "loss": 0.0577, + "step": 3029 + }, + { + "epoch": 45.22, + "learning_rate": 0.0002464210526315789, + "loss": 0.1374, + "step": 3030 + }, + { + "epoch": 45.24, + "learning_rate": 0.00024638596491228067, + "loss": 0.1153, + "step": 3031 + }, + { + "epoch": 45.25, + "learning_rate": 0.0002463508771929824, + "loss": 0.1013, + "step": 3032 + }, + { + "epoch": 45.27, + "learning_rate": 0.00024631578947368417, + "loss": 0.2066, + "step": 3033 + }, + { + "epoch": 45.28, + "learning_rate": 0.0002462807017543859, + "loss": 0.0493, + "step": 3034 + }, + { + "epoch": 45.3, + "learning_rate": 0.0002462456140350877, + "loss": 0.1365, + "step": 3035 + }, + { + "epoch": 45.31, + "learning_rate": 0.00024621052631578947, + "loss": 0.088, + "step": 3036 + }, + { + "epoch": 45.33, + "learning_rate": 0.0002461754385964912, + "loss": 0.0117, + "step": 3037 + }, + { + "epoch": 45.34, + "learning_rate": 0.00024614035087719297, + "loss": 0.0095, + "step": 3038 + }, + { + "epoch": 45.36, + "learning_rate": 0.0002461052631578947, + "loss": 0.1326, + "step": 3039 + }, + { + "epoch": 45.37, + "learning_rate": 0.00024607017543859646, + "loss": 0.1197, + "step": 3040 + }, + { + "epoch": 45.39, + "learning_rate": 0.0002460350877192982, + "loss": 0.029, + "step": 3041 + }, + { + "epoch": 45.4, + "learning_rate": 0.00024599999999999996, + "loss": 0.1934, + "step": 3042 + }, + { + "epoch": 45.42, + "learning_rate": 0.00024596491228070177, + "loss": 0.1238, + "step": 3043 + }, + { + "epoch": 45.43, + "learning_rate": 0.0002459298245614035, + "loss": 0.0603, + "step": 3044 + }, + { + "epoch": 45.45, + "learning_rate": 0.0002458947368421052, + "loss": 0.0289, + "step": 3045 + }, + { + "epoch": 45.46, + "learning_rate": 0.000245859649122807, + "loss": 0.3161, + "step": 3046 + }, + { + "epoch": 45.48, + "learning_rate": 0.00024582456140350876, + "loss": 0.1879, + "step": 3047 + }, + { + "epoch": 45.49, + "learning_rate": 0.0002457894736842105, + "loss": 0.2013, + "step": 3048 + }, + { + "epoch": 45.51, + "learning_rate": 0.00024575438596491226, + "loss": 0.2212, + "step": 3049 + }, + { + "epoch": 45.52, + "learning_rate": 0.000245719298245614, + "loss": 0.4477, + "step": 3050 + }, + { + "epoch": 45.54, + "learning_rate": 0.00024568421052631576, + "loss": 0.2636, + "step": 3051 + }, + { + "epoch": 45.55, + "learning_rate": 0.0002456491228070175, + "loss": 0.3108, + "step": 3052 + }, + { + "epoch": 45.57, + "learning_rate": 0.00024561403508771925, + "loss": 0.2828, + "step": 3053 + }, + { + "epoch": 45.58, + "learning_rate": 0.00024557894736842106, + "loss": 0.0839, + "step": 3054 + }, + { + "epoch": 45.59, + "learning_rate": 0.0002455438596491228, + "loss": 0.1829, + "step": 3055 + }, + { + "epoch": 45.61, + "learning_rate": 0.00024550877192982455, + "loss": 0.0478, + "step": 3056 + }, + { + "epoch": 45.62, + "learning_rate": 0.0002454736842105263, + "loss": 0.2361, + "step": 3057 + }, + { + "epoch": 45.64, + "learning_rate": 0.00024543859649122805, + "loss": 0.1828, + "step": 3058 + }, + { + "epoch": 45.65, + "learning_rate": 0.0002454035087719298, + "loss": 0.245, + "step": 3059 + }, + { + "epoch": 45.67, + "learning_rate": 0.00024536842105263155, + "loss": 0.139, + "step": 3060 + }, + { + "epoch": 45.68, + "learning_rate": 0.00024533333333333335, + "loss": 0.1808, + "step": 3061 + }, + { + "epoch": 45.7, + "learning_rate": 0.00024529824561403505, + "loss": 0.0683, + "step": 3062 + }, + { + "epoch": 45.71, + "learning_rate": 0.0002452631578947368, + "loss": 0.0367, + "step": 3063 + }, + { + "epoch": 45.73, + "learning_rate": 0.00024522807017543854, + "loss": 0.1152, + "step": 3064 + }, + { + "epoch": 45.74, + "learning_rate": 0.00024519298245614035, + "loss": 0.1368, + "step": 3065 + }, + { + "epoch": 45.76, + "learning_rate": 0.0002451578947368421, + "loss": 0.0907, + "step": 3066 + }, + { + "epoch": 45.77, + "learning_rate": 0.00024512280701754384, + "loss": 0.0304, + "step": 3067 + }, + { + "epoch": 45.79, + "learning_rate": 0.0002450877192982456, + "loss": 0.156, + "step": 3068 + }, + { + "epoch": 45.8, + "learning_rate": 0.00024505263157894734, + "loss": 0.3528, + "step": 3069 + }, + { + "epoch": 45.82, + "learning_rate": 0.0002450175438596491, + "loss": 0.0618, + "step": 3070 + }, + { + "epoch": 45.83, + "learning_rate": 0.00024498245614035084, + "loss": 0.2586, + "step": 3071 + }, + { + "epoch": 45.85, + "learning_rate": 0.00024494736842105264, + "loss": 0.2432, + "step": 3072 + }, + { + "epoch": 45.86, + "learning_rate": 0.0002449122807017544, + "loss": 0.1289, + "step": 3073 + }, + { + "epoch": 45.88, + "learning_rate": 0.00024487719298245614, + "loss": 0.0413, + "step": 3074 + }, + { + "epoch": 45.89, + "learning_rate": 0.0002448421052631579, + "loss": 0.228, + "step": 3075 + }, + { + "epoch": 45.91, + "learning_rate": 0.00024480701754385964, + "loss": 0.2158, + "step": 3076 + }, + { + "epoch": 45.92, + "learning_rate": 0.0002447719298245614, + "loss": 0.0989, + "step": 3077 + }, + { + "epoch": 45.94, + "learning_rate": 0.00024473684210526314, + "loss": 0.3633, + "step": 3078 + }, + { + "epoch": 45.95, + "learning_rate": 0.0002447017543859649, + "loss": 0.2802, + "step": 3079 + }, + { + "epoch": 45.97, + "learning_rate": 0.00024466666666666663, + "loss": 0.1715, + "step": 3080 + }, + { + "epoch": 45.98, + "learning_rate": 0.0002446315789473684, + "loss": 0.0474, + "step": 3081 + }, + { + "epoch": 46.0, + "learning_rate": 0.00024459649122807013, + "loss": 0.3234, + "step": 3082 + }, + { + "epoch": 46.01, + "learning_rate": 0.00024456140350877193, + "loss": 0.3223, + "step": 3083 + }, + { + "epoch": 46.03, + "learning_rate": 0.0002445263157894737, + "loss": 0.0872, + "step": 3084 + }, + { + "epoch": 46.04, + "learning_rate": 0.00024449122807017543, + "loss": 0.0712, + "step": 3085 + }, + { + "epoch": 46.06, + "learning_rate": 0.0002444561403508772, + "loss": 0.1009, + "step": 3086 + }, + { + "epoch": 46.07, + "learning_rate": 0.00024442105263157893, + "loss": 0.3466, + "step": 3087 + }, + { + "epoch": 46.09, + "learning_rate": 0.0002443859649122807, + "loss": 0.3214, + "step": 3088 + }, + { + "epoch": 46.1, + "learning_rate": 0.0002443508771929824, + "loss": 0.507, + "step": 3089 + }, + { + "epoch": 46.12, + "learning_rate": 0.0002443157894736842, + "loss": 0.0643, + "step": 3090 + }, + { + "epoch": 46.13, + "learning_rate": 0.000244280701754386, + "loss": 0.0152, + "step": 3091 + }, + { + "epoch": 46.15, + "learning_rate": 0.0002442456140350877, + "loss": 0.0998, + "step": 3092 + }, + { + "epoch": 46.16, + "learning_rate": 0.0002442105263157894, + "loss": 0.0875, + "step": 3093 + }, + { + "epoch": 46.18, + "learning_rate": 0.00024417543859649117, + "loss": 0.1965, + "step": 3094 + }, + { + "epoch": 46.19, + "learning_rate": 0.00024414035087719297, + "loss": 0.124, + "step": 3095 + }, + { + "epoch": 46.21, + "learning_rate": 0.00024410526315789472, + "loss": 0.0267, + "step": 3096 + }, + { + "epoch": 46.22, + "learning_rate": 0.00024407017543859647, + "loss": 0.0135, + "step": 3097 + }, + { + "epoch": 46.24, + "learning_rate": 0.00024403508771929825, + "loss": 0.1685, + "step": 3098 + }, + { + "epoch": 46.25, + "learning_rate": 0.000244, + "loss": 0.2474, + "step": 3099 + }, + { + "epoch": 46.27, + "learning_rate": 0.00024396491228070172, + "loss": 0.1671, + "step": 3100 + }, + { + "epoch": 46.28, + "learning_rate": 0.00024392982456140347, + "loss": 0.1471, + "step": 3101 + }, + { + "epoch": 46.3, + "learning_rate": 0.00024389473684210524, + "loss": 0.2128, + "step": 3102 + }, + { + "epoch": 46.31, + "learning_rate": 0.000243859649122807, + "loss": 0.0124, + "step": 3103 + }, + { + "epoch": 46.33, + "learning_rate": 0.00024382456140350874, + "loss": 0.3404, + "step": 3104 + }, + { + "epoch": 46.34, + "learning_rate": 0.0002437894736842105, + "loss": 0.1251, + "step": 3105 + }, + { + "epoch": 46.36, + "learning_rate": 0.00024375438596491226, + "loss": 0.1597, + "step": 3106 + }, + { + "epoch": 46.37, + "learning_rate": 0.000243719298245614, + "loss": 0.0564, + "step": 3107 + }, + { + "epoch": 46.39, + "learning_rate": 0.00024368421052631576, + "loss": 0.0055, + "step": 3108 + }, + { + "epoch": 46.4, + "learning_rate": 0.00024364912280701754, + "loss": 0.206, + "step": 3109 + }, + { + "epoch": 46.42, + "learning_rate": 0.00024361403508771929, + "loss": 0.1958, + "step": 3110 + }, + { + "epoch": 46.43, + "learning_rate": 0.00024357894736842103, + "loss": 0.2692, + "step": 3111 + }, + { + "epoch": 46.45, + "learning_rate": 0.00024354385964912278, + "loss": 0.045, + "step": 3112 + }, + { + "epoch": 46.46, + "learning_rate": 0.00024350877192982456, + "loss": 0.1451, + "step": 3113 + }, + { + "epoch": 46.48, + "learning_rate": 0.0002434736842105263, + "loss": 0.066, + "step": 3114 + }, + { + "epoch": 46.49, + "learning_rate": 0.00024343859649122806, + "loss": 0.0914, + "step": 3115 + }, + { + "epoch": 46.51, + "learning_rate": 0.00024340350877192978, + "loss": 0.4548, + "step": 3116 + }, + { + "epoch": 46.52, + "learning_rate": 0.00024336842105263158, + "loss": 0.2507, + "step": 3117 + }, + { + "epoch": 46.54, + "learning_rate": 0.0002433333333333333, + "loss": 0.0096, + "step": 3118 + }, + { + "epoch": 46.55, + "learning_rate": 0.00024329824561403505, + "loss": 0.1576, + "step": 3119 + }, + { + "epoch": 46.57, + "learning_rate": 0.0002432631578947368, + "loss": 0.2727, + "step": 3120 + }, + { + "epoch": 46.58, + "learning_rate": 0.00024322807017543858, + "loss": 0.0633, + "step": 3121 + }, + { + "epoch": 46.59, + "learning_rate": 0.00024319298245614033, + "loss": 0.0677, + "step": 3122 + }, + { + "epoch": 46.61, + "learning_rate": 0.00024315789473684207, + "loss": 0.2125, + "step": 3123 + }, + { + "epoch": 46.62, + "learning_rate": 0.00024312280701754385, + "loss": 0.06, + "step": 3124 + }, + { + "epoch": 46.64, + "learning_rate": 0.0002430877192982456, + "loss": 0.0515, + "step": 3125 + }, + { + "epoch": 46.65, + "learning_rate": 0.00024305263157894735, + "loss": 0.0243, + "step": 3126 + }, + { + "epoch": 46.67, + "learning_rate": 0.0002430175438596491, + "loss": 0.1532, + "step": 3127 + }, + { + "epoch": 46.68, + "learning_rate": 0.00024298245614035087, + "loss": 0.2456, + "step": 3128 + }, + { + "epoch": 46.7, + "learning_rate": 0.00024294736842105262, + "loss": 0.034, + "step": 3129 + }, + { + "epoch": 46.71, + "learning_rate": 0.00024291228070175437, + "loss": 0.1093, + "step": 3130 + }, + { + "epoch": 46.73, + "learning_rate": 0.0002428771929824561, + "loss": 0.0393, + "step": 3131 + }, + { + "epoch": 46.74, + "learning_rate": 0.0002428421052631579, + "loss": 0.1327, + "step": 3132 + }, + { + "epoch": 46.76, + "learning_rate": 0.00024280701754385962, + "loss": 0.1199, + "step": 3133 + }, + { + "epoch": 46.77, + "learning_rate": 0.00024277192982456136, + "loss": 0.358, + "step": 3134 + }, + { + "epoch": 46.79, + "learning_rate": 0.00024273684210526314, + "loss": 0.08, + "step": 3135 + }, + { + "epoch": 46.8, + "learning_rate": 0.0002427017543859649, + "loss": 0.3817, + "step": 3136 + }, + { + "epoch": 46.82, + "learning_rate": 0.00024266666666666664, + "loss": 0.0779, + "step": 3137 + }, + { + "epoch": 46.83, + "learning_rate": 0.0002426315789473684, + "loss": 0.1329, + "step": 3138 + }, + { + "epoch": 46.85, + "learning_rate": 0.00024259649122807016, + "loss": 0.1074, + "step": 3139 + }, + { + "epoch": 46.86, + "learning_rate": 0.0002425614035087719, + "loss": 0.1433, + "step": 3140 + }, + { + "epoch": 46.88, + "learning_rate": 0.00024252631578947366, + "loss": 0.1577, + "step": 3141 + }, + { + "epoch": 46.89, + "learning_rate": 0.0002424912280701754, + "loss": 0.3007, + "step": 3142 + }, + { + "epoch": 46.91, + "learning_rate": 0.00024245614035087719, + "loss": 0.2814, + "step": 3143 + }, + { + "epoch": 46.92, + "learning_rate": 0.00024242105263157893, + "loss": 0.3594, + "step": 3144 + }, + { + "epoch": 46.94, + "learning_rate": 0.00024238596491228068, + "loss": 0.1507, + "step": 3145 + }, + { + "epoch": 46.95, + "learning_rate": 0.00024235087719298246, + "loss": 0.1063, + "step": 3146 + }, + { + "epoch": 46.97, + "learning_rate": 0.0002423157894736842, + "loss": 0.2693, + "step": 3147 + }, + { + "epoch": 46.98, + "learning_rate": 0.00024228070175438593, + "loss": 0.112, + "step": 3148 + }, + { + "epoch": 47.0, + "learning_rate": 0.00024224561403508768, + "loss": 0.3829, + "step": 3149 + }, + { + "epoch": 47.01, + "learning_rate": 0.00024221052631578945, + "loss": 0.1906, + "step": 3150 + }, + { + "epoch": 47.03, + "learning_rate": 0.0002421754385964912, + "loss": 0.1587, + "step": 3151 + }, + { + "epoch": 47.04, + "learning_rate": 0.00024214035087719295, + "loss": 0.1186, + "step": 3152 + }, + { + "epoch": 47.06, + "learning_rate": 0.0002421052631578947, + "loss": 0.1939, + "step": 3153 + }, + { + "epoch": 47.07, + "learning_rate": 0.00024207017543859648, + "loss": 0.0562, + "step": 3154 + }, + { + "epoch": 47.09, + "learning_rate": 0.00024203508771929822, + "loss": 0.2518, + "step": 3155 + }, + { + "epoch": 47.1, + "learning_rate": 0.00024199999999999997, + "loss": 0.255, + "step": 3156 + }, + { + "epoch": 47.12, + "learning_rate": 0.00024196491228070172, + "loss": 0.0486, + "step": 3157 + }, + { + "epoch": 47.13, + "learning_rate": 0.0002419298245614035, + "loss": 0.2381, + "step": 3158 + }, + { + "epoch": 47.15, + "learning_rate": 0.00024189473684210525, + "loss": 0.1361, + "step": 3159 + }, + { + "epoch": 47.16, + "learning_rate": 0.000241859649122807, + "loss": 0.157, + "step": 3160 + }, + { + "epoch": 47.18, + "learning_rate": 0.00024182456140350877, + "loss": 0.5923, + "step": 3161 + }, + { + "epoch": 47.19, + "learning_rate": 0.00024178947368421052, + "loss": 0.1404, + "step": 3162 + }, + { + "epoch": 47.21, + "learning_rate": 0.00024175438596491227, + "loss": 0.013, + "step": 3163 + }, + { + "epoch": 47.22, + "learning_rate": 0.000241719298245614, + "loss": 0.0112, + "step": 3164 + }, + { + "epoch": 47.24, + "learning_rate": 0.0002416842105263158, + "loss": 0.2291, + "step": 3165 + }, + { + "epoch": 47.25, + "learning_rate": 0.00024164912280701752, + "loss": 0.1353, + "step": 3166 + }, + { + "epoch": 47.27, + "learning_rate": 0.00024161403508771926, + "loss": 0.0393, + "step": 3167 + }, + { + "epoch": 47.28, + "learning_rate": 0.000241578947368421, + "loss": 0.0808, + "step": 3168 + }, + { + "epoch": 47.3, + "learning_rate": 0.0002415438596491228, + "loss": 0.0112, + "step": 3169 + }, + { + "epoch": 47.31, + "learning_rate": 0.00024150877192982454, + "loss": 0.0361, + "step": 3170 + }, + { + "epoch": 47.33, + "learning_rate": 0.00024147368421052629, + "loss": 0.1137, + "step": 3171 + }, + { + "epoch": 47.34, + "learning_rate": 0.00024143859649122806, + "loss": 0.0466, + "step": 3172 + }, + { + "epoch": 47.36, + "learning_rate": 0.0002414035087719298, + "loss": 0.0181, + "step": 3173 + }, + { + "epoch": 47.37, + "learning_rate": 0.00024136842105263156, + "loss": 0.0182, + "step": 3174 + }, + { + "epoch": 47.39, + "learning_rate": 0.0002413333333333333, + "loss": 0.0145, + "step": 3175 + }, + { + "epoch": 47.4, + "learning_rate": 0.00024129824561403508, + "loss": 0.2759, + "step": 3176 + }, + { + "epoch": 47.42, + "learning_rate": 0.00024126315789473683, + "loss": 0.1418, + "step": 3177 + }, + { + "epoch": 47.43, + "learning_rate": 0.00024122807017543858, + "loss": 0.1672, + "step": 3178 + }, + { + "epoch": 47.45, + "learning_rate": 0.0002411929824561403, + "loss": 0.1402, + "step": 3179 + }, + { + "epoch": 47.46, + "learning_rate": 0.0002411578947368421, + "loss": 0.1563, + "step": 3180 + }, + { + "epoch": 47.48, + "learning_rate": 0.00024112280701754383, + "loss": 0.0914, + "step": 3181 + }, + { + "epoch": 47.49, + "learning_rate": 0.00024108771929824558, + "loss": 0.0067, + "step": 3182 + }, + { + "epoch": 47.51, + "learning_rate": 0.00024105263157894735, + "loss": 0.1717, + "step": 3183 + }, + { + "epoch": 47.52, + "learning_rate": 0.0002410175438596491, + "loss": 0.2388, + "step": 3184 + }, + { + "epoch": 47.54, + "learning_rate": 0.00024098245614035085, + "loss": 0.561, + "step": 3185 + }, + { + "epoch": 47.55, + "learning_rate": 0.0002409473684210526, + "loss": 0.2598, + "step": 3186 + }, + { + "epoch": 47.57, + "learning_rate": 0.00024091228070175438, + "loss": 0.0856, + "step": 3187 + }, + { + "epoch": 47.58, + "learning_rate": 0.00024087719298245612, + "loss": 0.2389, + "step": 3188 + }, + { + "epoch": 47.59, + "learning_rate": 0.00024084210526315787, + "loss": 0.0659, + "step": 3189 + }, + { + "epoch": 47.61, + "learning_rate": 0.00024080701754385962, + "loss": 0.1238, + "step": 3190 + }, + { + "epoch": 47.62, + "learning_rate": 0.0002407719298245614, + "loss": 0.0759, + "step": 3191 + }, + { + "epoch": 47.64, + "learning_rate": 0.00024073684210526315, + "loss": 0.1634, + "step": 3192 + }, + { + "epoch": 47.65, + "learning_rate": 0.0002407017543859649, + "loss": 0.184, + "step": 3193 + }, + { + "epoch": 47.67, + "learning_rate": 0.00024066666666666662, + "loss": 0.2784, + "step": 3194 + }, + { + "epoch": 47.68, + "learning_rate": 0.00024063157894736842, + "loss": 0.1188, + "step": 3195 + }, + { + "epoch": 47.7, + "learning_rate": 0.00024059649122807014, + "loss": 0.0494, + "step": 3196 + }, + { + "epoch": 47.71, + "learning_rate": 0.0002405614035087719, + "loss": 0.0575, + "step": 3197 + }, + { + "epoch": 47.73, + "learning_rate": 0.00024052631578947367, + "loss": 0.2455, + "step": 3198 + }, + { + "epoch": 47.74, + "learning_rate": 0.00024049122807017541, + "loss": 0.1662, + "step": 3199 + }, + { + "epoch": 47.76, + "learning_rate": 0.00024045614035087716, + "loss": 0.2306, + "step": 3200 + }, + { + "epoch": 47.76, + "eval_accuracy": 0.8252569750367107, + "eval_f1": 0.8263631777806041, + "eval_loss": 0.6788427233695984, + "eval_runtime": 344.1209, + "eval_samples_per_second": 11.874, + "eval_steps_per_second": 0.744, + "step": 3200 + }, + { + "epoch": 47.77, + "learning_rate": 0.0002404210526315789, + "loss": 0.0692, + "step": 3201 + }, + { + "epoch": 47.79, + "learning_rate": 0.0002403859649122807, + "loss": 0.0733, + "step": 3202 + }, + { + "epoch": 47.8, + "learning_rate": 0.00024035087719298244, + "loss": 0.2759, + "step": 3203 + }, + { + "epoch": 47.82, + "learning_rate": 0.00024031578947368419, + "loss": 0.0444, + "step": 3204 + }, + { + "epoch": 47.83, + "learning_rate": 0.00024028070175438593, + "loss": 0.5526, + "step": 3205 + }, + { + "epoch": 47.85, + "learning_rate": 0.0002402456140350877, + "loss": 0.1555, + "step": 3206 + }, + { + "epoch": 47.86, + "learning_rate": 0.00024021052631578946, + "loss": 0.1475, + "step": 3207 + }, + { + "epoch": 47.88, + "learning_rate": 0.0002401754385964912, + "loss": 0.1136, + "step": 3208 + }, + { + "epoch": 47.89, + "learning_rate": 0.00024014035087719298, + "loss": 0.0093, + "step": 3209 + }, + { + "epoch": 47.91, + "learning_rate": 0.00024010526315789473, + "loss": 0.0347, + "step": 3210 + }, + { + "epoch": 47.92, + "learning_rate": 0.00024007017543859648, + "loss": 0.1611, + "step": 3211 + }, + { + "epoch": 47.94, + "learning_rate": 0.0002400350877192982, + "loss": 0.103, + "step": 3212 + }, + { + "epoch": 47.95, + "learning_rate": 0.00023999999999999998, + "loss": 0.1416, + "step": 3213 + }, + { + "epoch": 47.97, + "learning_rate": 0.00023996491228070173, + "loss": 0.3555, + "step": 3214 + }, + { + "epoch": 47.98, + "learning_rate": 0.00023992982456140348, + "loss": 0.3259, + "step": 3215 + }, + { + "epoch": 48.0, + "learning_rate": 0.00023989473684210523, + "loss": 0.0298, + "step": 3216 + }, + { + "epoch": 48.01, + "learning_rate": 0.000239859649122807, + "loss": 0.3933, + "step": 3217 + }, + { + "epoch": 48.03, + "learning_rate": 0.00023982456140350875, + "loss": 0.0306, + "step": 3218 + }, + { + "epoch": 48.04, + "learning_rate": 0.0002397894736842105, + "loss": 0.242, + "step": 3219 + }, + { + "epoch": 48.06, + "learning_rate": 0.00023975438596491225, + "loss": 0.0583, + "step": 3220 + }, + { + "epoch": 48.07, + "learning_rate": 0.00023971929824561402, + "loss": 0.0489, + "step": 3221 + }, + { + "epoch": 48.09, + "learning_rate": 0.00023968421052631577, + "loss": 0.2804, + "step": 3222 + }, + { + "epoch": 48.1, + "learning_rate": 0.00023964912280701752, + "loss": 0.2151, + "step": 3223 + }, + { + "epoch": 48.12, + "learning_rate": 0.0002396140350877193, + "loss": 0.1241, + "step": 3224 + }, + { + "epoch": 48.13, + "learning_rate": 0.00023957894736842105, + "loss": 0.0905, + "step": 3225 + }, + { + "epoch": 48.15, + "learning_rate": 0.0002395438596491228, + "loss": 0.0301, + "step": 3226 + }, + { + "epoch": 48.16, + "learning_rate": 0.00023950877192982452, + "loss": 0.045, + "step": 3227 + }, + { + "epoch": 48.18, + "learning_rate": 0.00023947368421052632, + "loss": 0.1225, + "step": 3228 + }, + { + "epoch": 48.19, + "learning_rate": 0.00023943859649122804, + "loss": 0.0339, + "step": 3229 + }, + { + "epoch": 48.21, + "learning_rate": 0.0002394035087719298, + "loss": 0.0699, + "step": 3230 + }, + { + "epoch": 48.22, + "learning_rate": 0.00023936842105263154, + "loss": 0.0094, + "step": 3231 + }, + { + "epoch": 48.24, + "learning_rate": 0.00023933333333333331, + "loss": 0.3309, + "step": 3232 + }, + { + "epoch": 48.25, + "learning_rate": 0.00023929824561403506, + "loss": 0.0171, + "step": 3233 + }, + { + "epoch": 48.27, + "learning_rate": 0.0002392631578947368, + "loss": 0.1391, + "step": 3234 + }, + { + "epoch": 48.28, + "learning_rate": 0.0002392280701754386, + "loss": 0.3318, + "step": 3235 + }, + { + "epoch": 48.3, + "learning_rate": 0.00023919298245614034, + "loss": 0.0258, + "step": 3236 + }, + { + "epoch": 48.31, + "learning_rate": 0.00023915789473684209, + "loss": 0.0946, + "step": 3237 + }, + { + "epoch": 48.33, + "learning_rate": 0.00023912280701754383, + "loss": 0.2643, + "step": 3238 + }, + { + "epoch": 48.34, + "learning_rate": 0.0002390877192982456, + "loss": 0.3604, + "step": 3239 + }, + { + "epoch": 48.36, + "learning_rate": 0.00023905263157894736, + "loss": 0.1013, + "step": 3240 + }, + { + "epoch": 48.37, + "learning_rate": 0.0002390175438596491, + "loss": 0.3569, + "step": 3241 + }, + { + "epoch": 48.39, + "learning_rate": 0.00023898245614035083, + "loss": 0.0471, + "step": 3242 + }, + { + "epoch": 48.4, + "learning_rate": 0.00023894736842105263, + "loss": 0.1155, + "step": 3243 + }, + { + "epoch": 48.42, + "learning_rate": 0.00023891228070175435, + "loss": 0.0065, + "step": 3244 + }, + { + "epoch": 48.43, + "learning_rate": 0.0002388771929824561, + "loss": 0.0241, + "step": 3245 + }, + { + "epoch": 48.45, + "learning_rate": 0.00023884210526315788, + "loss": 0.0109, + "step": 3246 + }, + { + "epoch": 48.46, + "learning_rate": 0.00023880701754385963, + "loss": 0.0422, + "step": 3247 + }, + { + "epoch": 48.48, + "learning_rate": 0.00023877192982456138, + "loss": 0.1441, + "step": 3248 + }, + { + "epoch": 48.49, + "learning_rate": 0.00023873684210526312, + "loss": 0.0596, + "step": 3249 + }, + { + "epoch": 48.51, + "learning_rate": 0.0002387017543859649, + "loss": 0.0554, + "step": 3250 + }, + { + "epoch": 48.52, + "learning_rate": 0.00023866666666666665, + "loss": 0.1545, + "step": 3251 + }, + { + "epoch": 48.54, + "learning_rate": 0.0002386315789473684, + "loss": 0.235, + "step": 3252 + }, + { + "epoch": 48.55, + "learning_rate": 0.00023859649122807015, + "loss": 0.2796, + "step": 3253 + }, + { + "epoch": 48.57, + "learning_rate": 0.00023856140350877192, + "loss": 0.1655, + "step": 3254 + }, + { + "epoch": 48.58, + "learning_rate": 0.00023852631578947367, + "loss": 0.0283, + "step": 3255 + }, + { + "epoch": 48.59, + "learning_rate": 0.00023849122807017542, + "loss": 0.15, + "step": 3256 + }, + { + "epoch": 48.61, + "learning_rate": 0.00023845614035087717, + "loss": 0.1791, + "step": 3257 + }, + { + "epoch": 48.62, + "learning_rate": 0.00023842105263157895, + "loss": 0.167, + "step": 3258 + }, + { + "epoch": 48.64, + "learning_rate": 0.0002383859649122807, + "loss": 0.1388, + "step": 3259 + }, + { + "epoch": 48.65, + "learning_rate": 0.00023835087719298242, + "loss": 0.1566, + "step": 3260 + }, + { + "epoch": 48.67, + "learning_rate": 0.0002383157894736842, + "loss": 0.0098, + "step": 3261 + }, + { + "epoch": 48.68, + "learning_rate": 0.00023828070175438594, + "loss": 0.06, + "step": 3262 + }, + { + "epoch": 48.7, + "learning_rate": 0.0002382456140350877, + "loss": 0.2902, + "step": 3263 + }, + { + "epoch": 48.71, + "learning_rate": 0.00023821052631578944, + "loss": 0.0044, + "step": 3264 + }, + { + "epoch": 48.73, + "learning_rate": 0.00023817543859649121, + "loss": 0.3811, + "step": 3265 + }, + { + "epoch": 48.74, + "learning_rate": 0.00023814035087719296, + "loss": 0.5167, + "step": 3266 + }, + { + "epoch": 48.76, + "learning_rate": 0.0002381052631578947, + "loss": 0.1743, + "step": 3267 + }, + { + "epoch": 48.77, + "learning_rate": 0.00023807017543859646, + "loss": 0.0538, + "step": 3268 + }, + { + "epoch": 48.79, + "learning_rate": 0.00023803508771929824, + "loss": 0.4199, + "step": 3269 + }, + { + "epoch": 48.8, + "learning_rate": 0.00023799999999999998, + "loss": 0.2232, + "step": 3270 + }, + { + "epoch": 48.82, + "learning_rate": 0.00023796491228070173, + "loss": 0.1179, + "step": 3271 + }, + { + "epoch": 48.83, + "learning_rate": 0.0002379298245614035, + "loss": 0.2499, + "step": 3272 + }, + { + "epoch": 48.85, + "learning_rate": 0.00023789473684210526, + "loss": 0.2151, + "step": 3273 + }, + { + "epoch": 48.86, + "learning_rate": 0.000237859649122807, + "loss": 0.1367, + "step": 3274 + }, + { + "epoch": 48.88, + "learning_rate": 0.00023782456140350873, + "loss": 0.0687, + "step": 3275 + }, + { + "epoch": 48.89, + "learning_rate": 0.00023778947368421053, + "loss": 0.0195, + "step": 3276 + }, + { + "epoch": 48.91, + "learning_rate": 0.00023775438596491225, + "loss": 0.2018, + "step": 3277 + }, + { + "epoch": 48.92, + "learning_rate": 0.000237719298245614, + "loss": 0.187, + "step": 3278 + }, + { + "epoch": 48.94, + "learning_rate": 0.00023768421052631575, + "loss": 0.1456, + "step": 3279 + }, + { + "epoch": 48.95, + "learning_rate": 0.00023764912280701753, + "loss": 0.152, + "step": 3280 + }, + { + "epoch": 48.97, + "learning_rate": 0.00023761403508771928, + "loss": 0.0065, + "step": 3281 + }, + { + "epoch": 48.98, + "learning_rate": 0.00023757894736842102, + "loss": 0.9299, + "step": 3282 + }, + { + "epoch": 49.0, + "learning_rate": 0.00023754385964912277, + "loss": 0.2614, + "step": 3283 + }, + { + "epoch": 49.01, + "learning_rate": 0.00023750877192982455, + "loss": 0.3121, + "step": 3284 + }, + { + "epoch": 49.03, + "learning_rate": 0.0002374736842105263, + "loss": 0.2731, + "step": 3285 + }, + { + "epoch": 49.04, + "learning_rate": 0.00023743859649122805, + "loss": 0.5073, + "step": 3286 + }, + { + "epoch": 49.06, + "learning_rate": 0.00023740350877192982, + "loss": 0.1915, + "step": 3287 + }, + { + "epoch": 49.07, + "learning_rate": 0.00023736842105263157, + "loss": 0.1298, + "step": 3288 + }, + { + "epoch": 49.09, + "learning_rate": 0.00023733333333333332, + "loss": 0.1686, + "step": 3289 + }, + { + "epoch": 49.1, + "learning_rate": 0.00023729824561403504, + "loss": 0.0814, + "step": 3290 + }, + { + "epoch": 49.12, + "learning_rate": 0.00023726315789473684, + "loss": 0.221, + "step": 3291 + }, + { + "epoch": 49.13, + "learning_rate": 0.00023722807017543857, + "loss": 0.1197, + "step": 3292 + }, + { + "epoch": 49.15, + "learning_rate": 0.00023719298245614031, + "loss": 0.4632, + "step": 3293 + }, + { + "epoch": 49.16, + "learning_rate": 0.00023715789473684206, + "loss": 0.4504, + "step": 3294 + }, + { + "epoch": 49.18, + "learning_rate": 0.00023712280701754384, + "loss": 0.0194, + "step": 3295 + }, + { + "epoch": 49.19, + "learning_rate": 0.0002370877192982456, + "loss": 0.1115, + "step": 3296 + }, + { + "epoch": 49.21, + "learning_rate": 0.00023705263157894734, + "loss": 0.194, + "step": 3297 + }, + { + "epoch": 49.22, + "learning_rate": 0.0002370175438596491, + "loss": 0.1706, + "step": 3298 + }, + { + "epoch": 49.24, + "learning_rate": 0.00023698245614035086, + "loss": 0.2053, + "step": 3299 + }, + { + "epoch": 49.25, + "learning_rate": 0.0002369473684210526, + "loss": 0.0102, + "step": 3300 + }, + { + "epoch": 49.27, + "learning_rate": 0.00023691228070175436, + "loss": 0.1797, + "step": 3301 + }, + { + "epoch": 49.28, + "learning_rate": 0.00023687719298245614, + "loss": 0.1465, + "step": 3302 + }, + { + "epoch": 49.3, + "learning_rate": 0.00023684210526315788, + "loss": 0.2035, + "step": 3303 + }, + { + "epoch": 49.31, + "learning_rate": 0.00023680701754385963, + "loss": 0.266, + "step": 3304 + }, + { + "epoch": 49.33, + "learning_rate": 0.00023677192982456138, + "loss": 0.2117, + "step": 3305 + }, + { + "epoch": 49.34, + "learning_rate": 0.00023673684210526316, + "loss": 0.1285, + "step": 3306 + }, + { + "epoch": 49.36, + "learning_rate": 0.00023670175438596488, + "loss": 0.156, + "step": 3307 + }, + { + "epoch": 49.37, + "learning_rate": 0.00023666666666666663, + "loss": 0.2169, + "step": 3308 + }, + { + "epoch": 49.39, + "learning_rate": 0.0002366315789473684, + "loss": 0.0067, + "step": 3309 + }, + { + "epoch": 49.4, + "learning_rate": 0.00023659649122807015, + "loss": 0.0784, + "step": 3310 + }, + { + "epoch": 49.42, + "learning_rate": 0.0002365614035087719, + "loss": 0.1451, + "step": 3311 + }, + { + "epoch": 49.43, + "learning_rate": 0.00023652631578947365, + "loss": 0.0804, + "step": 3312 + }, + { + "epoch": 49.45, + "learning_rate": 0.00023649122807017543, + "loss": 0.155, + "step": 3313 + }, + { + "epoch": 49.46, + "learning_rate": 0.00023645614035087717, + "loss": 0.1286, + "step": 3314 + }, + { + "epoch": 49.48, + "learning_rate": 0.00023642105263157892, + "loss": 0.0605, + "step": 3315 + }, + { + "epoch": 49.49, + "learning_rate": 0.00023638596491228067, + "loss": 0.0368, + "step": 3316 + }, + { + "epoch": 49.51, + "learning_rate": 0.00023635087719298245, + "loss": 0.3509, + "step": 3317 + }, + { + "epoch": 49.52, + "learning_rate": 0.0002363157894736842, + "loss": 0.226, + "step": 3318 + }, + { + "epoch": 49.54, + "learning_rate": 0.00023628070175438595, + "loss": 0.2064, + "step": 3319 + }, + { + "epoch": 49.55, + "learning_rate": 0.0002362456140350877, + "loss": 0.0506, + "step": 3320 + }, + { + "epoch": 49.57, + "learning_rate": 0.00023621052631578947, + "loss": 0.0928, + "step": 3321 + }, + { + "epoch": 49.58, + "learning_rate": 0.00023617543859649122, + "loss": 0.1743, + "step": 3322 + }, + { + "epoch": 49.59, + "learning_rate": 0.00023614035087719294, + "loss": 0.0956, + "step": 3323 + }, + { + "epoch": 49.61, + "learning_rate": 0.00023610526315789474, + "loss": 0.2717, + "step": 3324 + }, + { + "epoch": 49.62, + "learning_rate": 0.00023607017543859647, + "loss": 0.0131, + "step": 3325 + }, + { + "epoch": 49.64, + "learning_rate": 0.00023603508771929821, + "loss": 0.1284, + "step": 3326 + }, + { + "epoch": 49.65, + "learning_rate": 0.00023599999999999996, + "loss": 0.0224, + "step": 3327 + }, + { + "epoch": 49.67, + "learning_rate": 0.00023596491228070174, + "loss": 0.2971, + "step": 3328 + }, + { + "epoch": 49.68, + "learning_rate": 0.0002359298245614035, + "loss": 0.0894, + "step": 3329 + }, + { + "epoch": 49.7, + "learning_rate": 0.00023589473684210524, + "loss": 0.0096, + "step": 3330 + }, + { + "epoch": 49.71, + "learning_rate": 0.00023585964912280699, + "loss": 0.1581, + "step": 3331 + }, + { + "epoch": 49.73, + "learning_rate": 0.00023582456140350876, + "loss": 0.0086, + "step": 3332 + }, + { + "epoch": 49.74, + "learning_rate": 0.0002357894736842105, + "loss": 0.2042, + "step": 3333 + }, + { + "epoch": 49.76, + "learning_rate": 0.00023575438596491226, + "loss": 0.0336, + "step": 3334 + }, + { + "epoch": 49.77, + "learning_rate": 0.00023571929824561403, + "loss": 0.047, + "step": 3335 + }, + { + "epoch": 49.79, + "learning_rate": 0.00023568421052631578, + "loss": 0.114, + "step": 3336 + }, + { + "epoch": 49.8, + "learning_rate": 0.00023564912280701753, + "loss": 0.0184, + "step": 3337 + }, + { + "epoch": 49.82, + "learning_rate": 0.00023561403508771925, + "loss": 0.2492, + "step": 3338 + }, + { + "epoch": 49.83, + "learning_rate": 0.00023557894736842106, + "loss": 0.0591, + "step": 3339 + }, + { + "epoch": 49.85, + "learning_rate": 0.00023554385964912278, + "loss": 0.0539, + "step": 3340 + }, + { + "epoch": 49.86, + "learning_rate": 0.00023550877192982453, + "loss": 0.0631, + "step": 3341 + }, + { + "epoch": 49.88, + "learning_rate": 0.00023547368421052628, + "loss": 0.0913, + "step": 3342 + }, + { + "epoch": 49.89, + "learning_rate": 0.00023543859649122805, + "loss": 0.1166, + "step": 3343 + }, + { + "epoch": 49.91, + "learning_rate": 0.0002354035087719298, + "loss": 0.0261, + "step": 3344 + }, + { + "epoch": 49.92, + "learning_rate": 0.00023536842105263155, + "loss": 0.4387, + "step": 3345 + }, + { + "epoch": 49.94, + "learning_rate": 0.00023533333333333333, + "loss": 0.1136, + "step": 3346 + }, + { + "epoch": 49.95, + "learning_rate": 0.00023529824561403507, + "loss": 0.0142, + "step": 3347 + }, + { + "epoch": 49.97, + "learning_rate": 0.00023526315789473682, + "loss": 0.1371, + "step": 3348 + }, + { + "epoch": 49.98, + "learning_rate": 0.00023522807017543857, + "loss": 0.0146, + "step": 3349 + }, + { + "epoch": 50.0, + "learning_rate": 0.00023519298245614035, + "loss": 0.006, + "step": 3350 + }, + { + "epoch": 50.01, + "learning_rate": 0.0002351578947368421, + "loss": 0.013, + "step": 3351 + }, + { + "epoch": 50.03, + "learning_rate": 0.00023512280701754385, + "loss": 0.0484, + "step": 3352 + }, + { + "epoch": 50.04, + "learning_rate": 0.0002350877192982456, + "loss": 0.1242, + "step": 3353 + }, + { + "epoch": 50.06, + "learning_rate": 0.00023505263157894737, + "loss": 0.158, + "step": 3354 + }, + { + "epoch": 50.07, + "learning_rate": 0.0002350175438596491, + "loss": 0.2993, + "step": 3355 + }, + { + "epoch": 50.09, + "learning_rate": 0.00023498245614035084, + "loss": 0.035, + "step": 3356 + }, + { + "epoch": 50.1, + "learning_rate": 0.0002349473684210526, + "loss": 0.0617, + "step": 3357 + }, + { + "epoch": 50.12, + "learning_rate": 0.00023491228070175436, + "loss": 0.2287, + "step": 3358 + }, + { + "epoch": 50.13, + "learning_rate": 0.00023487719298245611, + "loss": 0.1619, + "step": 3359 + }, + { + "epoch": 50.15, + "learning_rate": 0.00023484210526315786, + "loss": 0.0349, + "step": 3360 + }, + { + "epoch": 50.16, + "learning_rate": 0.00023480701754385964, + "loss": 0.0262, + "step": 3361 + }, + { + "epoch": 50.18, + "learning_rate": 0.0002347719298245614, + "loss": 0.0156, + "step": 3362 + }, + { + "epoch": 50.19, + "learning_rate": 0.00023473684210526314, + "loss": 0.0146, + "step": 3363 + }, + { + "epoch": 50.21, + "learning_rate": 0.00023470175438596488, + "loss": 0.0343, + "step": 3364 + }, + { + "epoch": 50.22, + "learning_rate": 0.00023466666666666666, + "loss": 0.0789, + "step": 3365 + }, + { + "epoch": 50.24, + "learning_rate": 0.0002346315789473684, + "loss": 0.0122, + "step": 3366 + }, + { + "epoch": 50.25, + "learning_rate": 0.00023459649122807016, + "loss": 0.0105, + "step": 3367 + }, + { + "epoch": 50.27, + "learning_rate": 0.0002345614035087719, + "loss": 0.1957, + "step": 3368 + }, + { + "epoch": 50.28, + "learning_rate": 0.00023452631578947368, + "loss": 0.2115, + "step": 3369 + }, + { + "epoch": 50.3, + "learning_rate": 0.00023449122807017543, + "loss": 0.242, + "step": 3370 + }, + { + "epoch": 50.31, + "learning_rate": 0.00023445614035087715, + "loss": 0.3058, + "step": 3371 + }, + { + "epoch": 50.33, + "learning_rate": 0.00023442105263157896, + "loss": 0.0748, + "step": 3372 + }, + { + "epoch": 50.34, + "learning_rate": 0.00023438596491228068, + "loss": 0.0155, + "step": 3373 + }, + { + "epoch": 50.36, + "learning_rate": 0.00023435087719298243, + "loss": 0.3034, + "step": 3374 + }, + { + "epoch": 50.37, + "learning_rate": 0.00023431578947368418, + "loss": 0.0136, + "step": 3375 + }, + { + "epoch": 50.39, + "learning_rate": 0.00023428070175438595, + "loss": 0.1864, + "step": 3376 + }, + { + "epoch": 50.4, + "learning_rate": 0.0002342456140350877, + "loss": 0.1253, + "step": 3377 + }, + { + "epoch": 50.42, + "learning_rate": 0.00023421052631578945, + "loss": 0.1835, + "step": 3378 + }, + { + "epoch": 50.43, + "learning_rate": 0.0002341754385964912, + "loss": 0.1046, + "step": 3379 + }, + { + "epoch": 50.45, + "learning_rate": 0.00023414035087719297, + "loss": 0.036, + "step": 3380 + }, + { + "epoch": 50.46, + "learning_rate": 0.00023410526315789472, + "loss": 0.0947, + "step": 3381 + }, + { + "epoch": 50.48, + "learning_rate": 0.00023407017543859647, + "loss": 0.0058, + "step": 3382 + }, + { + "epoch": 50.49, + "learning_rate": 0.00023403508771929822, + "loss": 0.1306, + "step": 3383 + }, + { + "epoch": 50.51, + "learning_rate": 0.000234, + "loss": 0.1747, + "step": 3384 + }, + { + "epoch": 50.52, + "learning_rate": 0.00023396491228070174, + "loss": 0.037, + "step": 3385 + }, + { + "epoch": 50.54, + "learning_rate": 0.00023392982456140347, + "loss": 0.2375, + "step": 3386 + }, + { + "epoch": 50.55, + "learning_rate": 0.00023389473684210527, + "loss": 0.4548, + "step": 3387 + }, + { + "epoch": 50.57, + "learning_rate": 0.000233859649122807, + "loss": 0.0223, + "step": 3388 + }, + { + "epoch": 50.58, + "learning_rate": 0.00023382456140350874, + "loss": 0.0359, + "step": 3389 + }, + { + "epoch": 50.59, + "learning_rate": 0.0002337894736842105, + "loss": 0.1929, + "step": 3390 + }, + { + "epoch": 50.61, + "learning_rate": 0.00023375438596491226, + "loss": 0.0979, + "step": 3391 + }, + { + "epoch": 50.62, + "learning_rate": 0.000233719298245614, + "loss": 0.0213, + "step": 3392 + }, + { + "epoch": 50.64, + "learning_rate": 0.00023368421052631576, + "loss": 0.05, + "step": 3393 + }, + { + "epoch": 50.65, + "learning_rate": 0.0002336491228070175, + "loss": 0.2083, + "step": 3394 + }, + { + "epoch": 50.67, + "learning_rate": 0.0002336140350877193, + "loss": 0.5703, + "step": 3395 + }, + { + "epoch": 50.68, + "learning_rate": 0.00023357894736842104, + "loss": 0.0331, + "step": 3396 + }, + { + "epoch": 50.7, + "learning_rate": 0.00023354385964912278, + "loss": 0.0684, + "step": 3397 + }, + { + "epoch": 50.71, + "learning_rate": 0.00023350877192982456, + "loss": 0.1066, + "step": 3398 + }, + { + "epoch": 50.73, + "learning_rate": 0.0002334736842105263, + "loss": 0.1049, + "step": 3399 + }, + { + "epoch": 50.74, + "learning_rate": 0.00023343859649122806, + "loss": 0.0975, + "step": 3400 + }, + { + "epoch": 50.74, + "eval_accuracy": 0.828928046989721, + "eval_f1": 0.8295199623127844, + "eval_loss": 0.723612368106842, + "eval_runtime": 345.274, + "eval_samples_per_second": 11.834, + "eval_steps_per_second": 0.741, + "step": 3400 + }, + { + "epoch": 50.76, + "learning_rate": 0.00023340350877192978, + "loss": 0.0765, + "step": 3401 + }, + { + "epoch": 50.77, + "learning_rate": 0.00023336842105263158, + "loss": 0.0704, + "step": 3402 + }, + { + "epoch": 50.79, + "learning_rate": 0.0002333333333333333, + "loss": 0.0318, + "step": 3403 + }, + { + "epoch": 50.8, + "learning_rate": 0.00023329824561403505, + "loss": 0.2962, + "step": 3404 + }, + { + "epoch": 50.82, + "learning_rate": 0.0002332631578947368, + "loss": 0.0897, + "step": 3405 + }, + { + "epoch": 50.83, + "learning_rate": 0.00023322807017543858, + "loss": 0.3142, + "step": 3406 + }, + { + "epoch": 50.85, + "learning_rate": 0.00023319298245614033, + "loss": 0.2082, + "step": 3407 + }, + { + "epoch": 50.86, + "learning_rate": 0.00023315789473684207, + "loss": 0.0214, + "step": 3408 + }, + { + "epoch": 50.88, + "learning_rate": 0.00023312280701754385, + "loss": 0.0919, + "step": 3409 + }, + { + "epoch": 50.89, + "learning_rate": 0.0002330877192982456, + "loss": 0.1899, + "step": 3410 + }, + { + "epoch": 50.91, + "learning_rate": 0.00023305263157894735, + "loss": 0.1403, + "step": 3411 + }, + { + "epoch": 50.92, + "learning_rate": 0.0002330175438596491, + "loss": 0.0434, + "step": 3412 + }, + { + "epoch": 50.94, + "learning_rate": 0.00023298245614035087, + "loss": 0.0165, + "step": 3413 + }, + { + "epoch": 50.95, + "learning_rate": 0.00023294736842105262, + "loss": 0.1556, + "step": 3414 + }, + { + "epoch": 50.97, + "learning_rate": 0.00023291228070175437, + "loss": 0.1885, + "step": 3415 + }, + { + "epoch": 50.98, + "learning_rate": 0.00023287719298245612, + "loss": 0.0331, + "step": 3416 + }, + { + "epoch": 51.0, + "learning_rate": 0.0002328421052631579, + "loss": 0.1439, + "step": 3417 + }, + { + "epoch": 51.01, + "learning_rate": 0.00023280701754385964, + "loss": 0.1991, + "step": 3418 + }, + { + "epoch": 51.03, + "learning_rate": 0.00023277192982456137, + "loss": 0.2616, + "step": 3419 + }, + { + "epoch": 51.04, + "learning_rate": 0.00023273684210526311, + "loss": 0.0325, + "step": 3420 + }, + { + "epoch": 51.06, + "learning_rate": 0.0002327017543859649, + "loss": 0.1062, + "step": 3421 + }, + { + "epoch": 51.07, + "learning_rate": 0.00023266666666666664, + "loss": 0.0539, + "step": 3422 + }, + { + "epoch": 51.09, + "learning_rate": 0.0002326315789473684, + "loss": 0.3147, + "step": 3423 + }, + { + "epoch": 51.1, + "learning_rate": 0.00023259649122807016, + "loss": 0.2051, + "step": 3424 + }, + { + "epoch": 51.12, + "learning_rate": 0.0002325614035087719, + "loss": 0.0063, + "step": 3425 + }, + { + "epoch": 51.13, + "learning_rate": 0.00023252631578947366, + "loss": 0.1891, + "step": 3426 + }, + { + "epoch": 51.15, + "learning_rate": 0.0002324912280701754, + "loss": 0.2393, + "step": 3427 + }, + { + "epoch": 51.16, + "learning_rate": 0.00023245614035087719, + "loss": 0.2608, + "step": 3428 + }, + { + "epoch": 51.18, + "learning_rate": 0.00023242105263157893, + "loss": 0.155, + "step": 3429 + }, + { + "epoch": 51.19, + "learning_rate": 0.00023238596491228068, + "loss": 0.2852, + "step": 3430 + }, + { + "epoch": 51.21, + "learning_rate": 0.00023235087719298243, + "loss": 0.0219, + "step": 3431 + }, + { + "epoch": 51.22, + "learning_rate": 0.0002323157894736842, + "loss": 0.0044, + "step": 3432 + }, + { + "epoch": 51.24, + "learning_rate": 0.00023228070175438596, + "loss": 0.2911, + "step": 3433 + }, + { + "epoch": 51.25, + "learning_rate": 0.00023224561403508768, + "loss": 0.1041, + "step": 3434 + }, + { + "epoch": 51.27, + "learning_rate": 0.00023221052631578948, + "loss": 0.1581, + "step": 3435 + }, + { + "epoch": 51.28, + "learning_rate": 0.0002321754385964912, + "loss": 0.0615, + "step": 3436 + }, + { + "epoch": 51.3, + "learning_rate": 0.00023214035087719295, + "loss": 0.0482, + "step": 3437 + }, + { + "epoch": 51.31, + "learning_rate": 0.0002321052631578947, + "loss": 0.1814, + "step": 3438 + }, + { + "epoch": 51.33, + "learning_rate": 0.00023207017543859648, + "loss": 0.0087, + "step": 3439 + }, + { + "epoch": 51.34, + "learning_rate": 0.00023203508771929823, + "loss": 0.0068, + "step": 3440 + }, + { + "epoch": 51.36, + "learning_rate": 0.00023199999999999997, + "loss": 0.4871, + "step": 3441 + }, + { + "epoch": 51.37, + "learning_rate": 0.00023196491228070172, + "loss": 0.0375, + "step": 3442 + }, + { + "epoch": 51.39, + "learning_rate": 0.0002319298245614035, + "loss": 0.0656, + "step": 3443 + }, + { + "epoch": 51.4, + "learning_rate": 0.00023189473684210525, + "loss": 0.0113, + "step": 3444 + }, + { + "epoch": 51.42, + "learning_rate": 0.000231859649122807, + "loss": 0.0957, + "step": 3445 + }, + { + "epoch": 51.43, + "learning_rate": 0.00023182456140350875, + "loss": 0.0936, + "step": 3446 + }, + { + "epoch": 51.45, + "learning_rate": 0.00023178947368421052, + "loss": 0.2175, + "step": 3447 + }, + { + "epoch": 51.46, + "learning_rate": 0.00023175438596491227, + "loss": 0.1612, + "step": 3448 + }, + { + "epoch": 51.48, + "learning_rate": 0.000231719298245614, + "loss": 0.0313, + "step": 3449 + }, + { + "epoch": 51.49, + "learning_rate": 0.0002316842105263158, + "loss": 0.0096, + "step": 3450 + }, + { + "epoch": 51.51, + "learning_rate": 0.00023164912280701752, + "loss": 0.0376, + "step": 3451 + }, + { + "epoch": 51.52, + "learning_rate": 0.00023161403508771927, + "loss": 0.0071, + "step": 3452 + }, + { + "epoch": 51.54, + "learning_rate": 0.00023157894736842101, + "loss": 0.2736, + "step": 3453 + }, + { + "epoch": 51.55, + "learning_rate": 0.0002315438596491228, + "loss": 0.1893, + "step": 3454 + }, + { + "epoch": 51.57, + "learning_rate": 0.00023150877192982454, + "loss": 0.0625, + "step": 3455 + }, + { + "epoch": 51.58, + "learning_rate": 0.0002314736842105263, + "loss": 0.0211, + "step": 3456 + }, + { + "epoch": 51.59, + "learning_rate": 0.00023143859649122804, + "loss": 0.026, + "step": 3457 + }, + { + "epoch": 51.61, + "learning_rate": 0.0002314035087719298, + "loss": 0.0453, + "step": 3458 + }, + { + "epoch": 51.62, + "learning_rate": 0.00023136842105263156, + "loss": 0.0545, + "step": 3459 + }, + { + "epoch": 51.64, + "learning_rate": 0.0002313333333333333, + "loss": 0.0336, + "step": 3460 + }, + { + "epoch": 51.65, + "learning_rate": 0.00023129824561403509, + "loss": 0.0492, + "step": 3461 + }, + { + "epoch": 51.67, + "learning_rate": 0.00023126315789473683, + "loss": 0.2222, + "step": 3462 + }, + { + "epoch": 51.68, + "learning_rate": 0.00023122807017543858, + "loss": 0.006, + "step": 3463 + }, + { + "epoch": 51.7, + "learning_rate": 0.00023119298245614033, + "loss": 0.0502, + "step": 3464 + }, + { + "epoch": 51.71, + "learning_rate": 0.0002311578947368421, + "loss": 0.0217, + "step": 3465 + }, + { + "epoch": 51.73, + "learning_rate": 0.00023112280701754386, + "loss": 0.4115, + "step": 3466 + }, + { + "epoch": 51.74, + "learning_rate": 0.00023108771929824558, + "loss": 0.0063, + "step": 3467 + }, + { + "epoch": 51.76, + "learning_rate": 0.00023105263157894733, + "loss": 0.386, + "step": 3468 + }, + { + "epoch": 51.77, + "learning_rate": 0.0002310175438596491, + "loss": 0.0952, + "step": 3469 + }, + { + "epoch": 51.79, + "learning_rate": 0.00023098245614035085, + "loss": 0.075, + "step": 3470 + }, + { + "epoch": 51.8, + "learning_rate": 0.0002309473684210526, + "loss": 0.2486, + "step": 3471 + }, + { + "epoch": 51.82, + "learning_rate": 0.00023091228070175438, + "loss": 0.5256, + "step": 3472 + }, + { + "epoch": 51.83, + "learning_rate": 0.00023087719298245612, + "loss": 0.0866, + "step": 3473 + }, + { + "epoch": 51.85, + "learning_rate": 0.00023084210526315787, + "loss": 0.1161, + "step": 3474 + }, + { + "epoch": 51.86, + "learning_rate": 0.00023080701754385962, + "loss": 0.035, + "step": 3475 + }, + { + "epoch": 51.88, + "learning_rate": 0.0002307719298245614, + "loss": 0.0224, + "step": 3476 + }, + { + "epoch": 51.89, + "learning_rate": 0.00023073684210526315, + "loss": 0.0111, + "step": 3477 + }, + { + "epoch": 51.91, + "learning_rate": 0.0002307017543859649, + "loss": 0.0374, + "step": 3478 + }, + { + "epoch": 51.92, + "learning_rate": 0.00023066666666666664, + "loss": 0.0163, + "step": 3479 + }, + { + "epoch": 51.94, + "learning_rate": 0.00023063157894736842, + "loss": 0.1842, + "step": 3480 + }, + { + "epoch": 51.95, + "learning_rate": 0.00023059649122807017, + "loss": 0.2779, + "step": 3481 + }, + { + "epoch": 51.97, + "learning_rate": 0.0002305614035087719, + "loss": 0.0568, + "step": 3482 + }, + { + "epoch": 51.98, + "learning_rate": 0.00023052631578947364, + "loss": 0.2603, + "step": 3483 + }, + { + "epoch": 52.0, + "learning_rate": 0.00023049122807017542, + "loss": 0.126, + "step": 3484 + }, + { + "epoch": 52.01, + "learning_rate": 0.00023045614035087716, + "loss": 0.7035, + "step": 3485 + }, + { + "epoch": 52.03, + "learning_rate": 0.0002304210526315789, + "loss": 0.0054, + "step": 3486 + }, + { + "epoch": 52.04, + "learning_rate": 0.0002303859649122807, + "loss": 0.0937, + "step": 3487 + }, + { + "epoch": 52.06, + "learning_rate": 0.00023035087719298244, + "loss": 0.1501, + "step": 3488 + }, + { + "epoch": 52.07, + "learning_rate": 0.0002303157894736842, + "loss": 0.0795, + "step": 3489 + }, + { + "epoch": 52.09, + "learning_rate": 0.00023028070175438594, + "loss": 0.1251, + "step": 3490 + }, + { + "epoch": 52.1, + "learning_rate": 0.0002302456140350877, + "loss": 0.01, + "step": 3491 + }, + { + "epoch": 52.12, + "learning_rate": 0.00023021052631578946, + "loss": 0.3727, + "step": 3492 + }, + { + "epoch": 52.13, + "learning_rate": 0.0002301754385964912, + "loss": 0.0147, + "step": 3493 + }, + { + "epoch": 52.15, + "learning_rate": 0.00023014035087719296, + "loss": 0.2178, + "step": 3494 + }, + { + "epoch": 52.16, + "learning_rate": 0.00023010526315789473, + "loss": 0.0409, + "step": 3495 + }, + { + "epoch": 52.18, + "learning_rate": 0.00023007017543859648, + "loss": 0.1585, + "step": 3496 + }, + { + "epoch": 52.19, + "learning_rate": 0.0002300350877192982, + "loss": 0.0069, + "step": 3497 + }, + { + "epoch": 52.21, + "learning_rate": 0.00023, + "loss": 0.0145, + "step": 3498 + }, + { + "epoch": 52.22, + "learning_rate": 0.00022996491228070173, + "loss": 0.0131, + "step": 3499 + }, + { + "epoch": 52.24, + "learning_rate": 0.00022992982456140348, + "loss": 0.2495, + "step": 3500 + }, + { + "epoch": 52.25, + "learning_rate": 0.00022989473684210523, + "loss": 0.1867, + "step": 3501 + }, + { + "epoch": 52.27, + "learning_rate": 0.000229859649122807, + "loss": 0.0064, + "step": 3502 + }, + { + "epoch": 52.28, + "learning_rate": 0.00022982456140350875, + "loss": 0.0257, + "step": 3503 + }, + { + "epoch": 52.3, + "learning_rate": 0.0002297894736842105, + "loss": 0.0654, + "step": 3504 + }, + { + "epoch": 52.31, + "learning_rate": 0.00022975438596491225, + "loss": 0.0885, + "step": 3505 + }, + { + "epoch": 52.33, + "learning_rate": 0.00022971929824561402, + "loss": 0.0556, + "step": 3506 + }, + { + "epoch": 52.34, + "learning_rate": 0.00022968421052631577, + "loss": 0.0429, + "step": 3507 + }, + { + "epoch": 52.36, + "learning_rate": 0.00022964912280701752, + "loss": 0.0837, + "step": 3508 + }, + { + "epoch": 52.37, + "learning_rate": 0.0002296140350877193, + "loss": 0.1373, + "step": 3509 + }, + { + "epoch": 52.39, + "learning_rate": 0.00022957894736842105, + "loss": 0.022, + "step": 3510 + }, + { + "epoch": 52.4, + "learning_rate": 0.0002295438596491228, + "loss": 0.356, + "step": 3511 + }, + { + "epoch": 52.42, + "learning_rate": 0.00022950877192982454, + "loss": 0.0119, + "step": 3512 + }, + { + "epoch": 52.43, + "learning_rate": 0.00022947368421052632, + "loss": 0.0376, + "step": 3513 + }, + { + "epoch": 52.45, + "learning_rate": 0.00022943859649122804, + "loss": 0.0049, + "step": 3514 + }, + { + "epoch": 52.46, + "learning_rate": 0.0002294035087719298, + "loss": 0.1981, + "step": 3515 + }, + { + "epoch": 52.48, + "learning_rate": 0.00022936842105263154, + "loss": 0.2462, + "step": 3516 + }, + { + "epoch": 52.49, + "learning_rate": 0.00022933333333333332, + "loss": 0.0036, + "step": 3517 + }, + { + "epoch": 52.51, + "learning_rate": 0.00022929824561403506, + "loss": 0.2211, + "step": 3518 + }, + { + "epoch": 52.52, + "learning_rate": 0.0002292631578947368, + "loss": 0.1351, + "step": 3519 + }, + { + "epoch": 52.54, + "learning_rate": 0.00022922807017543856, + "loss": 0.1374, + "step": 3520 + }, + { + "epoch": 52.55, + "learning_rate": 0.00022919298245614034, + "loss": 0.2577, + "step": 3521 + }, + { + "epoch": 52.57, + "learning_rate": 0.00022915789473684209, + "loss": 0.3107, + "step": 3522 + }, + { + "epoch": 52.58, + "learning_rate": 0.00022912280701754383, + "loss": 0.3881, + "step": 3523 + }, + { + "epoch": 52.59, + "learning_rate": 0.0002290877192982456, + "loss": 0.0885, + "step": 3524 + }, + { + "epoch": 52.61, + "learning_rate": 0.00022905263157894736, + "loss": 0.0183, + "step": 3525 + }, + { + "epoch": 52.62, + "learning_rate": 0.0002290175438596491, + "loss": 0.006, + "step": 3526 + }, + { + "epoch": 52.64, + "learning_rate": 0.00022898245614035086, + "loss": 0.1872, + "step": 3527 + }, + { + "epoch": 52.65, + "learning_rate": 0.00022894736842105263, + "loss": 0.1191, + "step": 3528 + }, + { + "epoch": 52.67, + "learning_rate": 0.00022891228070175438, + "loss": 0.1761, + "step": 3529 + }, + { + "epoch": 52.68, + "learning_rate": 0.0002288771929824561, + "loss": 0.0031, + "step": 3530 + }, + { + "epoch": 52.7, + "learning_rate": 0.00022884210526315785, + "loss": 0.0431, + "step": 3531 + }, + { + "epoch": 52.71, + "learning_rate": 0.00022880701754385963, + "loss": 0.0034, + "step": 3532 + }, + { + "epoch": 52.73, + "learning_rate": 0.00022877192982456138, + "loss": 0.1489, + "step": 3533 + }, + { + "epoch": 52.74, + "learning_rate": 0.00022873684210526313, + "loss": 0.0079, + "step": 3534 + }, + { + "epoch": 52.76, + "learning_rate": 0.0002287017543859649, + "loss": 0.2767, + "step": 3535 + }, + { + "epoch": 52.77, + "learning_rate": 0.00022866666666666665, + "loss": 0.4586, + "step": 3536 + }, + { + "epoch": 52.79, + "learning_rate": 0.0002286315789473684, + "loss": 0.0055, + "step": 3537 + }, + { + "epoch": 52.8, + "learning_rate": 0.00022859649122807015, + "loss": 0.2642, + "step": 3538 + }, + { + "epoch": 52.82, + "learning_rate": 0.00022856140350877192, + "loss": 0.0057, + "step": 3539 + }, + { + "epoch": 52.83, + "learning_rate": 0.00022852631578947367, + "loss": 0.0305, + "step": 3540 + }, + { + "epoch": 52.85, + "learning_rate": 0.00022849122807017542, + "loss": 0.1817, + "step": 3541 + }, + { + "epoch": 52.86, + "learning_rate": 0.00022845614035087717, + "loss": 0.1287, + "step": 3542 + }, + { + "epoch": 52.88, + "learning_rate": 0.00022842105263157895, + "loss": 0.2467, + "step": 3543 + }, + { + "epoch": 52.89, + "learning_rate": 0.0002283859649122807, + "loss": 0.0116, + "step": 3544 + }, + { + "epoch": 52.91, + "learning_rate": 0.00022835087719298242, + "loss": 0.4328, + "step": 3545 + }, + { + "epoch": 52.92, + "learning_rate": 0.00022831578947368417, + "loss": 0.0037, + "step": 3546 + }, + { + "epoch": 52.94, + "learning_rate": 0.00022828070175438594, + "loss": 0.2324, + "step": 3547 + }, + { + "epoch": 52.95, + "learning_rate": 0.0002282456140350877, + "loss": 0.0079, + "step": 3548 + }, + { + "epoch": 52.97, + "learning_rate": 0.00022821052631578944, + "loss": 0.2288, + "step": 3549 + }, + { + "epoch": 52.98, + "learning_rate": 0.00022817543859649121, + "loss": 0.0985, + "step": 3550 + }, + { + "epoch": 53.0, + "learning_rate": 0.00022814035087719296, + "loss": 0.3461, + "step": 3551 + }, + { + "epoch": 53.01, + "learning_rate": 0.0002281052631578947, + "loss": 0.115, + "step": 3552 + }, + { + "epoch": 53.03, + "learning_rate": 0.00022807017543859646, + "loss": 0.1899, + "step": 3553 + }, + { + "epoch": 53.04, + "learning_rate": 0.00022803508771929824, + "loss": 0.0037, + "step": 3554 + }, + { + "epoch": 53.06, + "learning_rate": 0.00022799999999999999, + "loss": 0.0942, + "step": 3555 + }, + { + "epoch": 53.07, + "learning_rate": 0.00022796491228070173, + "loss": 0.1492, + "step": 3556 + }, + { + "epoch": 53.09, + "learning_rate": 0.00022792982456140348, + "loss": 0.014, + "step": 3557 + }, + { + "epoch": 53.1, + "learning_rate": 0.00022789473684210526, + "loss": 0.015, + "step": 3558 + }, + { + "epoch": 53.12, + "learning_rate": 0.000227859649122807, + "loss": 0.0556, + "step": 3559 + }, + { + "epoch": 53.13, + "learning_rate": 0.00022782456140350876, + "loss": 0.0535, + "step": 3560 + }, + { + "epoch": 53.15, + "learning_rate": 0.00022778947368421053, + "loss": 0.0459, + "step": 3561 + }, + { + "epoch": 53.16, + "learning_rate": 0.00022775438596491225, + "loss": 0.5427, + "step": 3562 + }, + { + "epoch": 53.18, + "learning_rate": 0.000227719298245614, + "loss": 0.0204, + "step": 3563 + }, + { + "epoch": 53.19, + "learning_rate": 0.00022768421052631575, + "loss": 0.0051, + "step": 3564 + }, + { + "epoch": 53.21, + "learning_rate": 0.00022764912280701753, + "loss": 0.1251, + "step": 3565 + }, + { + "epoch": 53.22, + "learning_rate": 0.00022761403508771928, + "loss": 0.1806, + "step": 3566 + }, + { + "epoch": 53.24, + "learning_rate": 0.00022757894736842102, + "loss": 0.0308, + "step": 3567 + }, + { + "epoch": 53.25, + "learning_rate": 0.00022754385964912277, + "loss": 0.2411, + "step": 3568 + }, + { + "epoch": 53.27, + "learning_rate": 0.00022750877192982455, + "loss": 0.2055, + "step": 3569 + }, + { + "epoch": 53.28, + "learning_rate": 0.0002274736842105263, + "loss": 0.0065, + "step": 3570 + }, + { + "epoch": 53.3, + "learning_rate": 0.00022743859649122805, + "loss": 0.0063, + "step": 3571 + }, + { + "epoch": 53.31, + "learning_rate": 0.00022740350877192982, + "loss": 0.1127, + "step": 3572 + }, + { + "epoch": 53.33, + "learning_rate": 0.00022736842105263157, + "loss": 0.15, + "step": 3573 + }, + { + "epoch": 53.34, + "learning_rate": 0.00022733333333333332, + "loss": 0.1083, + "step": 3574 + }, + { + "epoch": 53.36, + "learning_rate": 0.00022729824561403507, + "loss": 0.2448, + "step": 3575 + }, + { + "epoch": 53.37, + "learning_rate": 0.00022726315789473685, + "loss": 0.0562, + "step": 3576 + }, + { + "epoch": 53.39, + "learning_rate": 0.0002272280701754386, + "loss": 0.0156, + "step": 3577 + }, + { + "epoch": 53.4, + "learning_rate": 0.00022719298245614032, + "loss": 0.1385, + "step": 3578 + }, + { + "epoch": 53.42, + "learning_rate": 0.00022715789473684206, + "loss": 0.2497, + "step": 3579 + }, + { + "epoch": 53.43, + "learning_rate": 0.00022712280701754384, + "loss": 0.1978, + "step": 3580 + }, + { + "epoch": 53.45, + "learning_rate": 0.0002270877192982456, + "loss": 0.0094, + "step": 3581 + }, + { + "epoch": 53.46, + "learning_rate": 0.00022705263157894734, + "loss": 0.0449, + "step": 3582 + }, + { + "epoch": 53.48, + "learning_rate": 0.0002270175438596491, + "loss": 0.092, + "step": 3583 + }, + { + "epoch": 53.49, + "learning_rate": 0.00022698245614035086, + "loss": 0.0045, + "step": 3584 + }, + { + "epoch": 53.51, + "learning_rate": 0.0002269473684210526, + "loss": 0.034, + "step": 3585 + }, + { + "epoch": 53.52, + "learning_rate": 0.00022691228070175436, + "loss": 0.1851, + "step": 3586 + }, + { + "epoch": 53.54, + "learning_rate": 0.00022687719298245614, + "loss": 0.2542, + "step": 3587 + }, + { + "epoch": 53.55, + "learning_rate": 0.00022684210526315788, + "loss": 0.0296, + "step": 3588 + }, + { + "epoch": 53.57, + "learning_rate": 0.00022680701754385963, + "loss": 0.0841, + "step": 3589 + }, + { + "epoch": 53.58, + "learning_rate": 0.00022677192982456138, + "loss": 0.1696, + "step": 3590 + }, + { + "epoch": 53.59, + "learning_rate": 0.00022673684210526316, + "loss": 0.021, + "step": 3591 + }, + { + "epoch": 53.61, + "learning_rate": 0.0002267017543859649, + "loss": 0.0085, + "step": 3592 + }, + { + "epoch": 53.62, + "learning_rate": 0.00022666666666666663, + "loss": 0.0084, + "step": 3593 + }, + { + "epoch": 53.64, + "learning_rate": 0.00022663157894736838, + "loss": 0.1819, + "step": 3594 + }, + { + "epoch": 53.65, + "learning_rate": 0.00022659649122807015, + "loss": 0.0062, + "step": 3595 + }, + { + "epoch": 53.67, + "learning_rate": 0.0002265614035087719, + "loss": 0.2001, + "step": 3596 + }, + { + "epoch": 53.68, + "learning_rate": 0.00022652631578947365, + "loss": 0.0464, + "step": 3597 + }, + { + "epoch": 53.7, + "learning_rate": 0.00022649122807017543, + "loss": 0.1304, + "step": 3598 + }, + { + "epoch": 53.71, + "learning_rate": 0.00022645614035087718, + "loss": 0.0339, + "step": 3599 + }, + { + "epoch": 53.73, + "learning_rate": 0.00022642105263157892, + "loss": 0.0062, + "step": 3600 + }, + { + "epoch": 53.73, + "eval_accuracy": 0.8277043563387175, + "eval_f1": 0.8286378990021197, + "eval_loss": 0.6871868968009949, + "eval_runtime": 343.9702, + "eval_samples_per_second": 11.879, + "eval_steps_per_second": 0.744, + "step": 3600 + }, + { + "epoch": 53.74, + "learning_rate": 0.00022638596491228067, + "loss": 0.1994, + "step": 3601 + }, + { + "epoch": 53.76, + "learning_rate": 0.00022635087719298245, + "loss": 0.0094, + "step": 3602 + }, + { + "epoch": 53.77, + "learning_rate": 0.0002263157894736842, + "loss": 0.0321, + "step": 3603 + }, + { + "epoch": 53.79, + "learning_rate": 0.00022628070175438595, + "loss": 0.2154, + "step": 3604 + }, + { + "epoch": 53.8, + "learning_rate": 0.0002262456140350877, + "loss": 0.0209, + "step": 3605 + }, + { + "epoch": 53.82, + "learning_rate": 0.00022621052631578947, + "loss": 0.159, + "step": 3606 + }, + { + "epoch": 53.83, + "learning_rate": 0.00022617543859649122, + "loss": 0.0349, + "step": 3607 + }, + { + "epoch": 53.85, + "learning_rate": 0.00022614035087719294, + "loss": 0.0212, + "step": 3608 + }, + { + "epoch": 53.86, + "learning_rate": 0.0002261052631578947, + "loss": 0.1698, + "step": 3609 + }, + { + "epoch": 53.88, + "learning_rate": 0.00022607017543859647, + "loss": 0.0174, + "step": 3610 + }, + { + "epoch": 53.89, + "learning_rate": 0.00022603508771929822, + "loss": 0.1267, + "step": 3611 + }, + { + "epoch": 53.91, + "learning_rate": 0.00022599999999999996, + "loss": 0.0366, + "step": 3612 + }, + { + "epoch": 53.92, + "learning_rate": 0.00022596491228070174, + "loss": 0.1204, + "step": 3613 + }, + { + "epoch": 53.94, + "learning_rate": 0.0002259298245614035, + "loss": 0.071, + "step": 3614 + }, + { + "epoch": 53.95, + "learning_rate": 0.00022589473684210524, + "loss": 0.0097, + "step": 3615 + }, + { + "epoch": 53.97, + "learning_rate": 0.00022585964912280699, + "loss": 0.0107, + "step": 3616 + }, + { + "epoch": 53.98, + "learning_rate": 0.00022582456140350876, + "loss": 0.1535, + "step": 3617 + }, + { + "epoch": 54.0, + "learning_rate": 0.0002257894736842105, + "loss": 0.009, + "step": 3618 + }, + { + "epoch": 54.01, + "learning_rate": 0.00022575438596491226, + "loss": 0.0093, + "step": 3619 + }, + { + "epoch": 54.03, + "learning_rate": 0.000225719298245614, + "loss": 0.3425, + "step": 3620 + }, + { + "epoch": 54.04, + "learning_rate": 0.00022568421052631578, + "loss": 0.035, + "step": 3621 + }, + { + "epoch": 54.06, + "learning_rate": 0.00022564912280701753, + "loss": 0.0755, + "step": 3622 + }, + { + "epoch": 54.07, + "learning_rate": 0.00022561403508771928, + "loss": 0.0045, + "step": 3623 + }, + { + "epoch": 54.09, + "learning_rate": 0.00022557894736842106, + "loss": 0.2203, + "step": 3624 + }, + { + "epoch": 54.1, + "learning_rate": 0.0002255438596491228, + "loss": 0.326, + "step": 3625 + }, + { + "epoch": 54.12, + "learning_rate": 0.00022550877192982453, + "loss": 0.0032, + "step": 3626 + }, + { + "epoch": 54.13, + "learning_rate": 0.00022547368421052628, + "loss": 0.1305, + "step": 3627 + }, + { + "epoch": 54.15, + "learning_rate": 0.00022543859649122805, + "loss": 0.2871, + "step": 3628 + }, + { + "epoch": 54.16, + "learning_rate": 0.0002254035087719298, + "loss": 0.0593, + "step": 3629 + }, + { + "epoch": 54.18, + "learning_rate": 0.00022536842105263155, + "loss": 0.022, + "step": 3630 + }, + { + "epoch": 54.19, + "learning_rate": 0.0002253333333333333, + "loss": 0.0904, + "step": 3631 + }, + { + "epoch": 54.21, + "learning_rate": 0.00022529824561403508, + "loss": 0.0036, + "step": 3632 + }, + { + "epoch": 54.22, + "learning_rate": 0.00022526315789473682, + "loss": 0.0114, + "step": 3633 + }, + { + "epoch": 54.24, + "learning_rate": 0.00022522807017543857, + "loss": 0.1628, + "step": 3634 + }, + { + "epoch": 54.25, + "learning_rate": 0.00022519298245614035, + "loss": 0.2939, + "step": 3635 + }, + { + "epoch": 54.27, + "learning_rate": 0.0002251578947368421, + "loss": 0.0156, + "step": 3636 + }, + { + "epoch": 54.28, + "learning_rate": 0.00022512280701754385, + "loss": 0.0028, + "step": 3637 + }, + { + "epoch": 54.3, + "learning_rate": 0.0002250877192982456, + "loss": 0.0056, + "step": 3638 + }, + { + "epoch": 54.31, + "learning_rate": 0.00022505263157894737, + "loss": 0.0847, + "step": 3639 + }, + { + "epoch": 54.33, + "learning_rate": 0.00022501754385964912, + "loss": 0.3336, + "step": 3640 + }, + { + "epoch": 54.34, + "learning_rate": 0.00022498245614035084, + "loss": 0.1036, + "step": 3641 + }, + { + "epoch": 54.36, + "learning_rate": 0.0002249473684210526, + "loss": 0.0032, + "step": 3642 + }, + { + "epoch": 54.37, + "learning_rate": 0.00022491228070175437, + "loss": 0.0345, + "step": 3643 + }, + { + "epoch": 54.39, + "learning_rate": 0.00022487719298245611, + "loss": 0.1888, + "step": 3644 + }, + { + "epoch": 54.4, + "learning_rate": 0.00022484210526315786, + "loss": 0.0094, + "step": 3645 + }, + { + "epoch": 54.42, + "learning_rate": 0.0002248070175438596, + "loss": 0.0244, + "step": 3646 + }, + { + "epoch": 54.43, + "learning_rate": 0.0002247719298245614, + "loss": 0.0999, + "step": 3647 + }, + { + "epoch": 54.45, + "learning_rate": 0.00022473684210526314, + "loss": 0.0718, + "step": 3648 + }, + { + "epoch": 54.46, + "learning_rate": 0.00022470175438596489, + "loss": 0.2473, + "step": 3649 + }, + { + "epoch": 54.48, + "learning_rate": 0.00022466666666666666, + "loss": 0.164, + "step": 3650 + }, + { + "epoch": 54.49, + "learning_rate": 0.0002246315789473684, + "loss": 0.2189, + "step": 3651 + }, + { + "epoch": 54.51, + "learning_rate": 0.00022459649122807016, + "loss": 0.1645, + "step": 3652 + }, + { + "epoch": 54.52, + "learning_rate": 0.0002245614035087719, + "loss": 0.0121, + "step": 3653 + }, + { + "epoch": 54.54, + "learning_rate": 0.00022452631578947368, + "loss": 0.0113, + "step": 3654 + }, + { + "epoch": 54.55, + "learning_rate": 0.00022449122807017543, + "loss": 0.056, + "step": 3655 + }, + { + "epoch": 54.57, + "learning_rate": 0.00022445614035087715, + "loss": 0.1074, + "step": 3656 + }, + { + "epoch": 54.58, + "learning_rate": 0.0002244210526315789, + "loss": 0.2388, + "step": 3657 + }, + { + "epoch": 54.59, + "learning_rate": 0.00022438596491228068, + "loss": 0.0244, + "step": 3658 + }, + { + "epoch": 54.61, + "learning_rate": 0.00022435087719298243, + "loss": 0.0135, + "step": 3659 + }, + { + "epoch": 54.62, + "learning_rate": 0.00022431578947368418, + "loss": 0.004, + "step": 3660 + }, + { + "epoch": 54.64, + "learning_rate": 0.00022428070175438595, + "loss": 0.1477, + "step": 3661 + }, + { + "epoch": 54.65, + "learning_rate": 0.0002242456140350877, + "loss": 0.012, + "step": 3662 + }, + { + "epoch": 54.67, + "learning_rate": 0.00022421052631578945, + "loss": 0.2786, + "step": 3663 + }, + { + "epoch": 54.68, + "learning_rate": 0.0002241754385964912, + "loss": 0.0527, + "step": 3664 + }, + { + "epoch": 54.7, + "learning_rate": 0.00022414035087719297, + "loss": 0.0731, + "step": 3665 + }, + { + "epoch": 54.71, + "learning_rate": 0.00022410526315789472, + "loss": 0.2216, + "step": 3666 + }, + { + "epoch": 54.73, + "learning_rate": 0.00022407017543859647, + "loss": 0.0036, + "step": 3667 + }, + { + "epoch": 54.74, + "learning_rate": 0.00022403508771929822, + "loss": 0.0137, + "step": 3668 + }, + { + "epoch": 54.76, + "learning_rate": 0.000224, + "loss": 0.3671, + "step": 3669 + }, + { + "epoch": 54.77, + "learning_rate": 0.00022396491228070175, + "loss": 0.3146, + "step": 3670 + }, + { + "epoch": 54.79, + "learning_rate": 0.0002239298245614035, + "loss": 0.2034, + "step": 3671 + }, + { + "epoch": 54.8, + "learning_rate": 0.00022389473684210527, + "loss": 0.1474, + "step": 3672 + }, + { + "epoch": 54.82, + "learning_rate": 0.00022385964912280702, + "loss": 0.0591, + "step": 3673 + }, + { + "epoch": 54.83, + "learning_rate": 0.00022382456140350874, + "loss": 0.0043, + "step": 3674 + }, + { + "epoch": 54.85, + "learning_rate": 0.0002237894736842105, + "loss": 0.0173, + "step": 3675 + }, + { + "epoch": 54.86, + "learning_rate": 0.00022375438596491227, + "loss": 0.1988, + "step": 3676 + }, + { + "epoch": 54.88, + "learning_rate": 0.00022371929824561401, + "loss": 0.4382, + "step": 3677 + }, + { + "epoch": 54.89, + "learning_rate": 0.00022368421052631576, + "loss": 0.0258, + "step": 3678 + }, + { + "epoch": 54.91, + "learning_rate": 0.0002236491228070175, + "loss": 0.0638, + "step": 3679 + }, + { + "epoch": 54.92, + "learning_rate": 0.0002236140350877193, + "loss": 0.1948, + "step": 3680 + }, + { + "epoch": 54.94, + "learning_rate": 0.00022357894736842104, + "loss": 0.2229, + "step": 3681 + }, + { + "epoch": 54.95, + "learning_rate": 0.00022354385964912278, + "loss": 0.0038, + "step": 3682 + }, + { + "epoch": 54.97, + "learning_rate": 0.00022350877192982453, + "loss": 0.451, + "step": 3683 + }, + { + "epoch": 54.98, + "learning_rate": 0.0002234736842105263, + "loss": 0.0037, + "step": 3684 + }, + { + "epoch": 55.0, + "learning_rate": 0.00022343859649122806, + "loss": 0.1133, + "step": 3685 + }, + { + "epoch": 55.01, + "learning_rate": 0.0002234035087719298, + "loss": 0.1625, + "step": 3686 + }, + { + "epoch": 55.03, + "learning_rate": 0.00022336842105263158, + "loss": 0.0319, + "step": 3687 + }, + { + "epoch": 55.04, + "learning_rate": 0.00022333333333333333, + "loss": 0.0071, + "step": 3688 + }, + { + "epoch": 55.06, + "learning_rate": 0.00022329824561403505, + "loss": 0.0307, + "step": 3689 + }, + { + "epoch": 55.07, + "learning_rate": 0.0002232631578947368, + "loss": 0.0852, + "step": 3690 + }, + { + "epoch": 55.09, + "learning_rate": 0.00022322807017543858, + "loss": 0.0048, + "step": 3691 + }, + { + "epoch": 55.1, + "learning_rate": 0.00022319298245614033, + "loss": 0.019, + "step": 3692 + }, + { + "epoch": 55.12, + "learning_rate": 0.00022315789473684208, + "loss": 0.0731, + "step": 3693 + }, + { + "epoch": 55.13, + "learning_rate": 0.00022312280701754382, + "loss": 0.129, + "step": 3694 + }, + { + "epoch": 55.15, + "learning_rate": 0.0002230877192982456, + "loss": 0.037, + "step": 3695 + }, + { + "epoch": 55.16, + "learning_rate": 0.00022305263157894735, + "loss": 0.0068, + "step": 3696 + }, + { + "epoch": 55.18, + "learning_rate": 0.0002230175438596491, + "loss": 0.0246, + "step": 3697 + }, + { + "epoch": 55.19, + "learning_rate": 0.00022298245614035087, + "loss": 0.1615, + "step": 3698 + }, + { + "epoch": 55.21, + "learning_rate": 0.00022294736842105262, + "loss": 0.0029, + "step": 3699 + }, + { + "epoch": 55.22, + "learning_rate": 0.00022291228070175437, + "loss": 0.0246, + "step": 3700 + }, + { + "epoch": 55.24, + "learning_rate": 0.00022287719298245612, + "loss": 0.0065, + "step": 3701 + }, + { + "epoch": 55.25, + "learning_rate": 0.0002228421052631579, + "loss": 0.0155, + "step": 3702 + }, + { + "epoch": 55.27, + "learning_rate": 0.00022280701754385964, + "loss": 0.0479, + "step": 3703 + }, + { + "epoch": 55.28, + "learning_rate": 0.00022277192982456137, + "loss": 0.0084, + "step": 3704 + }, + { + "epoch": 55.3, + "learning_rate": 0.00022273684210526312, + "loss": 0.3419, + "step": 3705 + }, + { + "epoch": 55.31, + "learning_rate": 0.0002227017543859649, + "loss": 0.2642, + "step": 3706 + }, + { + "epoch": 55.33, + "learning_rate": 0.00022266666666666664, + "loss": 0.0043, + "step": 3707 + }, + { + "epoch": 55.34, + "learning_rate": 0.0002226315789473684, + "loss": 0.0485, + "step": 3708 + }, + { + "epoch": 55.36, + "learning_rate": 0.00022259649122807014, + "loss": 0.0498, + "step": 3709 + }, + { + "epoch": 55.37, + "learning_rate": 0.0002225614035087719, + "loss": 0.0062, + "step": 3710 + }, + { + "epoch": 55.39, + "learning_rate": 0.00022252631578947366, + "loss": 0.0073, + "step": 3711 + }, + { + "epoch": 55.4, + "learning_rate": 0.0002224912280701754, + "loss": 0.1017, + "step": 3712 + }, + { + "epoch": 55.42, + "learning_rate": 0.0002224561403508772, + "loss": 0.2038, + "step": 3713 + }, + { + "epoch": 55.43, + "learning_rate": 0.00022242105263157894, + "loss": 0.1198, + "step": 3714 + }, + { + "epoch": 55.45, + "learning_rate": 0.00022238596491228068, + "loss": 0.0198, + "step": 3715 + }, + { + "epoch": 55.46, + "learning_rate": 0.00022235087719298243, + "loss": 0.1242, + "step": 3716 + }, + { + "epoch": 55.48, + "learning_rate": 0.0002223157894736842, + "loss": 0.0246, + "step": 3717 + }, + { + "epoch": 55.49, + "learning_rate": 0.00022228070175438596, + "loss": 0.0113, + "step": 3718 + }, + { + "epoch": 55.51, + "learning_rate": 0.0002222456140350877, + "loss": 0.0202, + "step": 3719 + }, + { + "epoch": 55.52, + "learning_rate": 0.00022221052631578943, + "loss": 0.0141, + "step": 3720 + }, + { + "epoch": 55.54, + "learning_rate": 0.0002221754385964912, + "loss": 0.0651, + "step": 3721 + }, + { + "epoch": 55.55, + "learning_rate": 0.00022214035087719295, + "loss": 0.0952, + "step": 3722 + }, + { + "epoch": 55.57, + "learning_rate": 0.0002221052631578947, + "loss": 0.1741, + "step": 3723 + }, + { + "epoch": 55.58, + "learning_rate": 0.00022207017543859648, + "loss": 0.5287, + "step": 3724 + }, + { + "epoch": 55.59, + "learning_rate": 0.00022203508771929823, + "loss": 0.1499, + "step": 3725 + }, + { + "epoch": 55.61, + "learning_rate": 0.00022199999999999998, + "loss": 0.2446, + "step": 3726 + }, + { + "epoch": 55.62, + "learning_rate": 0.00022196491228070172, + "loss": 0.0443, + "step": 3727 + }, + { + "epoch": 55.64, + "learning_rate": 0.0002219298245614035, + "loss": 0.0117, + "step": 3728 + }, + { + "epoch": 55.65, + "learning_rate": 0.00022189473684210525, + "loss": 0.0446, + "step": 3729 + }, + { + "epoch": 55.67, + "learning_rate": 0.000221859649122807, + "loss": 0.1981, + "step": 3730 + }, + { + "epoch": 55.68, + "learning_rate": 0.00022182456140350875, + "loss": 0.0033, + "step": 3731 + }, + { + "epoch": 55.7, + "learning_rate": 0.00022178947368421052, + "loss": 0.1273, + "step": 3732 + }, + { + "epoch": 55.71, + "learning_rate": 0.00022175438596491227, + "loss": 0.0741, + "step": 3733 + }, + { + "epoch": 55.73, + "learning_rate": 0.00022171929824561402, + "loss": 0.0265, + "step": 3734 + }, + { + "epoch": 55.74, + "learning_rate": 0.0002216842105263158, + "loss": 0.0115, + "step": 3735 + }, + { + "epoch": 55.76, + "learning_rate": 0.00022164912280701754, + "loss": 0.0161, + "step": 3736 + }, + { + "epoch": 55.77, + "learning_rate": 0.00022161403508771927, + "loss": 0.0085, + "step": 3737 + }, + { + "epoch": 55.79, + "learning_rate": 0.00022157894736842101, + "loss": 0.2448, + "step": 3738 + }, + { + "epoch": 55.8, + "learning_rate": 0.0002215438596491228, + "loss": 0.0145, + "step": 3739 + }, + { + "epoch": 55.82, + "learning_rate": 0.00022150877192982454, + "loss": 0.0153, + "step": 3740 + }, + { + "epoch": 55.83, + "learning_rate": 0.0002214736842105263, + "loss": 0.018, + "step": 3741 + }, + { + "epoch": 55.85, + "learning_rate": 0.00022143859649122804, + "loss": 0.2306, + "step": 3742 + }, + { + "epoch": 55.86, + "learning_rate": 0.0002214035087719298, + "loss": 0.0034, + "step": 3743 + }, + { + "epoch": 55.88, + "learning_rate": 0.00022136842105263156, + "loss": 0.0116, + "step": 3744 + }, + { + "epoch": 55.89, + "learning_rate": 0.0002213333333333333, + "loss": 0.0537, + "step": 3745 + }, + { + "epoch": 55.91, + "learning_rate": 0.00022129824561403506, + "loss": 0.1457, + "step": 3746 + }, + { + "epoch": 55.92, + "learning_rate": 0.00022126315789473683, + "loss": 0.0771, + "step": 3747 + }, + { + "epoch": 55.94, + "learning_rate": 0.00022122807017543858, + "loss": 0.0036, + "step": 3748 + }, + { + "epoch": 55.95, + "learning_rate": 0.00022119298245614033, + "loss": 0.1992, + "step": 3749 + }, + { + "epoch": 55.97, + "learning_rate": 0.0002211578947368421, + "loss": 0.1269, + "step": 3750 + }, + { + "epoch": 55.98, + "learning_rate": 0.00022112280701754386, + "loss": 0.209, + "step": 3751 + }, + { + "epoch": 56.0, + "learning_rate": 0.00022108771929824558, + "loss": 0.0806, + "step": 3752 + }, + { + "epoch": 56.01, + "learning_rate": 0.00022105263157894733, + "loss": 0.3064, + "step": 3753 + }, + { + "epoch": 56.03, + "learning_rate": 0.0002210175438596491, + "loss": 0.1929, + "step": 3754 + }, + { + "epoch": 56.04, + "learning_rate": 0.00022098245614035085, + "loss": 0.2673, + "step": 3755 + }, + { + "epoch": 56.06, + "learning_rate": 0.0002209473684210526, + "loss": 0.2803, + "step": 3756 + }, + { + "epoch": 56.07, + "learning_rate": 0.00022091228070175435, + "loss": 0.0192, + "step": 3757 + }, + { + "epoch": 56.09, + "learning_rate": 0.00022087719298245613, + "loss": 0.1947, + "step": 3758 + }, + { + "epoch": 56.1, + "learning_rate": 0.00022084210526315787, + "loss": 0.0723, + "step": 3759 + }, + { + "epoch": 56.12, + "learning_rate": 0.00022080701754385962, + "loss": 0.3244, + "step": 3760 + }, + { + "epoch": 56.13, + "learning_rate": 0.0002207719298245614, + "loss": 0.0238, + "step": 3761 + }, + { + "epoch": 56.15, + "learning_rate": 0.00022073684210526315, + "loss": 0.0308, + "step": 3762 + }, + { + "epoch": 56.16, + "learning_rate": 0.0002207017543859649, + "loss": 0.1321, + "step": 3763 + }, + { + "epoch": 56.18, + "learning_rate": 0.00022066666666666665, + "loss": 0.0051, + "step": 3764 + }, + { + "epoch": 56.19, + "learning_rate": 0.00022063157894736842, + "loss": 0.0057, + "step": 3765 + }, + { + "epoch": 56.21, + "learning_rate": 0.00022059649122807017, + "loss": 0.0935, + "step": 3766 + }, + { + "epoch": 56.22, + "learning_rate": 0.00022056140350877192, + "loss": 0.009, + "step": 3767 + }, + { + "epoch": 56.24, + "learning_rate": 0.00022052631578947364, + "loss": 0.0077, + "step": 3768 + }, + { + "epoch": 56.25, + "learning_rate": 0.00022049122807017542, + "loss": 0.0445, + "step": 3769 + }, + { + "epoch": 56.27, + "learning_rate": 0.00022045614035087717, + "loss": 0.1608, + "step": 3770 + }, + { + "epoch": 56.28, + "learning_rate": 0.00022042105263157891, + "loss": 0.1767, + "step": 3771 + }, + { + "epoch": 56.3, + "learning_rate": 0.00022038596491228066, + "loss": 0.0302, + "step": 3772 + }, + { + "epoch": 56.31, + "learning_rate": 0.00022035087719298244, + "loss": 0.1324, + "step": 3773 + }, + { + "epoch": 56.33, + "learning_rate": 0.0002203157894736842, + "loss": 0.0097, + "step": 3774 + }, + { + "epoch": 56.34, + "learning_rate": 0.00022028070175438594, + "loss": 0.3323, + "step": 3775 + }, + { + "epoch": 56.36, + "learning_rate": 0.0002202456140350877, + "loss": 0.1146, + "step": 3776 + }, + { + "epoch": 56.37, + "learning_rate": 0.00022021052631578946, + "loss": 0.1479, + "step": 3777 + }, + { + "epoch": 56.39, + "learning_rate": 0.0002201754385964912, + "loss": 0.1966, + "step": 3778 + }, + { + "epoch": 56.4, + "learning_rate": 0.00022014035087719296, + "loss": 0.0405, + "step": 3779 + }, + { + "epoch": 56.42, + "learning_rate": 0.00022010526315789473, + "loss": 0.3266, + "step": 3780 + }, + { + "epoch": 56.43, + "learning_rate": 0.00022007017543859648, + "loss": 0.0037, + "step": 3781 + }, + { + "epoch": 56.45, + "learning_rate": 0.00022003508771929823, + "loss": 0.089, + "step": 3782 + }, + { + "epoch": 56.46, + "learning_rate": 0.00021999999999999995, + "loss": 0.2181, + "step": 3783 + }, + { + "epoch": 56.48, + "learning_rate": 0.00021996491228070176, + "loss": 0.0876, + "step": 3784 + }, + { + "epoch": 56.49, + "learning_rate": 0.00021992982456140348, + "loss": 0.2992, + "step": 3785 + }, + { + "epoch": 56.51, + "learning_rate": 0.00021989473684210523, + "loss": 0.0213, + "step": 3786 + }, + { + "epoch": 56.52, + "learning_rate": 0.000219859649122807, + "loss": 0.1602, + "step": 3787 + }, + { + "epoch": 56.54, + "learning_rate": 0.00021982456140350875, + "loss": 0.2594, + "step": 3788 + }, + { + "epoch": 56.55, + "learning_rate": 0.0002197894736842105, + "loss": 0.013, + "step": 3789 + }, + { + "epoch": 56.57, + "learning_rate": 0.00021975438596491225, + "loss": 0.041, + "step": 3790 + }, + { + "epoch": 56.58, + "learning_rate": 0.00021971929824561403, + "loss": 0.1021, + "step": 3791 + }, + { + "epoch": 56.59, + "learning_rate": 0.00021968421052631577, + "loss": 0.1141, + "step": 3792 + }, + { + "epoch": 56.61, + "learning_rate": 0.00021964912280701752, + "loss": 0.0101, + "step": 3793 + }, + { + "epoch": 56.62, + "learning_rate": 0.00021961403508771927, + "loss": 0.0539, + "step": 3794 + }, + { + "epoch": 56.64, + "learning_rate": 0.00021957894736842105, + "loss": 0.0914, + "step": 3795 + }, + { + "epoch": 56.65, + "learning_rate": 0.0002195438596491228, + "loss": 0.0729, + "step": 3796 + }, + { + "epoch": 56.67, + "learning_rate": 0.00021950877192982454, + "loss": 0.0327, + "step": 3797 + }, + { + "epoch": 56.68, + "learning_rate": 0.00021947368421052632, + "loss": 0.0187, + "step": 3798 + }, + { + "epoch": 56.7, + "learning_rate": 0.00021943859649122807, + "loss": 0.0103, + "step": 3799 + }, + { + "epoch": 56.71, + "learning_rate": 0.0002194035087719298, + "loss": 0.1781, + "step": 3800 + }, + { + "epoch": 56.71, + "eval_accuracy": 0.8389623103279491, + "eval_f1": 0.8392926046674013, + "eval_loss": 0.6989664435386658, + "eval_runtime": 344.8439, + "eval_samples_per_second": 11.849, + "eval_steps_per_second": 0.742, + "step": 3800 + }, + { + "epoch": 56.73, + "learning_rate": 0.00021936842105263154, + "loss": 0.0225, + "step": 3801 + }, + { + "epoch": 56.74, + "learning_rate": 0.00021933333333333332, + "loss": 0.1503, + "step": 3802 + }, + { + "epoch": 56.76, + "learning_rate": 0.00021929824561403506, + "loss": 0.0784, + "step": 3803 + }, + { + "epoch": 56.77, + "learning_rate": 0.0002192631578947368, + "loss": 0.2565, + "step": 3804 + }, + { + "epoch": 56.79, + "learning_rate": 0.00021922807017543856, + "loss": 0.1283, + "step": 3805 + }, + { + "epoch": 56.8, + "learning_rate": 0.00021919298245614034, + "loss": 0.2075, + "step": 3806 + }, + { + "epoch": 56.82, + "learning_rate": 0.0002191578947368421, + "loss": 0.1317, + "step": 3807 + }, + { + "epoch": 56.83, + "learning_rate": 0.00021912280701754384, + "loss": 0.0164, + "step": 3808 + }, + { + "epoch": 56.85, + "learning_rate": 0.00021908771929824558, + "loss": 0.1049, + "step": 3809 + }, + { + "epoch": 56.86, + "learning_rate": 0.00021905263157894736, + "loss": 0.0052, + "step": 3810 + }, + { + "epoch": 56.88, + "learning_rate": 0.0002190175438596491, + "loss": 0.1653, + "step": 3811 + }, + { + "epoch": 56.89, + "learning_rate": 0.00021898245614035086, + "loss": 0.0032, + "step": 3812 + }, + { + "epoch": 56.91, + "learning_rate": 0.00021894736842105263, + "loss": 0.0823, + "step": 3813 + }, + { + "epoch": 56.92, + "learning_rate": 0.00021891228070175438, + "loss": 0.007, + "step": 3814 + }, + { + "epoch": 56.94, + "learning_rate": 0.0002188771929824561, + "loss": 0.0183, + "step": 3815 + }, + { + "epoch": 56.95, + "learning_rate": 0.00021884210526315785, + "loss": 0.0204, + "step": 3816 + }, + { + "epoch": 56.97, + "learning_rate": 0.00021880701754385963, + "loss": 0.0054, + "step": 3817 + }, + { + "epoch": 56.98, + "learning_rate": 0.00021877192982456138, + "loss": 0.0143, + "step": 3818 + }, + { + "epoch": 57.0, + "learning_rate": 0.00021873684210526313, + "loss": 0.2526, + "step": 3819 + }, + { + "epoch": 57.01, + "learning_rate": 0.00021870175438596488, + "loss": 0.4135, + "step": 3820 + }, + { + "epoch": 57.03, + "learning_rate": 0.00021866666666666665, + "loss": 0.1704, + "step": 3821 + }, + { + "epoch": 57.04, + "learning_rate": 0.0002186315789473684, + "loss": 0.1849, + "step": 3822 + }, + { + "epoch": 57.06, + "learning_rate": 0.00021859649122807015, + "loss": 0.1036, + "step": 3823 + }, + { + "epoch": 57.07, + "learning_rate": 0.00021856140350877192, + "loss": 0.0054, + "step": 3824 + }, + { + "epoch": 57.09, + "learning_rate": 0.00021852631578947367, + "loss": 0.0035, + "step": 3825 + }, + { + "epoch": 57.1, + "learning_rate": 0.00021849122807017542, + "loss": 0.1784, + "step": 3826 + }, + { + "epoch": 57.12, + "learning_rate": 0.00021845614035087717, + "loss": 0.0086, + "step": 3827 + }, + { + "epoch": 57.13, + "learning_rate": 0.00021842105263157895, + "loss": 0.2458, + "step": 3828 + }, + { + "epoch": 57.15, + "learning_rate": 0.0002183859649122807, + "loss": 0.1473, + "step": 3829 + }, + { + "epoch": 57.16, + "learning_rate": 0.00021835087719298244, + "loss": 0.0109, + "step": 3830 + }, + { + "epoch": 57.18, + "learning_rate": 0.00021831578947368417, + "loss": 0.0234, + "step": 3831 + }, + { + "epoch": 57.19, + "learning_rate": 0.00021828070175438597, + "loss": 0.1154, + "step": 3832 + }, + { + "epoch": 57.21, + "learning_rate": 0.0002182456140350877, + "loss": 0.0863, + "step": 3833 + }, + { + "epoch": 57.22, + "learning_rate": 0.00021821052631578944, + "loss": 0.0019, + "step": 3834 + }, + { + "epoch": 57.24, + "learning_rate": 0.00021817543859649122, + "loss": 0.0907, + "step": 3835 + }, + { + "epoch": 57.25, + "learning_rate": 0.00021814035087719296, + "loss": 0.0025, + "step": 3836 + }, + { + "epoch": 57.27, + "learning_rate": 0.0002181052631578947, + "loss": 0.16, + "step": 3837 + }, + { + "epoch": 57.28, + "learning_rate": 0.00021807017543859646, + "loss": 0.2068, + "step": 3838 + }, + { + "epoch": 57.3, + "learning_rate": 0.00021803508771929824, + "loss": 0.0291, + "step": 3839 + }, + { + "epoch": 57.31, + "learning_rate": 0.00021799999999999999, + "loss": 0.0851, + "step": 3840 + }, + { + "epoch": 57.33, + "learning_rate": 0.00021796491228070174, + "loss": 0.0864, + "step": 3841 + }, + { + "epoch": 57.34, + "learning_rate": 0.00021792982456140348, + "loss": 0.0652, + "step": 3842 + }, + { + "epoch": 57.36, + "learning_rate": 0.00021789473684210526, + "loss": 0.0539, + "step": 3843 + }, + { + "epoch": 57.37, + "learning_rate": 0.000217859649122807, + "loss": 0.0076, + "step": 3844 + }, + { + "epoch": 57.39, + "learning_rate": 0.00021782456140350876, + "loss": 0.0818, + "step": 3845 + }, + { + "epoch": 57.4, + "learning_rate": 0.00021778947368421048, + "loss": 0.0047, + "step": 3846 + }, + { + "epoch": 57.42, + "learning_rate": 0.00021775438596491228, + "loss": 0.0698, + "step": 3847 + }, + { + "epoch": 57.43, + "learning_rate": 0.000217719298245614, + "loss": 0.002, + "step": 3848 + }, + { + "epoch": 57.45, + "learning_rate": 0.00021768421052631575, + "loss": 0.0138, + "step": 3849 + }, + { + "epoch": 57.46, + "learning_rate": 0.00021764912280701753, + "loss": 0.0896, + "step": 3850 + }, + { + "epoch": 57.48, + "learning_rate": 0.00021761403508771928, + "loss": 0.0029, + "step": 3851 + }, + { + "epoch": 57.49, + "learning_rate": 0.00021757894736842103, + "loss": 0.2649, + "step": 3852 + }, + { + "epoch": 57.51, + "learning_rate": 0.00021754385964912277, + "loss": 0.0521, + "step": 3853 + }, + { + "epoch": 57.52, + "learning_rate": 0.00021750877192982455, + "loss": 0.3472, + "step": 3854 + }, + { + "epoch": 57.54, + "learning_rate": 0.0002174736842105263, + "loss": 0.1763, + "step": 3855 + }, + { + "epoch": 57.55, + "learning_rate": 0.00021743859649122805, + "loss": 0.0379, + "step": 3856 + }, + { + "epoch": 57.57, + "learning_rate": 0.0002174035087719298, + "loss": 0.1569, + "step": 3857 + }, + { + "epoch": 57.58, + "learning_rate": 0.00021736842105263157, + "loss": 0.0211, + "step": 3858 + }, + { + "epoch": 57.59, + "learning_rate": 0.00021733333333333332, + "loss": 0.0072, + "step": 3859 + }, + { + "epoch": 57.61, + "learning_rate": 0.00021729824561403507, + "loss": 0.0209, + "step": 3860 + }, + { + "epoch": 57.62, + "learning_rate": 0.00021726315789473685, + "loss": 0.0629, + "step": 3861 + }, + { + "epoch": 57.64, + "learning_rate": 0.0002172280701754386, + "loss": 0.0572, + "step": 3862 + }, + { + "epoch": 57.65, + "learning_rate": 0.00021719298245614032, + "loss": 0.0058, + "step": 3863 + }, + { + "epoch": 57.67, + "learning_rate": 0.00021715789473684207, + "loss": 0.192, + "step": 3864 + }, + { + "epoch": 57.68, + "learning_rate": 0.00021712280701754384, + "loss": 0.005, + "step": 3865 + }, + { + "epoch": 57.7, + "learning_rate": 0.0002170877192982456, + "loss": 0.0027, + "step": 3866 + }, + { + "epoch": 57.71, + "learning_rate": 0.00021705263157894734, + "loss": 0.0573, + "step": 3867 + }, + { + "epoch": 57.73, + "learning_rate": 0.0002170175438596491, + "loss": 0.0142, + "step": 3868 + }, + { + "epoch": 57.74, + "learning_rate": 0.00021698245614035086, + "loss": 0.0021, + "step": 3869 + }, + { + "epoch": 57.76, + "learning_rate": 0.0002169473684210526, + "loss": 0.0478, + "step": 3870 + }, + { + "epoch": 57.77, + "learning_rate": 0.00021691228070175436, + "loss": 0.0061, + "step": 3871 + }, + { + "epoch": 57.79, + "learning_rate": 0.0002168771929824561, + "loss": 0.0036, + "step": 3872 + }, + { + "epoch": 57.8, + "learning_rate": 0.00021684210526315789, + "loss": 0.0152, + "step": 3873 + }, + { + "epoch": 57.82, + "learning_rate": 0.00021680701754385963, + "loss": 0.0777, + "step": 3874 + }, + { + "epoch": 57.83, + "learning_rate": 0.00021677192982456138, + "loss": 0.0093, + "step": 3875 + }, + { + "epoch": 57.85, + "learning_rate": 0.00021673684210526316, + "loss": 0.2739, + "step": 3876 + }, + { + "epoch": 57.86, + "learning_rate": 0.0002167017543859649, + "loss": 0.1036, + "step": 3877 + }, + { + "epoch": 57.88, + "learning_rate": 0.00021666666666666666, + "loss": 0.0132, + "step": 3878 + }, + { + "epoch": 57.89, + "learning_rate": 0.00021663157894736838, + "loss": 0.0039, + "step": 3879 + }, + { + "epoch": 57.91, + "learning_rate": 0.00021659649122807018, + "loss": 0.0541, + "step": 3880 + }, + { + "epoch": 57.92, + "learning_rate": 0.0002165614035087719, + "loss": 0.0431, + "step": 3881 + }, + { + "epoch": 57.94, + "learning_rate": 0.00021652631578947365, + "loss": 0.1137, + "step": 3882 + }, + { + "epoch": 57.95, + "learning_rate": 0.0002164912280701754, + "loss": 0.0022, + "step": 3883 + }, + { + "epoch": 57.97, + "learning_rate": 0.00021645614035087718, + "loss": 0.1969, + "step": 3884 + }, + { + "epoch": 57.98, + "learning_rate": 0.00021642105263157893, + "loss": 0.0212, + "step": 3885 + }, + { + "epoch": 58.0, + "learning_rate": 0.00021638596491228067, + "loss": 0.3204, + "step": 3886 + }, + { + "epoch": 58.01, + "learning_rate": 0.00021635087719298245, + "loss": 0.0098, + "step": 3887 + }, + { + "epoch": 58.03, + "learning_rate": 0.0002163157894736842, + "loss": 0.1595, + "step": 3888 + }, + { + "epoch": 58.04, + "learning_rate": 0.00021628070175438595, + "loss": 0.086, + "step": 3889 + }, + { + "epoch": 58.06, + "learning_rate": 0.0002162456140350877, + "loss": 0.0183, + "step": 3890 + }, + { + "epoch": 58.07, + "learning_rate": 0.00021621052631578947, + "loss": 0.2086, + "step": 3891 + }, + { + "epoch": 58.09, + "learning_rate": 0.00021617543859649122, + "loss": 0.0196, + "step": 3892 + }, + { + "epoch": 58.1, + "learning_rate": 0.00021614035087719297, + "loss": 0.1816, + "step": 3893 + }, + { + "epoch": 58.12, + "learning_rate": 0.0002161052631578947, + "loss": 0.2019, + "step": 3894 + }, + { + "epoch": 58.13, + "learning_rate": 0.0002160701754385965, + "loss": 0.0304, + "step": 3895 + }, + { + "epoch": 58.15, + "learning_rate": 0.00021603508771929822, + "loss": 0.0164, + "step": 3896 + }, + { + "epoch": 58.16, + "learning_rate": 0.00021599999999999996, + "loss": 0.002, + "step": 3897 + }, + { + "epoch": 58.18, + "learning_rate": 0.00021596491228070174, + "loss": 0.0027, + "step": 3898 + }, + { + "epoch": 58.19, + "learning_rate": 0.0002159298245614035, + "loss": 0.0026, + "step": 3899 + }, + { + "epoch": 58.21, + "learning_rate": 0.00021589473684210524, + "loss": 0.0901, + "step": 3900 + }, + { + "epoch": 58.22, + "learning_rate": 0.000215859649122807, + "loss": 0.0058, + "step": 3901 + }, + { + "epoch": 58.24, + "learning_rate": 0.00021582456140350876, + "loss": 0.0685, + "step": 3902 + }, + { + "epoch": 58.25, + "learning_rate": 0.0002157894736842105, + "loss": 0.0045, + "step": 3903 + }, + { + "epoch": 58.27, + "learning_rate": 0.00021575438596491226, + "loss": 0.2307, + "step": 3904 + }, + { + "epoch": 58.28, + "learning_rate": 0.000215719298245614, + "loss": 0.0077, + "step": 3905 + }, + { + "epoch": 58.3, + "learning_rate": 0.00021568421052631579, + "loss": 0.0029, + "step": 3906 + }, + { + "epoch": 58.31, + "learning_rate": 0.00021564912280701753, + "loss": 0.2945, + "step": 3907 + }, + { + "epoch": 58.33, + "learning_rate": 0.00021561403508771928, + "loss": 0.0025, + "step": 3908 + }, + { + "epoch": 58.34, + "learning_rate": 0.000215578947368421, + "loss": 0.019, + "step": 3909 + }, + { + "epoch": 58.36, + "learning_rate": 0.0002155438596491228, + "loss": 0.0021, + "step": 3910 + }, + { + "epoch": 58.37, + "learning_rate": 0.00021550877192982453, + "loss": 0.0225, + "step": 3911 + }, + { + "epoch": 58.39, + "learning_rate": 0.00021547368421052628, + "loss": 0.0101, + "step": 3912 + }, + { + "epoch": 58.4, + "learning_rate": 0.00021543859649122805, + "loss": 0.0075, + "step": 3913 + }, + { + "epoch": 58.42, + "learning_rate": 0.0002154035087719298, + "loss": 0.0023, + "step": 3914 + }, + { + "epoch": 58.43, + "learning_rate": 0.00021536842105263155, + "loss": 0.0037, + "step": 3915 + }, + { + "epoch": 58.45, + "learning_rate": 0.0002153333333333333, + "loss": 0.0055, + "step": 3916 + }, + { + "epoch": 58.46, + "learning_rate": 0.00021529824561403508, + "loss": 0.0205, + "step": 3917 + }, + { + "epoch": 58.48, + "learning_rate": 0.00021526315789473682, + "loss": 0.2707, + "step": 3918 + }, + { + "epoch": 58.49, + "learning_rate": 0.00021522807017543857, + "loss": 0.0289, + "step": 3919 + }, + { + "epoch": 58.51, + "learning_rate": 0.00021519298245614032, + "loss": 0.0175, + "step": 3920 + }, + { + "epoch": 58.52, + "learning_rate": 0.0002151578947368421, + "loss": 0.1888, + "step": 3921 + }, + { + "epoch": 58.54, + "learning_rate": 0.00021512280701754385, + "loss": 0.0133, + "step": 3922 + }, + { + "epoch": 58.55, + "learning_rate": 0.0002150877192982456, + "loss": 0.1769, + "step": 3923 + }, + { + "epoch": 58.57, + "learning_rate": 0.00021505263157894737, + "loss": 0.0578, + "step": 3924 + }, + { + "epoch": 58.58, + "learning_rate": 0.00021501754385964912, + "loss": 0.1585, + "step": 3925 + }, + { + "epoch": 58.59, + "learning_rate": 0.00021498245614035087, + "loss": 0.0783, + "step": 3926 + }, + { + "epoch": 58.61, + "learning_rate": 0.0002149473684210526, + "loss": 0.0021, + "step": 3927 + }, + { + "epoch": 58.62, + "learning_rate": 0.00021491228070175437, + "loss": 0.0082, + "step": 3928 + }, + { + "epoch": 58.64, + "learning_rate": 0.00021487719298245612, + "loss": 0.0019, + "step": 3929 + }, + { + "epoch": 58.65, + "learning_rate": 0.00021484210526315786, + "loss": 0.0022, + "step": 3930 + }, + { + "epoch": 58.67, + "learning_rate": 0.0002148070175438596, + "loss": 0.0103, + "step": 3931 + }, + { + "epoch": 58.68, + "learning_rate": 0.0002147719298245614, + "loss": 0.297, + "step": 3932 + }, + { + "epoch": 58.7, + "learning_rate": 0.00021473684210526314, + "loss": 0.0833, + "step": 3933 + }, + { + "epoch": 58.71, + "learning_rate": 0.00021470175438596489, + "loss": 0.0789, + "step": 3934 + }, + { + "epoch": 58.73, + "learning_rate": 0.00021466666666666664, + "loss": 0.0016, + "step": 3935 + }, + { + "epoch": 58.74, + "learning_rate": 0.0002146315789473684, + "loss": 0.0414, + "step": 3936 + }, + { + "epoch": 58.76, + "learning_rate": 0.00021459649122807016, + "loss": 0.0163, + "step": 3937 + }, + { + "epoch": 58.77, + "learning_rate": 0.0002145614035087719, + "loss": 0.1671, + "step": 3938 + }, + { + "epoch": 58.79, + "learning_rate": 0.00021452631578947368, + "loss": 0.1753, + "step": 3939 + }, + { + "epoch": 58.8, + "learning_rate": 0.00021449122807017543, + "loss": 0.2833, + "step": 3940 + }, + { + "epoch": 58.82, + "learning_rate": 0.00021445614035087718, + "loss": 0.0499, + "step": 3941 + }, + { + "epoch": 58.83, + "learning_rate": 0.0002144210526315789, + "loss": 0.0969, + "step": 3942 + }, + { + "epoch": 58.85, + "learning_rate": 0.0002143859649122807, + "loss": 0.4685, + "step": 3943 + }, + { + "epoch": 58.86, + "learning_rate": 0.00021435087719298243, + "loss": 0.0489, + "step": 3944 + }, + { + "epoch": 58.88, + "learning_rate": 0.00021431578947368418, + "loss": 0.1016, + "step": 3945 + }, + { + "epoch": 58.89, + "learning_rate": 0.00021428070175438593, + "loss": 0.161, + "step": 3946 + }, + { + "epoch": 58.91, + "learning_rate": 0.0002142456140350877, + "loss": 0.0239, + "step": 3947 + }, + { + "epoch": 58.92, + "learning_rate": 0.00021421052631578945, + "loss": 0.0382, + "step": 3948 + }, + { + "epoch": 58.94, + "learning_rate": 0.0002141754385964912, + "loss": 0.105, + "step": 3949 + }, + { + "epoch": 58.95, + "learning_rate": 0.00021414035087719298, + "loss": 0.5169, + "step": 3950 + }, + { + "epoch": 58.97, + "learning_rate": 0.00021410526315789472, + "loss": 0.1401, + "step": 3951 + }, + { + "epoch": 58.98, + "learning_rate": 0.00021407017543859647, + "loss": 0.4505, + "step": 3952 + }, + { + "epoch": 59.0, + "learning_rate": 0.00021403508771929822, + "loss": 0.2539, + "step": 3953 + }, + { + "epoch": 59.01, + "learning_rate": 0.000214, + "loss": 0.4692, + "step": 3954 + }, + { + "epoch": 59.03, + "learning_rate": 0.00021396491228070175, + "loss": 0.1235, + "step": 3955 + }, + { + "epoch": 59.04, + "learning_rate": 0.0002139298245614035, + "loss": 0.1404, + "step": 3956 + }, + { + "epoch": 59.06, + "learning_rate": 0.00021389473684210522, + "loss": 0.2029, + "step": 3957 + }, + { + "epoch": 59.07, + "learning_rate": 0.00021385964912280702, + "loss": 0.004, + "step": 3958 + }, + { + "epoch": 59.09, + "learning_rate": 0.00021382456140350874, + "loss": 0.2866, + "step": 3959 + }, + { + "epoch": 59.1, + "learning_rate": 0.0002137894736842105, + "loss": 0.1797, + "step": 3960 + }, + { + "epoch": 59.12, + "learning_rate": 0.00021375438596491227, + "loss": 0.0153, + "step": 3961 + }, + { + "epoch": 59.13, + "learning_rate": 0.00021371929824561401, + "loss": 0.0148, + "step": 3962 + }, + { + "epoch": 59.15, + "learning_rate": 0.00021368421052631576, + "loss": 0.0334, + "step": 3963 + }, + { + "epoch": 59.16, + "learning_rate": 0.0002136491228070175, + "loss": 0.1279, + "step": 3964 + }, + { + "epoch": 59.18, + "learning_rate": 0.0002136140350877193, + "loss": 0.1225, + "step": 3965 + }, + { + "epoch": 59.19, + "learning_rate": 0.00021357894736842104, + "loss": 0.2333, + "step": 3966 + }, + { + "epoch": 59.21, + "learning_rate": 0.00021354385964912279, + "loss": 0.0704, + "step": 3967 + }, + { + "epoch": 59.22, + "learning_rate": 0.00021350877192982453, + "loss": 0.0717, + "step": 3968 + }, + { + "epoch": 59.24, + "learning_rate": 0.0002134736842105263, + "loss": 0.026, + "step": 3969 + }, + { + "epoch": 59.25, + "learning_rate": 0.00021343859649122806, + "loss": 0.0112, + "step": 3970 + }, + { + "epoch": 59.27, + "learning_rate": 0.0002134035087719298, + "loss": 0.2346, + "step": 3971 + }, + { + "epoch": 59.28, + "learning_rate": 0.00021336842105263156, + "loss": 0.099, + "step": 3972 + }, + { + "epoch": 59.3, + "learning_rate": 0.00021333333333333333, + "loss": 0.1305, + "step": 3973 + }, + { + "epoch": 59.31, + "learning_rate": 0.00021329824561403508, + "loss": 0.1288, + "step": 3974 + }, + { + "epoch": 59.33, + "learning_rate": 0.0002132631578947368, + "loss": 0.0628, + "step": 3975 + }, + { + "epoch": 59.34, + "learning_rate": 0.00021322807017543858, + "loss": 0.1149, + "step": 3976 + }, + { + "epoch": 59.36, + "learning_rate": 0.00021319298245614033, + "loss": 0.0104, + "step": 3977 + }, + { + "epoch": 59.37, + "learning_rate": 0.00021315789473684208, + "loss": 0.2735, + "step": 3978 + }, + { + "epoch": 59.39, + "learning_rate": 0.00021312280701754383, + "loss": 0.0035, + "step": 3979 + }, + { + "epoch": 59.4, + "learning_rate": 0.0002130877192982456, + "loss": 0.0304, + "step": 3980 + }, + { + "epoch": 59.42, + "learning_rate": 0.00021305263157894735, + "loss": 0.0028, + "step": 3981 + }, + { + "epoch": 59.43, + "learning_rate": 0.0002130175438596491, + "loss": 0.0032, + "step": 3982 + }, + { + "epoch": 59.45, + "learning_rate": 0.00021298245614035085, + "loss": 0.1995, + "step": 3983 + }, + { + "epoch": 59.46, + "learning_rate": 0.00021294736842105262, + "loss": 0.0188, + "step": 3984 + }, + { + "epoch": 59.48, + "learning_rate": 0.00021291228070175437, + "loss": 0.0776, + "step": 3985 + }, + { + "epoch": 59.49, + "learning_rate": 0.00021287719298245612, + "loss": 0.0076, + "step": 3986 + }, + { + "epoch": 59.51, + "learning_rate": 0.0002128421052631579, + "loss": 0.5661, + "step": 3987 + }, + { + "epoch": 59.52, + "learning_rate": 0.00021280701754385965, + "loss": 0.4085, + "step": 3988 + }, + { + "epoch": 59.54, + "learning_rate": 0.0002127719298245614, + "loss": 0.1628, + "step": 3989 + }, + { + "epoch": 59.55, + "learning_rate": 0.00021273684210526312, + "loss": 0.1061, + "step": 3990 + }, + { + "epoch": 59.57, + "learning_rate": 0.00021270175438596492, + "loss": 0.2666, + "step": 3991 + }, + { + "epoch": 59.58, + "learning_rate": 0.00021266666666666664, + "loss": 0.0611, + "step": 3992 + }, + { + "epoch": 59.59, + "learning_rate": 0.0002126315789473684, + "loss": 0.0071, + "step": 3993 + }, + { + "epoch": 59.61, + "learning_rate": 0.00021259649122807014, + "loss": 0.0339, + "step": 3994 + }, + { + "epoch": 59.62, + "learning_rate": 0.00021256140350877191, + "loss": 0.2808, + "step": 3995 + }, + { + "epoch": 59.64, + "learning_rate": 0.00021252631578947366, + "loss": 0.0188, + "step": 3996 + }, + { + "epoch": 59.65, + "learning_rate": 0.0002124912280701754, + "loss": 0.1661, + "step": 3997 + }, + { + "epoch": 59.67, + "learning_rate": 0.0002124561403508772, + "loss": 0.0633, + "step": 3998 + }, + { + "epoch": 59.68, + "learning_rate": 0.00021242105263157894, + "loss": 0.0081, + "step": 3999 + }, + { + "epoch": 59.7, + "learning_rate": 0.00021238596491228069, + "loss": 0.0309, + "step": 4000 + }, + { + "epoch": 59.7, + "eval_accuracy": 0.8499755261869799, + "eval_f1": 0.8496479698093485, + "eval_loss": 0.6348404884338379, + "eval_runtime": 344.6603, + "eval_samples_per_second": 11.855, + "eval_steps_per_second": 0.743, + "step": 4000 + }, + { + "epoch": 59.71, + "learning_rate": 0.00021235087719298243, + "loss": 0.004, + "step": 4001 + }, + { + "epoch": 59.73, + "learning_rate": 0.0002123157894736842, + "loss": 0.1196, + "step": 4002 + }, + { + "epoch": 59.74, + "learning_rate": 0.00021228070175438596, + "loss": 0.2379, + "step": 4003 + }, + { + "epoch": 59.76, + "learning_rate": 0.0002122456140350877, + "loss": 0.1041, + "step": 4004 + }, + { + "epoch": 59.77, + "learning_rate": 0.00021221052631578943, + "loss": 0.0147, + "step": 4005 + }, + { + "epoch": 59.79, + "learning_rate": 0.00021217543859649123, + "loss": 0.238, + "step": 4006 + }, + { + "epoch": 59.8, + "learning_rate": 0.00021214035087719295, + "loss": 0.1002, + "step": 4007 + }, + { + "epoch": 59.82, + "learning_rate": 0.0002121052631578947, + "loss": 0.0141, + "step": 4008 + }, + { + "epoch": 59.83, + "learning_rate": 0.00021207017543859645, + "loss": 0.0981, + "step": 4009 + }, + { + "epoch": 59.85, + "learning_rate": 0.00021203508771929823, + "loss": 0.0131, + "step": 4010 + }, + { + "epoch": 59.86, + "learning_rate": 0.00021199999999999998, + "loss": 0.0039, + "step": 4011 + }, + { + "epoch": 59.88, + "learning_rate": 0.00021196491228070172, + "loss": 0.0125, + "step": 4012 + }, + { + "epoch": 59.89, + "learning_rate": 0.0002119298245614035, + "loss": 0.0485, + "step": 4013 + }, + { + "epoch": 59.91, + "learning_rate": 0.00021189473684210525, + "loss": 0.0308, + "step": 4014 + }, + { + "epoch": 59.92, + "learning_rate": 0.000211859649122807, + "loss": 0.0196, + "step": 4015 + }, + { + "epoch": 59.94, + "learning_rate": 0.00021182456140350875, + "loss": 0.0278, + "step": 4016 + }, + { + "epoch": 59.95, + "learning_rate": 0.00021178947368421052, + "loss": 0.0999, + "step": 4017 + }, + { + "epoch": 59.97, + "learning_rate": 0.00021175438596491227, + "loss": 0.3143, + "step": 4018 + }, + { + "epoch": 59.98, + "learning_rate": 0.00021171929824561402, + "loss": 0.0039, + "step": 4019 + }, + { + "epoch": 60.0, + "learning_rate": 0.00021168421052631577, + "loss": 0.0026, + "step": 4020 + }, + { + "epoch": 60.01, + "learning_rate": 0.00021164912280701755, + "loss": 0.0043, + "step": 4021 + }, + { + "epoch": 60.03, + "learning_rate": 0.00021161403508771927, + "loss": 0.0167, + "step": 4022 + }, + { + "epoch": 60.04, + "learning_rate": 0.00021157894736842102, + "loss": 0.0815, + "step": 4023 + }, + { + "epoch": 60.06, + "learning_rate": 0.0002115438596491228, + "loss": 0.0135, + "step": 4024 + }, + { + "epoch": 60.07, + "learning_rate": 0.00021150877192982454, + "loss": 0.0043, + "step": 4025 + }, + { + "epoch": 60.09, + "learning_rate": 0.0002114736842105263, + "loss": 0.206, + "step": 4026 + }, + { + "epoch": 60.1, + "learning_rate": 0.00021143859649122804, + "loss": 0.017, + "step": 4027 + }, + { + "epoch": 60.12, + "learning_rate": 0.00021140350877192981, + "loss": 0.0317, + "step": 4028 + }, + { + "epoch": 60.13, + "learning_rate": 0.00021136842105263156, + "loss": 0.0049, + "step": 4029 + }, + { + "epoch": 60.15, + "learning_rate": 0.0002113333333333333, + "loss": 0.0669, + "step": 4030 + }, + { + "epoch": 60.16, + "learning_rate": 0.00021129824561403506, + "loss": 0.0034, + "step": 4031 + }, + { + "epoch": 60.18, + "learning_rate": 0.00021126315789473684, + "loss": 0.0118, + "step": 4032 + }, + { + "epoch": 60.19, + "learning_rate": 0.00021122807017543858, + "loss": 0.1456, + "step": 4033 + }, + { + "epoch": 60.21, + "learning_rate": 0.00021119298245614033, + "loss": 0.0936, + "step": 4034 + }, + { + "epoch": 60.22, + "learning_rate": 0.00021115789473684208, + "loss": 0.002, + "step": 4035 + }, + { + "epoch": 60.24, + "learning_rate": 0.00021112280701754386, + "loss": 0.0019, + "step": 4036 + }, + { + "epoch": 60.25, + "learning_rate": 0.0002110877192982456, + "loss": 0.0154, + "step": 4037 + }, + { + "epoch": 60.27, + "learning_rate": 0.00021105263157894733, + "loss": 0.1007, + "step": 4038 + }, + { + "epoch": 60.28, + "learning_rate": 0.00021101754385964913, + "loss": 0.002, + "step": 4039 + }, + { + "epoch": 60.3, + "learning_rate": 0.00021098245614035085, + "loss": 0.0145, + "step": 4040 + }, + { + "epoch": 60.31, + "learning_rate": 0.0002109473684210526, + "loss": 0.0021, + "step": 4041 + }, + { + "epoch": 60.33, + "learning_rate": 0.00021091228070175435, + "loss": 0.0255, + "step": 4042 + }, + { + "epoch": 60.34, + "learning_rate": 0.00021087719298245613, + "loss": 0.1407, + "step": 4043 + }, + { + "epoch": 60.36, + "learning_rate": 0.00021084210526315788, + "loss": 0.0017, + "step": 4044 + }, + { + "epoch": 60.37, + "learning_rate": 0.00021080701754385962, + "loss": 0.0023, + "step": 4045 + }, + { + "epoch": 60.39, + "learning_rate": 0.00021077192982456137, + "loss": 0.2124, + "step": 4046 + }, + { + "epoch": 60.4, + "learning_rate": 0.00021073684210526315, + "loss": 0.0597, + "step": 4047 + }, + { + "epoch": 60.42, + "learning_rate": 0.0002107017543859649, + "loss": 0.002, + "step": 4048 + }, + { + "epoch": 60.43, + "learning_rate": 0.00021066666666666665, + "loss": 0.1701, + "step": 4049 + }, + { + "epoch": 60.45, + "learning_rate": 0.00021063157894736842, + "loss": 0.0052, + "step": 4050 + }, + { + "epoch": 60.46, + "learning_rate": 0.00021059649122807017, + "loss": 0.0016, + "step": 4051 + }, + { + "epoch": 60.48, + "learning_rate": 0.00021056140350877192, + "loss": 0.0016, + "step": 4052 + }, + { + "epoch": 60.49, + "learning_rate": 0.00021052631578947364, + "loss": 0.0013, + "step": 4053 + }, + { + "epoch": 60.51, + "learning_rate": 0.00021049122807017544, + "loss": 0.0026, + "step": 4054 + }, + { + "epoch": 60.52, + "learning_rate": 0.00021045614035087717, + "loss": 0.0104, + "step": 4055 + }, + { + "epoch": 60.54, + "learning_rate": 0.00021042105263157891, + "loss": 0.0041, + "step": 4056 + }, + { + "epoch": 60.55, + "learning_rate": 0.00021038596491228066, + "loss": 0.022, + "step": 4057 + }, + { + "epoch": 60.57, + "learning_rate": 0.00021035087719298244, + "loss": 0.3154, + "step": 4058 + }, + { + "epoch": 60.58, + "learning_rate": 0.0002103157894736842, + "loss": 0.0074, + "step": 4059 + }, + { + "epoch": 60.59, + "learning_rate": 0.00021028070175438594, + "loss": 0.0018, + "step": 4060 + }, + { + "epoch": 60.61, + "learning_rate": 0.0002102456140350877, + "loss": 0.1199, + "step": 4061 + }, + { + "epoch": 60.62, + "learning_rate": 0.00021021052631578946, + "loss": 0.0353, + "step": 4062 + }, + { + "epoch": 60.64, + "learning_rate": 0.0002101754385964912, + "loss": 0.011, + "step": 4063 + }, + { + "epoch": 60.65, + "learning_rate": 0.00021014035087719296, + "loss": 0.0122, + "step": 4064 + }, + { + "epoch": 60.67, + "learning_rate": 0.00021010526315789474, + "loss": 0.0019, + "step": 4065 + }, + { + "epoch": 60.68, + "learning_rate": 0.00021007017543859648, + "loss": 0.2162, + "step": 4066 + }, + { + "epoch": 60.7, + "learning_rate": 0.00021003508771929823, + "loss": 0.0015, + "step": 4067 + }, + { + "epoch": 60.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.0029, + "step": 4068 + }, + { + "epoch": 60.73, + "learning_rate": 0.00020996491228070176, + "loss": 0.0676, + "step": 4069 + }, + { + "epoch": 60.74, + "learning_rate": 0.00020992982456140348, + "loss": 0.0236, + "step": 4070 + }, + { + "epoch": 60.76, + "learning_rate": 0.00020989473684210523, + "loss": 0.2489, + "step": 4071 + }, + { + "epoch": 60.77, + "learning_rate": 0.00020985964912280698, + "loss": 0.0363, + "step": 4072 + }, + { + "epoch": 60.79, + "learning_rate": 0.00020982456140350875, + "loss": 0.0253, + "step": 4073 + }, + { + "epoch": 60.8, + "learning_rate": 0.0002097894736842105, + "loss": 0.111, + "step": 4074 + }, + { + "epoch": 60.82, + "learning_rate": 0.00020975438596491225, + "loss": 0.4341, + "step": 4075 + }, + { + "epoch": 60.83, + "learning_rate": 0.00020971929824561403, + "loss": 0.157, + "step": 4076 + }, + { + "epoch": 60.85, + "learning_rate": 0.00020968421052631577, + "loss": 0.0185, + "step": 4077 + }, + { + "epoch": 60.86, + "learning_rate": 0.00020964912280701752, + "loss": 0.1298, + "step": 4078 + }, + { + "epoch": 60.88, + "learning_rate": 0.00020961403508771927, + "loss": 0.1157, + "step": 4079 + }, + { + "epoch": 60.89, + "learning_rate": 0.00020957894736842105, + "loss": 0.0145, + "step": 4080 + }, + { + "epoch": 60.91, + "learning_rate": 0.0002095438596491228, + "loss": 0.0021, + "step": 4081 + }, + { + "epoch": 60.92, + "learning_rate": 0.00020950877192982455, + "loss": 0.0042, + "step": 4082 + }, + { + "epoch": 60.94, + "learning_rate": 0.0002094736842105263, + "loss": 0.1683, + "step": 4083 + }, + { + "epoch": 60.95, + "learning_rate": 0.00020943859649122807, + "loss": 0.3351, + "step": 4084 + }, + { + "epoch": 60.97, + "learning_rate": 0.00020940350877192982, + "loss": 0.2813, + "step": 4085 + }, + { + "epoch": 60.98, + "learning_rate": 0.00020936842105263154, + "loss": 0.0018, + "step": 4086 + }, + { + "epoch": 61.0, + "learning_rate": 0.00020933333333333334, + "loss": 0.1741, + "step": 4087 + }, + { + "epoch": 61.01, + "learning_rate": 0.00020929824561403507, + "loss": 0.0651, + "step": 4088 + }, + { + "epoch": 61.03, + "learning_rate": 0.00020926315789473681, + "loss": 0.2118, + "step": 4089 + }, + { + "epoch": 61.04, + "learning_rate": 0.00020922807017543856, + "loss": 0.003, + "step": 4090 + }, + { + "epoch": 61.06, + "learning_rate": 0.00020919298245614034, + "loss": 0.1208, + "step": 4091 + }, + { + "epoch": 61.07, + "learning_rate": 0.0002091578947368421, + "loss": 0.0036, + "step": 4092 + }, + { + "epoch": 61.09, + "learning_rate": 0.00020912280701754384, + "loss": 0.1213, + "step": 4093 + }, + { + "epoch": 61.1, + "learning_rate": 0.00020908771929824559, + "loss": 0.0017, + "step": 4094 + }, + { + "epoch": 61.12, + "learning_rate": 0.00020905263157894736, + "loss": 0.0023, + "step": 4095 + }, + { + "epoch": 61.13, + "learning_rate": 0.0002090175438596491, + "loss": 0.0016, + "step": 4096 + }, + { + "epoch": 61.15, + "learning_rate": 0.00020898245614035086, + "loss": 0.1565, + "step": 4097 + }, + { + "epoch": 61.16, + "learning_rate": 0.0002089473684210526, + "loss": 0.0819, + "step": 4098 + }, + { + "epoch": 61.18, + "learning_rate": 0.00020891228070175438, + "loss": 0.0196, + "step": 4099 + }, + { + "epoch": 61.19, + "learning_rate": 0.00020887719298245613, + "loss": 0.0111, + "step": 4100 + }, + { + "epoch": 61.21, + "learning_rate": 0.00020884210526315785, + "loss": 0.0028, + "step": 4101 + }, + { + "epoch": 61.22, + "learning_rate": 0.00020880701754385966, + "loss": 0.0065, + "step": 4102 + }, + { + "epoch": 61.24, + "learning_rate": 0.00020877192982456138, + "loss": 0.0015, + "step": 4103 + }, + { + "epoch": 61.25, + "learning_rate": 0.00020873684210526313, + "loss": 0.3828, + "step": 4104 + }, + { + "epoch": 61.27, + "learning_rate": 0.00020870175438596488, + "loss": 0.295, + "step": 4105 + }, + { + "epoch": 61.28, + "learning_rate": 0.00020866666666666665, + "loss": 0.3663, + "step": 4106 + }, + { + "epoch": 61.3, + "learning_rate": 0.0002086315789473684, + "loss": 0.0094, + "step": 4107 + }, + { + "epoch": 61.31, + "learning_rate": 0.00020859649122807015, + "loss": 0.0764, + "step": 4108 + }, + { + "epoch": 61.33, + "learning_rate": 0.0002085614035087719, + "loss": 0.2064, + "step": 4109 + }, + { + "epoch": 61.34, + "learning_rate": 0.00020852631578947367, + "loss": 0.2032, + "step": 4110 + }, + { + "epoch": 61.36, + "learning_rate": 0.00020849122807017542, + "loss": 0.0028, + "step": 4111 + }, + { + "epoch": 61.37, + "learning_rate": 0.00020845614035087717, + "loss": 0.247, + "step": 4112 + }, + { + "epoch": 61.39, + "learning_rate": 0.00020842105263157895, + "loss": 0.0929, + "step": 4113 + }, + { + "epoch": 61.4, + "learning_rate": 0.0002083859649122807, + "loss": 0.1304, + "step": 4114 + }, + { + "epoch": 61.42, + "learning_rate": 0.00020835087719298245, + "loss": 0.0107, + "step": 4115 + }, + { + "epoch": 61.43, + "learning_rate": 0.00020831578947368417, + "loss": 0.0028, + "step": 4116 + }, + { + "epoch": 61.45, + "learning_rate": 0.00020828070175438597, + "loss": 0.0404, + "step": 4117 + }, + { + "epoch": 61.46, + "learning_rate": 0.0002082456140350877, + "loss": 0.0027, + "step": 4118 + }, + { + "epoch": 61.48, + "learning_rate": 0.00020821052631578944, + "loss": 0.0116, + "step": 4119 + }, + { + "epoch": 61.49, + "learning_rate": 0.0002081754385964912, + "loss": 0.1348, + "step": 4120 + }, + { + "epoch": 61.51, + "learning_rate": 0.00020814035087719296, + "loss": 0.2591, + "step": 4121 + }, + { + "epoch": 61.52, + "learning_rate": 0.00020810526315789471, + "loss": 0.0318, + "step": 4122 + }, + { + "epoch": 61.54, + "learning_rate": 0.00020807017543859646, + "loss": 0.004, + "step": 4123 + }, + { + "epoch": 61.55, + "learning_rate": 0.00020803508771929824, + "loss": 0.1941, + "step": 4124 + }, + { + "epoch": 61.57, + "learning_rate": 0.000208, + "loss": 0.315, + "step": 4125 + }, + { + "epoch": 61.58, + "learning_rate": 0.00020796491228070174, + "loss": 0.5173, + "step": 4126 + }, + { + "epoch": 61.59, + "learning_rate": 0.00020792982456140348, + "loss": 0.0103, + "step": 4127 + }, + { + "epoch": 61.61, + "learning_rate": 0.00020789473684210526, + "loss": 0.0171, + "step": 4128 + }, + { + "epoch": 61.62, + "learning_rate": 0.000207859649122807, + "loss": 0.044, + "step": 4129 + }, + { + "epoch": 61.64, + "learning_rate": 0.00020782456140350876, + "loss": 0.1884, + "step": 4130 + }, + { + "epoch": 61.65, + "learning_rate": 0.0002077894736842105, + "loss": 0.0724, + "step": 4131 + }, + { + "epoch": 61.67, + "learning_rate": 0.00020775438596491228, + "loss": 0.1644, + "step": 4132 + }, + { + "epoch": 61.68, + "learning_rate": 0.00020771929824561403, + "loss": 0.0115, + "step": 4133 + }, + { + "epoch": 61.7, + "learning_rate": 0.00020768421052631575, + "loss": 0.0602, + "step": 4134 + }, + { + "epoch": 61.71, + "learning_rate": 0.0002076491228070175, + "loss": 0.1779, + "step": 4135 + }, + { + "epoch": 61.73, + "learning_rate": 0.00020761403508771928, + "loss": 0.3983, + "step": 4136 + }, + { + "epoch": 61.74, + "learning_rate": 0.00020757894736842103, + "loss": 0.034, + "step": 4137 + }, + { + "epoch": 61.76, + "learning_rate": 0.00020754385964912278, + "loss": 0.016, + "step": 4138 + }, + { + "epoch": 61.77, + "learning_rate": 0.00020750877192982455, + "loss": 0.0098, + "step": 4139 + }, + { + "epoch": 61.79, + "learning_rate": 0.0002074736842105263, + "loss": 0.1986, + "step": 4140 + }, + { + "epoch": 61.8, + "learning_rate": 0.00020743859649122805, + "loss": 0.2418, + "step": 4141 + }, + { + "epoch": 61.82, + "learning_rate": 0.0002074035087719298, + "loss": 0.156, + "step": 4142 + }, + { + "epoch": 61.83, + "learning_rate": 0.00020736842105263157, + "loss": 0.0444, + "step": 4143 + }, + { + "epoch": 61.85, + "learning_rate": 0.00020733333333333332, + "loss": 0.2965, + "step": 4144 + }, + { + "epoch": 61.86, + "learning_rate": 0.00020729824561403507, + "loss": 0.0129, + "step": 4145 + }, + { + "epoch": 61.88, + "learning_rate": 0.00020726315789473682, + "loss": 0.1533, + "step": 4146 + }, + { + "epoch": 61.89, + "learning_rate": 0.0002072280701754386, + "loss": 0.0443, + "step": 4147 + }, + { + "epoch": 61.91, + "learning_rate": 0.00020719298245614034, + "loss": 0.2175, + "step": 4148 + }, + { + "epoch": 61.92, + "learning_rate": 0.00020715789473684207, + "loss": 0.0256, + "step": 4149 + }, + { + "epoch": 61.94, + "learning_rate": 0.00020712280701754387, + "loss": 0.2934, + "step": 4150 + }, + { + "epoch": 61.95, + "learning_rate": 0.0002070877192982456, + "loss": 0.0082, + "step": 4151 + }, + { + "epoch": 61.97, + "learning_rate": 0.00020705263157894734, + "loss": 0.2396, + "step": 4152 + }, + { + "epoch": 61.98, + "learning_rate": 0.0002070175438596491, + "loss": 0.0225, + "step": 4153 + }, + { + "epoch": 62.0, + "learning_rate": 0.00020698245614035086, + "loss": 0.0113, + "step": 4154 + }, + { + "epoch": 62.01, + "learning_rate": 0.0002069473684210526, + "loss": 0.0102, + "step": 4155 + }, + { + "epoch": 62.03, + "learning_rate": 0.00020691228070175436, + "loss": 0.1027, + "step": 4156 + }, + { + "epoch": 62.04, + "learning_rate": 0.0002068771929824561, + "loss": 0.0548, + "step": 4157 + }, + { + "epoch": 62.06, + "learning_rate": 0.0002068421052631579, + "loss": 0.0603, + "step": 4158 + }, + { + "epoch": 62.07, + "learning_rate": 0.00020680701754385964, + "loss": 0.1559, + "step": 4159 + }, + { + "epoch": 62.09, + "learning_rate": 0.00020677192982456138, + "loss": 0.0041, + "step": 4160 + }, + { + "epoch": 62.1, + "learning_rate": 0.00020673684210526316, + "loss": 0.2439, + "step": 4161 + }, + { + "epoch": 62.12, + "learning_rate": 0.0002067017543859649, + "loss": 0.055, + "step": 4162 + }, + { + "epoch": 62.13, + "learning_rate": 0.00020666666666666666, + "loss": 0.009, + "step": 4163 + }, + { + "epoch": 62.15, + "learning_rate": 0.00020663157894736838, + "loss": 0.0038, + "step": 4164 + }, + { + "epoch": 62.16, + "learning_rate": 0.00020659649122807018, + "loss": 0.0186, + "step": 4165 + }, + { + "epoch": 62.18, + "learning_rate": 0.0002065614035087719, + "loss": 0.1261, + "step": 4166 + }, + { + "epoch": 62.19, + "learning_rate": 0.00020652631578947365, + "loss": 0.0824, + "step": 4167 + }, + { + "epoch": 62.21, + "learning_rate": 0.0002064912280701754, + "loss": 0.0174, + "step": 4168 + }, + { + "epoch": 62.22, + "learning_rate": 0.00020645614035087718, + "loss": 0.0025, + "step": 4169 + }, + { + "epoch": 62.24, + "learning_rate": 0.00020642105263157893, + "loss": 0.0024, + "step": 4170 + }, + { + "epoch": 62.25, + "learning_rate": 0.00020638596491228067, + "loss": 0.1987, + "step": 4171 + }, + { + "epoch": 62.27, + "learning_rate": 0.00020635087719298242, + "loss": 0.2195, + "step": 4172 + }, + { + "epoch": 62.28, + "learning_rate": 0.0002063157894736842, + "loss": 0.2643, + "step": 4173 + }, + { + "epoch": 62.3, + "learning_rate": 0.00020628070175438595, + "loss": 0.1246, + "step": 4174 + }, + { + "epoch": 62.31, + "learning_rate": 0.0002062456140350877, + "loss": 0.0093, + "step": 4175 + }, + { + "epoch": 62.33, + "learning_rate": 0.00020621052631578947, + "loss": 0.0032, + "step": 4176 + }, + { + "epoch": 62.34, + "learning_rate": 0.00020617543859649122, + "loss": 0.0156, + "step": 4177 + }, + { + "epoch": 62.36, + "learning_rate": 0.00020614035087719297, + "loss": 0.0021, + "step": 4178 + }, + { + "epoch": 62.37, + "learning_rate": 0.00020610526315789472, + "loss": 0.011, + "step": 4179 + }, + { + "epoch": 62.39, + "learning_rate": 0.0002060701754385965, + "loss": 0.0111, + "step": 4180 + }, + { + "epoch": 62.4, + "learning_rate": 0.00020603508771929824, + "loss": 0.0531, + "step": 4181 + }, + { + "epoch": 62.42, + "learning_rate": 0.00020599999999999997, + "loss": 0.0025, + "step": 4182 + }, + { + "epoch": 62.43, + "learning_rate": 0.00020596491228070171, + "loss": 0.0019, + "step": 4183 + }, + { + "epoch": 62.45, + "learning_rate": 0.0002059298245614035, + "loss": 0.1355, + "step": 4184 + }, + { + "epoch": 62.46, + "learning_rate": 0.00020589473684210524, + "loss": 0.1004, + "step": 4185 + }, + { + "epoch": 62.48, + "learning_rate": 0.000205859649122807, + "loss": 0.2385, + "step": 4186 + }, + { + "epoch": 62.49, + "learning_rate": 0.00020582456140350876, + "loss": 0.0027, + "step": 4187 + }, + { + "epoch": 62.51, + "learning_rate": 0.0002057894736842105, + "loss": 0.0032, + "step": 4188 + }, + { + "epoch": 62.52, + "learning_rate": 0.00020575438596491226, + "loss": 0.0206, + "step": 4189 + }, + { + "epoch": 62.54, + "learning_rate": 0.000205719298245614, + "loss": 0.0684, + "step": 4190 + }, + { + "epoch": 62.55, + "learning_rate": 0.00020568421052631579, + "loss": 0.0065, + "step": 4191 + }, + { + "epoch": 62.57, + "learning_rate": 0.00020564912280701753, + "loss": 0.0411, + "step": 4192 + }, + { + "epoch": 62.58, + "learning_rate": 0.00020561403508771928, + "loss": 0.1708, + "step": 4193 + }, + { + "epoch": 62.59, + "learning_rate": 0.00020557894736842103, + "loss": 0.0696, + "step": 4194 + }, + { + "epoch": 62.61, + "learning_rate": 0.0002055438596491228, + "loss": 0.111, + "step": 4195 + }, + { + "epoch": 62.62, + "learning_rate": 0.00020550877192982456, + "loss": 0.2224, + "step": 4196 + }, + { + "epoch": 62.64, + "learning_rate": 0.00020547368421052628, + "loss": 0.0619, + "step": 4197 + }, + { + "epoch": 62.65, + "learning_rate": 0.00020543859649122803, + "loss": 0.1194, + "step": 4198 + }, + { + "epoch": 62.67, + "learning_rate": 0.0002054035087719298, + "loss": 0.1641, + "step": 4199 + }, + { + "epoch": 62.68, + "learning_rate": 0.00020536842105263155, + "loss": 0.0026, + "step": 4200 + }, + { + "epoch": 62.68, + "eval_accuracy": 0.8565834557023985, + "eval_f1": 0.8584555081928308, + "eval_loss": 0.6736618876457214, + "eval_runtime": 345.5411, + "eval_samples_per_second": 11.825, + "eval_steps_per_second": 0.741, + "step": 4200 + }, + { + "epoch": 62.7, + "learning_rate": 0.0002053333333333333, + "loss": 0.004, + "step": 4201 + }, + { + "epoch": 62.71, + "learning_rate": 0.00020529824561403508, + "loss": 0.2727, + "step": 4202 + }, + { + "epoch": 62.73, + "learning_rate": 0.00020526315789473683, + "loss": 0.0279, + "step": 4203 + }, + { + "epoch": 62.74, + "learning_rate": 0.00020522807017543857, + "loss": 0.0092, + "step": 4204 + }, + { + "epoch": 62.76, + "learning_rate": 0.00020519298245614032, + "loss": 0.1556, + "step": 4205 + }, + { + "epoch": 62.77, + "learning_rate": 0.0002051578947368421, + "loss": 0.1234, + "step": 4206 + }, + { + "epoch": 62.79, + "learning_rate": 0.00020512280701754385, + "loss": 0.0567, + "step": 4207 + }, + { + "epoch": 62.8, + "learning_rate": 0.0002050877192982456, + "loss": 0.0048, + "step": 4208 + }, + { + "epoch": 62.82, + "learning_rate": 0.00020505263157894735, + "loss": 0.1576, + "step": 4209 + }, + { + "epoch": 62.83, + "learning_rate": 0.00020501754385964912, + "loss": 0.0604, + "step": 4210 + }, + { + "epoch": 62.85, + "learning_rate": 0.00020498245614035087, + "loss": 0.0071, + "step": 4211 + }, + { + "epoch": 62.86, + "learning_rate": 0.0002049473684210526, + "loss": 0.189, + "step": 4212 + }, + { + "epoch": 62.88, + "learning_rate": 0.0002049122807017544, + "loss": 0.0091, + "step": 4213 + }, + { + "epoch": 62.89, + "learning_rate": 0.00020487719298245612, + "loss": 0.1073, + "step": 4214 + }, + { + "epoch": 62.91, + "learning_rate": 0.00020484210526315787, + "loss": 0.0408, + "step": 4215 + }, + { + "epoch": 62.92, + "learning_rate": 0.00020480701754385961, + "loss": 0.1842, + "step": 4216 + }, + { + "epoch": 62.94, + "learning_rate": 0.0002047719298245614, + "loss": 0.0559, + "step": 4217 + }, + { + "epoch": 62.95, + "learning_rate": 0.00020473684210526314, + "loss": 0.0073, + "step": 4218 + }, + { + "epoch": 62.97, + "learning_rate": 0.0002047017543859649, + "loss": 0.071, + "step": 4219 + }, + { + "epoch": 62.98, + "learning_rate": 0.00020466666666666664, + "loss": 0.0027, + "step": 4220 + }, + { + "epoch": 63.0, + "learning_rate": 0.0002046315789473684, + "loss": 0.0257, + "step": 4221 + }, + { + "epoch": 63.01, + "learning_rate": 0.00020459649122807016, + "loss": 0.0126, + "step": 4222 + }, + { + "epoch": 63.03, + "learning_rate": 0.0002045614035087719, + "loss": 0.1263, + "step": 4223 + }, + { + "epoch": 63.04, + "learning_rate": 0.00020452631578947369, + "loss": 0.0039, + "step": 4224 + }, + { + "epoch": 63.06, + "learning_rate": 0.00020449122807017543, + "loss": 0.0579, + "step": 4225 + }, + { + "epoch": 63.07, + "learning_rate": 0.00020445614035087718, + "loss": 0.0764, + "step": 4226 + }, + { + "epoch": 63.09, + "learning_rate": 0.00020442105263157893, + "loss": 0.1427, + "step": 4227 + }, + { + "epoch": 63.1, + "learning_rate": 0.0002043859649122807, + "loss": 0.0591, + "step": 4228 + }, + { + "epoch": 63.12, + "learning_rate": 0.00020435087719298246, + "loss": 0.2312, + "step": 4229 + }, + { + "epoch": 63.13, + "learning_rate": 0.00020431578947368418, + "loss": 0.0047, + "step": 4230 + }, + { + "epoch": 63.15, + "learning_rate": 0.00020428070175438593, + "loss": 0.1699, + "step": 4231 + }, + { + "epoch": 63.16, + "learning_rate": 0.0002042456140350877, + "loss": 0.0909, + "step": 4232 + }, + { + "epoch": 63.18, + "learning_rate": 0.00020421052631578945, + "loss": 0.0058, + "step": 4233 + }, + { + "epoch": 63.19, + "learning_rate": 0.0002041754385964912, + "loss": 0.0023, + "step": 4234 + }, + { + "epoch": 63.21, + "learning_rate": 0.00020414035087719295, + "loss": 0.0038, + "step": 4235 + }, + { + "epoch": 63.22, + "learning_rate": 0.00020410526315789472, + "loss": 0.0051, + "step": 4236 + }, + { + "epoch": 63.24, + "learning_rate": 0.00020407017543859647, + "loss": 0.0402, + "step": 4237 + }, + { + "epoch": 63.25, + "learning_rate": 0.00020403508771929822, + "loss": 0.0693, + "step": 4238 + }, + { + "epoch": 63.27, + "learning_rate": 0.000204, + "loss": 0.0375, + "step": 4239 + }, + { + "epoch": 63.28, + "learning_rate": 0.00020396491228070175, + "loss": 0.0099, + "step": 4240 + }, + { + "epoch": 63.3, + "learning_rate": 0.0002039298245614035, + "loss": 0.194, + "step": 4241 + }, + { + "epoch": 63.31, + "learning_rate": 0.00020389473684210524, + "loss": 0.0048, + "step": 4242 + }, + { + "epoch": 63.33, + "learning_rate": 0.00020385964912280702, + "loss": 0.2181, + "step": 4243 + }, + { + "epoch": 63.34, + "learning_rate": 0.00020382456140350877, + "loss": 0.0991, + "step": 4244 + }, + { + "epoch": 63.36, + "learning_rate": 0.0002037894736842105, + "loss": 0.1155, + "step": 4245 + }, + { + "epoch": 63.37, + "learning_rate": 0.00020375438596491224, + "loss": 0.0227, + "step": 4246 + }, + { + "epoch": 63.39, + "learning_rate": 0.00020371929824561402, + "loss": 0.2667, + "step": 4247 + }, + { + "epoch": 63.4, + "learning_rate": 0.00020368421052631576, + "loss": 0.0338, + "step": 4248 + }, + { + "epoch": 63.42, + "learning_rate": 0.0002036491228070175, + "loss": 0.0102, + "step": 4249 + }, + { + "epoch": 63.43, + "learning_rate": 0.0002036140350877193, + "loss": 0.0171, + "step": 4250 + }, + { + "epoch": 63.45, + "learning_rate": 0.00020357894736842104, + "loss": 0.0062, + "step": 4251 + }, + { + "epoch": 63.46, + "learning_rate": 0.0002035438596491228, + "loss": 0.0692, + "step": 4252 + }, + { + "epoch": 63.48, + "learning_rate": 0.00020350877192982454, + "loss": 0.0126, + "step": 4253 + }, + { + "epoch": 63.49, + "learning_rate": 0.0002034736842105263, + "loss": 0.1374, + "step": 4254 + }, + { + "epoch": 63.51, + "learning_rate": 0.00020343859649122806, + "loss": 0.0052, + "step": 4255 + }, + { + "epoch": 63.52, + "learning_rate": 0.0002034035087719298, + "loss": 0.0199, + "step": 4256 + }, + { + "epoch": 63.54, + "learning_rate": 0.00020336842105263156, + "loss": 0.0057, + "step": 4257 + }, + { + "epoch": 63.55, + "learning_rate": 0.00020333333333333333, + "loss": 0.0557, + "step": 4258 + }, + { + "epoch": 63.57, + "learning_rate": 0.00020329824561403508, + "loss": 0.2364, + "step": 4259 + }, + { + "epoch": 63.58, + "learning_rate": 0.0002032631578947368, + "loss": 0.0175, + "step": 4260 + }, + { + "epoch": 63.59, + "learning_rate": 0.00020322807017543855, + "loss": 0.0065, + "step": 4261 + }, + { + "epoch": 63.61, + "learning_rate": 0.00020319298245614033, + "loss": 0.0044, + "step": 4262 + }, + { + "epoch": 63.62, + "learning_rate": 0.00020315789473684208, + "loss": 0.0074, + "step": 4263 + }, + { + "epoch": 63.64, + "learning_rate": 0.00020312280701754383, + "loss": 0.0929, + "step": 4264 + }, + { + "epoch": 63.65, + "learning_rate": 0.0002030877192982456, + "loss": 0.3596, + "step": 4265 + }, + { + "epoch": 63.67, + "learning_rate": 0.00020305263157894735, + "loss": 0.051, + "step": 4266 + }, + { + "epoch": 63.68, + "learning_rate": 0.0002030175438596491, + "loss": 0.038, + "step": 4267 + }, + { + "epoch": 63.7, + "learning_rate": 0.00020298245614035085, + "loss": 0.0597, + "step": 4268 + }, + { + "epoch": 63.71, + "learning_rate": 0.00020294736842105262, + "loss": 0.1669, + "step": 4269 + }, + { + "epoch": 63.73, + "learning_rate": 0.00020291228070175437, + "loss": 0.0057, + "step": 4270 + }, + { + "epoch": 63.74, + "learning_rate": 0.00020287719298245612, + "loss": 0.0087, + "step": 4271 + }, + { + "epoch": 63.76, + "learning_rate": 0.00020284210526315787, + "loss": 0.2221, + "step": 4272 + }, + { + "epoch": 63.77, + "learning_rate": 0.00020280701754385965, + "loss": 0.0157, + "step": 4273 + }, + { + "epoch": 63.79, + "learning_rate": 0.0002027719298245614, + "loss": 0.2365, + "step": 4274 + }, + { + "epoch": 63.8, + "learning_rate": 0.00020273684210526314, + "loss": 0.01, + "step": 4275 + }, + { + "epoch": 63.82, + "learning_rate": 0.00020270175438596492, + "loss": 0.0646, + "step": 4276 + }, + { + "epoch": 63.83, + "learning_rate": 0.00020266666666666664, + "loss": 0.0485, + "step": 4277 + }, + { + "epoch": 63.85, + "learning_rate": 0.0002026315789473684, + "loss": 0.0316, + "step": 4278 + }, + { + "epoch": 63.86, + "learning_rate": 0.00020259649122807014, + "loss": 0.0029, + "step": 4279 + }, + { + "epoch": 63.88, + "learning_rate": 0.00020256140350877192, + "loss": 0.002, + "step": 4280 + }, + { + "epoch": 63.89, + "learning_rate": 0.00020252631578947366, + "loss": 0.0292, + "step": 4281 + }, + { + "epoch": 63.91, + "learning_rate": 0.0002024912280701754, + "loss": 0.0059, + "step": 4282 + }, + { + "epoch": 63.92, + "learning_rate": 0.00020245614035087716, + "loss": 0.0683, + "step": 4283 + }, + { + "epoch": 63.94, + "learning_rate": 0.00020242105263157894, + "loss": 0.0021, + "step": 4284 + }, + { + "epoch": 63.95, + "learning_rate": 0.00020238596491228069, + "loss": 0.0025, + "step": 4285 + }, + { + "epoch": 63.97, + "learning_rate": 0.00020235087719298243, + "loss": 0.2327, + "step": 4286 + }, + { + "epoch": 63.98, + "learning_rate": 0.0002023157894736842, + "loss": 0.2651, + "step": 4287 + }, + { + "epoch": 64.0, + "learning_rate": 0.00020228070175438596, + "loss": 0.0564, + "step": 4288 + }, + { + "epoch": 64.01, + "learning_rate": 0.0002022456140350877, + "loss": 0.1241, + "step": 4289 + }, + { + "epoch": 64.03, + "learning_rate": 0.00020221052631578946, + "loss": 0.1026, + "step": 4290 + }, + { + "epoch": 64.04, + "learning_rate": 0.00020217543859649123, + "loss": 0.0335, + "step": 4291 + }, + { + "epoch": 64.06, + "learning_rate": 0.00020214035087719298, + "loss": 0.0337, + "step": 4292 + }, + { + "epoch": 64.07, + "learning_rate": 0.0002021052631578947, + "loss": 0.0045, + "step": 4293 + }, + { + "epoch": 64.09, + "learning_rate": 0.00020207017543859645, + "loss": 0.0046, + "step": 4294 + }, + { + "epoch": 64.1, + "learning_rate": 0.00020203508771929823, + "loss": 0.0291, + "step": 4295 + }, + { + "epoch": 64.12, + "learning_rate": 0.00020199999999999998, + "loss": 0.0063, + "step": 4296 + }, + { + "epoch": 64.13, + "learning_rate": 0.00020196491228070173, + "loss": 0.0019, + "step": 4297 + }, + { + "epoch": 64.15, + "learning_rate": 0.00020192982456140347, + "loss": 0.0518, + "step": 4298 + }, + { + "epoch": 64.16, + "learning_rate": 0.00020189473684210525, + "loss": 0.0032, + "step": 4299 + }, + { + "epoch": 64.18, + "learning_rate": 0.000201859649122807, + "loss": 0.0083, + "step": 4300 + }, + { + "epoch": 64.19, + "learning_rate": 0.00020182456140350875, + "loss": 0.0019, + "step": 4301 + }, + { + "epoch": 64.21, + "learning_rate": 0.00020178947368421052, + "loss": 0.0107, + "step": 4302 + }, + { + "epoch": 64.22, + "learning_rate": 0.00020175438596491227, + "loss": 0.0465, + "step": 4303 + }, + { + "epoch": 64.24, + "learning_rate": 0.00020171929824561402, + "loss": 0.0032, + "step": 4304 + }, + { + "epoch": 64.25, + "learning_rate": 0.00020168421052631577, + "loss": 0.0136, + "step": 4305 + }, + { + "epoch": 64.27, + "learning_rate": 0.00020164912280701755, + "loss": 0.1822, + "step": 4306 + }, + { + "epoch": 64.28, + "learning_rate": 0.0002016140350877193, + "loss": 0.022, + "step": 4307 + }, + { + "epoch": 64.3, + "learning_rate": 0.00020157894736842102, + "loss": 0.004, + "step": 4308 + }, + { + "epoch": 64.31, + "learning_rate": 0.00020154385964912277, + "loss": 0.2661, + "step": 4309 + }, + { + "epoch": 64.33, + "learning_rate": 0.00020150877192982454, + "loss": 0.632, + "step": 4310 + }, + { + "epoch": 64.34, + "learning_rate": 0.0002014736842105263, + "loss": 0.0027, + "step": 4311 + }, + { + "epoch": 64.36, + "learning_rate": 0.00020143859649122804, + "loss": 0.0271, + "step": 4312 + }, + { + "epoch": 64.37, + "learning_rate": 0.00020140350877192981, + "loss": 0.0028, + "step": 4313 + }, + { + "epoch": 64.39, + "learning_rate": 0.00020136842105263156, + "loss": 0.0025, + "step": 4314 + }, + { + "epoch": 64.4, + "learning_rate": 0.0002013333333333333, + "loss": 0.0028, + "step": 4315 + }, + { + "epoch": 64.42, + "learning_rate": 0.00020129824561403506, + "loss": 0.029, + "step": 4316 + }, + { + "epoch": 64.43, + "learning_rate": 0.00020126315789473684, + "loss": 0.0315, + "step": 4317 + }, + { + "epoch": 64.45, + "learning_rate": 0.00020122807017543859, + "loss": 0.2121, + "step": 4318 + }, + { + "epoch": 64.46, + "learning_rate": 0.00020119298245614033, + "loss": 0.0234, + "step": 4319 + }, + { + "epoch": 64.48, + "learning_rate": 0.00020115789473684208, + "loss": 0.0018, + "step": 4320 + }, + { + "epoch": 64.49, + "learning_rate": 0.00020112280701754386, + "loss": 0.3627, + "step": 4321 + }, + { + "epoch": 64.51, + "learning_rate": 0.0002010877192982456, + "loss": 0.0109, + "step": 4322 + }, + { + "epoch": 64.52, + "learning_rate": 0.00020105263157894736, + "loss": 0.2671, + "step": 4323 + }, + { + "epoch": 64.54, + "learning_rate": 0.00020101754385964913, + "loss": 0.2699, + "step": 4324 + }, + { + "epoch": 64.55, + "learning_rate": 0.00020098245614035085, + "loss": 0.0016, + "step": 4325 + }, + { + "epoch": 64.57, + "learning_rate": 0.0002009473684210526, + "loss": 0.1817, + "step": 4326 + }, + { + "epoch": 64.58, + "learning_rate": 0.00020091228070175435, + "loss": 0.1634, + "step": 4327 + }, + { + "epoch": 64.59, + "learning_rate": 0.00020087719298245613, + "loss": 0.0018, + "step": 4328 + }, + { + "epoch": 64.61, + "learning_rate": 0.00020084210526315788, + "loss": 0.0023, + "step": 4329 + }, + { + "epoch": 64.62, + "learning_rate": 0.00020080701754385963, + "loss": 0.0434, + "step": 4330 + }, + { + "epoch": 64.64, + "learning_rate": 0.00020077192982456137, + "loss": 0.1258, + "step": 4331 + }, + { + "epoch": 64.65, + "learning_rate": 0.00020073684210526315, + "loss": 0.0021, + "step": 4332 + }, + { + "epoch": 64.67, + "learning_rate": 0.0002007017543859649, + "loss": 0.0041, + "step": 4333 + }, + { + "epoch": 64.68, + "learning_rate": 0.00020066666666666665, + "loss": 0.0288, + "step": 4334 + }, + { + "epoch": 64.7, + "learning_rate": 0.0002006315789473684, + "loss": 0.0029, + "step": 4335 + }, + { + "epoch": 64.71, + "learning_rate": 0.00020059649122807017, + "loss": 0.0031, + "step": 4336 + }, + { + "epoch": 64.73, + "learning_rate": 0.00020056140350877192, + "loss": 0.0202, + "step": 4337 + }, + { + "epoch": 64.74, + "learning_rate": 0.00020052631578947367, + "loss": 0.007, + "step": 4338 + }, + { + "epoch": 64.76, + "learning_rate": 0.00020049122807017545, + "loss": 0.0825, + "step": 4339 + }, + { + "epoch": 64.77, + "learning_rate": 0.0002004561403508772, + "loss": 0.073, + "step": 4340 + }, + { + "epoch": 64.79, + "learning_rate": 0.00020042105263157892, + "loss": 0.0015, + "step": 4341 + }, + { + "epoch": 64.8, + "learning_rate": 0.00020038596491228066, + "loss": 0.1713, + "step": 4342 + }, + { + "epoch": 64.82, + "learning_rate": 0.00020035087719298244, + "loss": 0.1075, + "step": 4343 + }, + { + "epoch": 64.83, + "learning_rate": 0.0002003157894736842, + "loss": 0.0029, + "step": 4344 + }, + { + "epoch": 64.85, + "learning_rate": 0.00020028070175438594, + "loss": 0.0301, + "step": 4345 + }, + { + "epoch": 64.86, + "learning_rate": 0.0002002456140350877, + "loss": 0.0017, + "step": 4346 + }, + { + "epoch": 64.88, + "learning_rate": 0.00020021052631578946, + "loss": 0.0022, + "step": 4347 + }, + { + "epoch": 64.89, + "learning_rate": 0.0002001754385964912, + "loss": 0.3129, + "step": 4348 + }, + { + "epoch": 64.91, + "learning_rate": 0.00020014035087719296, + "loss": 0.101, + "step": 4349 + }, + { + "epoch": 64.92, + "learning_rate": 0.00020010526315789474, + "loss": 0.0029, + "step": 4350 + }, + { + "epoch": 64.94, + "learning_rate": 0.00020007017543859648, + "loss": 0.008, + "step": 4351 + }, + { + "epoch": 64.95, + "learning_rate": 0.00020003508771929823, + "loss": 0.1807, + "step": 4352 + }, + { + "epoch": 64.97, + "learning_rate": 0.00019999999999999998, + "loss": 0.053, + "step": 4353 + }, + { + "epoch": 64.98, + "learning_rate": 0.00019996491228070176, + "loss": 0.1483, + "step": 4354 + }, + { + "epoch": 65.0, + "learning_rate": 0.0001999298245614035, + "loss": 0.0238, + "step": 4355 + }, + { + "epoch": 65.01, + "learning_rate": 0.00019989473684210523, + "loss": 0.0031, + "step": 4356 + }, + { + "epoch": 65.03, + "learning_rate": 0.00019985964912280698, + "loss": 0.0186, + "step": 4357 + }, + { + "epoch": 65.04, + "learning_rate": 0.00019982456140350875, + "loss": 0.04, + "step": 4358 + }, + { + "epoch": 65.06, + "learning_rate": 0.0001997894736842105, + "loss": 0.0081, + "step": 4359 + }, + { + "epoch": 65.07, + "learning_rate": 0.00019975438596491225, + "loss": 0.0024, + "step": 4360 + }, + { + "epoch": 65.09, + "learning_rate": 0.000199719298245614, + "loss": 0.0114, + "step": 4361 + }, + { + "epoch": 65.1, + "learning_rate": 0.00019968421052631578, + "loss": 0.0039, + "step": 4362 + }, + { + "epoch": 65.12, + "learning_rate": 0.00019964912280701752, + "loss": 0.0028, + "step": 4363 + }, + { + "epoch": 65.13, + "learning_rate": 0.00019961403508771927, + "loss": 0.0313, + "step": 4364 + }, + { + "epoch": 65.15, + "learning_rate": 0.00019957894736842105, + "loss": 0.2049, + "step": 4365 + }, + { + "epoch": 65.16, + "learning_rate": 0.0001995438596491228, + "loss": 0.3819, + "step": 4366 + }, + { + "epoch": 65.18, + "learning_rate": 0.00019950877192982455, + "loss": 0.0066, + "step": 4367 + }, + { + "epoch": 65.19, + "learning_rate": 0.0001994736842105263, + "loss": 0.0154, + "step": 4368 + }, + { + "epoch": 65.21, + "learning_rate": 0.00019943859649122807, + "loss": 0.039, + "step": 4369 + }, + { + "epoch": 65.22, + "learning_rate": 0.00019940350877192982, + "loss": 0.0363, + "step": 4370 + }, + { + "epoch": 65.24, + "learning_rate": 0.00019936842105263154, + "loss": 0.1389, + "step": 4371 + }, + { + "epoch": 65.25, + "learning_rate": 0.0001993333333333333, + "loss": 0.0658, + "step": 4372 + }, + { + "epoch": 65.27, + "learning_rate": 0.00019929824561403507, + "loss": 0.2018, + "step": 4373 + }, + { + "epoch": 65.28, + "learning_rate": 0.00019926315789473682, + "loss": 0.0042, + "step": 4374 + }, + { + "epoch": 65.3, + "learning_rate": 0.00019922807017543856, + "loss": 0.0155, + "step": 4375 + }, + { + "epoch": 65.31, + "learning_rate": 0.00019919298245614034, + "loss": 0.1579, + "step": 4376 + }, + { + "epoch": 65.33, + "learning_rate": 0.0001991578947368421, + "loss": 0.4092, + "step": 4377 + }, + { + "epoch": 65.34, + "learning_rate": 0.00019912280701754384, + "loss": 0.0113, + "step": 4378 + }, + { + "epoch": 65.36, + "learning_rate": 0.00019908771929824559, + "loss": 0.3527, + "step": 4379 + }, + { + "epoch": 65.37, + "learning_rate": 0.00019905263157894736, + "loss": 0.0811, + "step": 4380 + }, + { + "epoch": 65.39, + "learning_rate": 0.0001990175438596491, + "loss": 0.0931, + "step": 4381 + }, + { + "epoch": 65.4, + "learning_rate": 0.00019898245614035086, + "loss": 0.1252, + "step": 4382 + }, + { + "epoch": 65.42, + "learning_rate": 0.0001989473684210526, + "loss": 0.263, + "step": 4383 + }, + { + "epoch": 65.43, + "learning_rate": 0.00019891228070175438, + "loss": 0.0018, + "step": 4384 + }, + { + "epoch": 65.45, + "learning_rate": 0.00019887719298245613, + "loss": 0.0345, + "step": 4385 + }, + { + "epoch": 65.46, + "learning_rate": 0.00019884210526315788, + "loss": 0.2768, + "step": 4386 + }, + { + "epoch": 65.48, + "learning_rate": 0.00019880701754385966, + "loss": 0.0059, + "step": 4387 + }, + { + "epoch": 65.49, + "learning_rate": 0.0001987719298245614, + "loss": 0.2509, + "step": 4388 + }, + { + "epoch": 65.51, + "learning_rate": 0.00019873684210526313, + "loss": 0.02, + "step": 4389 + }, + { + "epoch": 65.52, + "learning_rate": 0.00019870175438596488, + "loss": 0.0966, + "step": 4390 + }, + { + "epoch": 65.54, + "learning_rate": 0.00019866666666666665, + "loss": 0.0251, + "step": 4391 + }, + { + "epoch": 65.55, + "learning_rate": 0.0001986315789473684, + "loss": 0.0032, + "step": 4392 + }, + { + "epoch": 65.57, + "learning_rate": 0.00019859649122807015, + "loss": 0.2893, + "step": 4393 + }, + { + "epoch": 65.58, + "learning_rate": 0.0001985614035087719, + "loss": 0.1985, + "step": 4394 + }, + { + "epoch": 65.59, + "learning_rate": 0.00019852631578947368, + "loss": 0.0492, + "step": 4395 + }, + { + "epoch": 65.61, + "learning_rate": 0.00019849122807017542, + "loss": 0.0081, + "step": 4396 + }, + { + "epoch": 65.62, + "learning_rate": 0.00019845614035087717, + "loss": 0.3234, + "step": 4397 + }, + { + "epoch": 65.64, + "learning_rate": 0.00019842105263157892, + "loss": 0.0271, + "step": 4398 + }, + { + "epoch": 65.65, + "learning_rate": 0.0001983859649122807, + "loss": 0.1399, + "step": 4399 + }, + { + "epoch": 65.67, + "learning_rate": 0.00019835087719298245, + "loss": 0.0043, + "step": 4400 + }, + { + "epoch": 65.67, + "eval_accuracy": 0.8387175721977485, + "eval_f1": 0.8415862353144246, + "eval_loss": 0.7780405879020691, + "eval_runtime": 345.1415, + "eval_samples_per_second": 11.839, + "eval_steps_per_second": 0.742, + "step": 4400 + }, + { + "epoch": 65.68, + "learning_rate": 0.0001983157894736842, + "loss": 0.1963, + "step": 4401 + }, + { + "epoch": 65.7, + "learning_rate": 0.00019828070175438597, + "loss": 0.3146, + "step": 4402 + }, + { + "epoch": 65.71, + "learning_rate": 0.00019824561403508772, + "loss": 0.0092, + "step": 4403 + }, + { + "epoch": 65.73, + "learning_rate": 0.00019821052631578944, + "loss": 0.0416, + "step": 4404 + }, + { + "epoch": 65.74, + "learning_rate": 0.0001981754385964912, + "loss": 0.2324, + "step": 4405 + }, + { + "epoch": 65.76, + "learning_rate": 0.00019814035087719297, + "loss": 0.1401, + "step": 4406 + }, + { + "epoch": 65.77, + "learning_rate": 0.00019810526315789471, + "loss": 0.0429, + "step": 4407 + }, + { + "epoch": 65.79, + "learning_rate": 0.00019807017543859646, + "loss": 0.1155, + "step": 4408 + }, + { + "epoch": 65.8, + "learning_rate": 0.0001980350877192982, + "loss": 0.1608, + "step": 4409 + }, + { + "epoch": 65.82, + "learning_rate": 0.000198, + "loss": 0.3258, + "step": 4410 + }, + { + "epoch": 65.83, + "learning_rate": 0.00019796491228070174, + "loss": 0.0427, + "step": 4411 + }, + { + "epoch": 65.85, + "learning_rate": 0.00019792982456140349, + "loss": 0.2753, + "step": 4412 + }, + { + "epoch": 65.86, + "learning_rate": 0.00019789473684210526, + "loss": 0.0221, + "step": 4413 + }, + { + "epoch": 65.88, + "learning_rate": 0.000197859649122807, + "loss": 0.0376, + "step": 4414 + }, + { + "epoch": 65.89, + "learning_rate": 0.00019782456140350876, + "loss": 0.0378, + "step": 4415 + }, + { + "epoch": 65.91, + "learning_rate": 0.0001977894736842105, + "loss": 0.0455, + "step": 4416 + }, + { + "epoch": 65.92, + "learning_rate": 0.00019775438596491228, + "loss": 0.0532, + "step": 4417 + }, + { + "epoch": 65.94, + "learning_rate": 0.00019771929824561403, + "loss": 0.1445, + "step": 4418 + }, + { + "epoch": 65.95, + "learning_rate": 0.00019768421052631575, + "loss": 0.035, + "step": 4419 + }, + { + "epoch": 65.97, + "learning_rate": 0.0001976491228070175, + "loss": 0.4898, + "step": 4420 + }, + { + "epoch": 65.98, + "learning_rate": 0.00019761403508771928, + "loss": 0.0462, + "step": 4421 + }, + { + "epoch": 66.0, + "learning_rate": 0.00019757894736842103, + "loss": 0.018, + "step": 4422 + }, + { + "epoch": 66.01, + "learning_rate": 0.00019754385964912278, + "loss": 0.021, + "step": 4423 + }, + { + "epoch": 66.03, + "learning_rate": 0.00019750877192982453, + "loss": 0.1062, + "step": 4424 + }, + { + "epoch": 66.04, + "learning_rate": 0.0001974736842105263, + "loss": 0.2795, + "step": 4425 + }, + { + "epoch": 66.06, + "learning_rate": 0.00019743859649122805, + "loss": 0.0288, + "step": 4426 + }, + { + "epoch": 66.07, + "learning_rate": 0.0001974035087719298, + "loss": 0.0331, + "step": 4427 + }, + { + "epoch": 66.09, + "learning_rate": 0.00019736842105263157, + "loss": 0.0093, + "step": 4428 + }, + { + "epoch": 66.1, + "learning_rate": 0.00019733333333333332, + "loss": 0.0161, + "step": 4429 + }, + { + "epoch": 66.12, + "learning_rate": 0.00019729824561403507, + "loss": 0.0112, + "step": 4430 + }, + { + "epoch": 66.13, + "learning_rate": 0.00019726315789473682, + "loss": 0.0462, + "step": 4431 + }, + { + "epoch": 66.15, + "learning_rate": 0.0001972280701754386, + "loss": 0.0103, + "step": 4432 + }, + { + "epoch": 66.16, + "learning_rate": 0.00019719298245614035, + "loss": 0.0626, + "step": 4433 + }, + { + "epoch": 66.18, + "learning_rate": 0.0001971578947368421, + "loss": 0.0874, + "step": 4434 + }, + { + "epoch": 66.19, + "learning_rate": 0.00019712280701754382, + "loss": 0.0121, + "step": 4435 + }, + { + "epoch": 66.21, + "learning_rate": 0.00019708771929824562, + "loss": 0.2007, + "step": 4436 + }, + { + "epoch": 66.22, + "learning_rate": 0.00019705263157894734, + "loss": 0.0053, + "step": 4437 + }, + { + "epoch": 66.24, + "learning_rate": 0.0001970175438596491, + "loss": 0.0047, + "step": 4438 + }, + { + "epoch": 66.25, + "learning_rate": 0.00019698245614035087, + "loss": 0.0076, + "step": 4439 + }, + { + "epoch": 66.27, + "learning_rate": 0.00019694736842105261, + "loss": 0.1006, + "step": 4440 + }, + { + "epoch": 66.28, + "learning_rate": 0.00019691228070175436, + "loss": 0.0038, + "step": 4441 + }, + { + "epoch": 66.3, + "learning_rate": 0.0001968771929824561, + "loss": 0.0139, + "step": 4442 + }, + { + "epoch": 66.31, + "learning_rate": 0.0001968421052631579, + "loss": 0.0153, + "step": 4443 + }, + { + "epoch": 66.33, + "learning_rate": 0.00019680701754385964, + "loss": 0.087, + "step": 4444 + }, + { + "epoch": 66.34, + "learning_rate": 0.00019677192982456139, + "loss": 0.1041, + "step": 4445 + }, + { + "epoch": 66.36, + "learning_rate": 0.00019673684210526313, + "loss": 0.0709, + "step": 4446 + }, + { + "epoch": 66.37, + "learning_rate": 0.0001967017543859649, + "loss": 0.0218, + "step": 4447 + }, + { + "epoch": 66.39, + "learning_rate": 0.00019666666666666666, + "loss": 0.3569, + "step": 4448 + }, + { + "epoch": 66.4, + "learning_rate": 0.0001966315789473684, + "loss": 0.0036, + "step": 4449 + }, + { + "epoch": 66.42, + "learning_rate": 0.00019659649122807018, + "loss": 0.0239, + "step": 4450 + }, + { + "epoch": 66.43, + "learning_rate": 0.00019656140350877193, + "loss": 0.1154, + "step": 4451 + }, + { + "epoch": 66.45, + "learning_rate": 0.00019652631578947365, + "loss": 0.0184, + "step": 4452 + }, + { + "epoch": 66.46, + "learning_rate": 0.0001964912280701754, + "loss": 0.1561, + "step": 4453 + }, + { + "epoch": 66.48, + "learning_rate": 0.00019645614035087718, + "loss": 0.0059, + "step": 4454 + }, + { + "epoch": 66.49, + "learning_rate": 0.00019642105263157893, + "loss": 0.0033, + "step": 4455 + }, + { + "epoch": 66.51, + "learning_rate": 0.00019638596491228068, + "loss": 0.0037, + "step": 4456 + }, + { + "epoch": 66.52, + "learning_rate": 0.00019635087719298242, + "loss": 0.4539, + "step": 4457 + }, + { + "epoch": 66.54, + "learning_rate": 0.0001963157894736842, + "loss": 0.0782, + "step": 4458 + }, + { + "epoch": 66.55, + "learning_rate": 0.00019628070175438595, + "loss": 0.0233, + "step": 4459 + }, + { + "epoch": 66.57, + "learning_rate": 0.0001962456140350877, + "loss": 0.0077, + "step": 4460 + }, + { + "epoch": 66.58, + "learning_rate": 0.00019621052631578945, + "loss": 0.0778, + "step": 4461 + }, + { + "epoch": 66.59, + "learning_rate": 0.00019617543859649122, + "loss": 0.0108, + "step": 4462 + }, + { + "epoch": 66.61, + "learning_rate": 0.00019614035087719297, + "loss": 0.0633, + "step": 4463 + }, + { + "epoch": 66.62, + "learning_rate": 0.00019610526315789472, + "loss": 0.0154, + "step": 4464 + }, + { + "epoch": 66.64, + "learning_rate": 0.0001960701754385965, + "loss": 0.2093, + "step": 4465 + }, + { + "epoch": 66.65, + "learning_rate": 0.00019603508771929824, + "loss": 0.0022, + "step": 4466 + }, + { + "epoch": 66.67, + "learning_rate": 0.00019599999999999997, + "loss": 0.0282, + "step": 4467 + }, + { + "epoch": 66.68, + "learning_rate": 0.00019596491228070172, + "loss": 0.1057, + "step": 4468 + }, + { + "epoch": 66.7, + "learning_rate": 0.0001959298245614035, + "loss": 0.2186, + "step": 4469 + }, + { + "epoch": 66.71, + "learning_rate": 0.00019589473684210524, + "loss": 0.0053, + "step": 4470 + }, + { + "epoch": 66.73, + "learning_rate": 0.000195859649122807, + "loss": 0.002, + "step": 4471 + }, + { + "epoch": 66.74, + "learning_rate": 0.00019582456140350874, + "loss": 0.0124, + "step": 4472 + }, + { + "epoch": 66.76, + "learning_rate": 0.0001957894736842105, + "loss": 0.0051, + "step": 4473 + }, + { + "epoch": 66.77, + "learning_rate": 0.00019575438596491226, + "loss": 0.0029, + "step": 4474 + }, + { + "epoch": 66.79, + "learning_rate": 0.000195719298245614, + "loss": 0.1849, + "step": 4475 + }, + { + "epoch": 66.8, + "learning_rate": 0.0001956842105263158, + "loss": 0.0463, + "step": 4476 + }, + { + "epoch": 66.82, + "learning_rate": 0.00019564912280701754, + "loss": 0.0748, + "step": 4477 + }, + { + "epoch": 66.83, + "learning_rate": 0.00019561403508771928, + "loss": 0.1394, + "step": 4478 + }, + { + "epoch": 66.85, + "learning_rate": 0.00019557894736842103, + "loss": 0.0029, + "step": 4479 + }, + { + "epoch": 66.86, + "learning_rate": 0.0001955438596491228, + "loss": 0.0054, + "step": 4480 + }, + { + "epoch": 66.88, + "learning_rate": 0.00019550877192982456, + "loss": 0.1783, + "step": 4481 + }, + { + "epoch": 66.89, + "learning_rate": 0.0001954736842105263, + "loss": 0.1708, + "step": 4482 + }, + { + "epoch": 66.91, + "learning_rate": 0.00019543859649122803, + "loss": 0.0019, + "step": 4483 + }, + { + "epoch": 66.92, + "learning_rate": 0.0001954035087719298, + "loss": 0.0016, + "step": 4484 + }, + { + "epoch": 66.94, + "learning_rate": 0.00019536842105263155, + "loss": 0.0361, + "step": 4485 + }, + { + "epoch": 66.95, + "learning_rate": 0.0001953333333333333, + "loss": 0.0388, + "step": 4486 + }, + { + "epoch": 66.97, + "learning_rate": 0.00019529824561403508, + "loss": 0.1492, + "step": 4487 + }, + { + "epoch": 66.98, + "learning_rate": 0.00019526315789473683, + "loss": 0.246, + "step": 4488 + }, + { + "epoch": 67.0, + "learning_rate": 0.00019522807017543858, + "loss": 0.0024, + "step": 4489 + }, + { + "epoch": 67.01, + "learning_rate": 0.00019519298245614032, + "loss": 0.0028, + "step": 4490 + }, + { + "epoch": 67.03, + "learning_rate": 0.0001951578947368421, + "loss": 0.009, + "step": 4491 + }, + { + "epoch": 67.04, + "learning_rate": 0.00019512280701754385, + "loss": 0.0023, + "step": 4492 + }, + { + "epoch": 67.06, + "learning_rate": 0.0001950877192982456, + "loss": 0.0056, + "step": 4493 + }, + { + "epoch": 67.07, + "learning_rate": 0.00019505263157894735, + "loss": 0.0018, + "step": 4494 + }, + { + "epoch": 67.09, + "learning_rate": 0.00019501754385964912, + "loss": 0.0128, + "step": 4495 + }, + { + "epoch": 67.1, + "learning_rate": 0.00019498245614035087, + "loss": 0.0055, + "step": 4496 + }, + { + "epoch": 67.12, + "learning_rate": 0.00019494736842105262, + "loss": 0.0201, + "step": 4497 + }, + { + "epoch": 67.13, + "learning_rate": 0.00019491228070175434, + "loss": 0.0064, + "step": 4498 + }, + { + "epoch": 67.15, + "learning_rate": 0.00019487719298245614, + "loss": 0.0973, + "step": 4499 + }, + { + "epoch": 67.16, + "learning_rate": 0.00019484210526315787, + "loss": 0.0027, + "step": 4500 + }, + { + "epoch": 67.18, + "learning_rate": 0.00019480701754385961, + "loss": 0.1812, + "step": 4501 + }, + { + "epoch": 67.19, + "learning_rate": 0.0001947719298245614, + "loss": 0.0089, + "step": 4502 + }, + { + "epoch": 67.21, + "learning_rate": 0.00019473684210526314, + "loss": 0.0528, + "step": 4503 + }, + { + "epoch": 67.22, + "learning_rate": 0.0001947017543859649, + "loss": 0.002, + "step": 4504 + }, + { + "epoch": 67.24, + "learning_rate": 0.00019466666666666664, + "loss": 0.0464, + "step": 4505 + }, + { + "epoch": 67.25, + "learning_rate": 0.0001946315789473684, + "loss": 0.0053, + "step": 4506 + }, + { + "epoch": 67.27, + "learning_rate": 0.00019459649122807016, + "loss": 0.0055, + "step": 4507 + }, + { + "epoch": 67.28, + "learning_rate": 0.0001945614035087719, + "loss": 0.0269, + "step": 4508 + }, + { + "epoch": 67.3, + "learning_rate": 0.00019452631578947366, + "loss": 0.0141, + "step": 4509 + }, + { + "epoch": 67.31, + "learning_rate": 0.00019449122807017544, + "loss": 0.0025, + "step": 4510 + }, + { + "epoch": 67.33, + "learning_rate": 0.00019445614035087718, + "loss": 0.192, + "step": 4511 + }, + { + "epoch": 67.34, + "learning_rate": 0.00019442105263157893, + "loss": 0.1234, + "step": 4512 + }, + { + "epoch": 67.36, + "learning_rate": 0.0001943859649122807, + "loss": 0.0036, + "step": 4513 + }, + { + "epoch": 67.37, + "learning_rate": 0.00019435087719298246, + "loss": 0.1498, + "step": 4514 + }, + { + "epoch": 67.39, + "learning_rate": 0.00019431578947368418, + "loss": 0.004, + "step": 4515 + }, + { + "epoch": 67.4, + "learning_rate": 0.00019428070175438593, + "loss": 0.0039, + "step": 4516 + }, + { + "epoch": 67.42, + "learning_rate": 0.0001942456140350877, + "loss": 0.1355, + "step": 4517 + }, + { + "epoch": 67.43, + "learning_rate": 0.00019421052631578945, + "loss": 0.0039, + "step": 4518 + }, + { + "epoch": 67.45, + "learning_rate": 0.0001941754385964912, + "loss": 0.0032, + "step": 4519 + }, + { + "epoch": 67.46, + "learning_rate": 0.00019414035087719295, + "loss": 0.0042, + "step": 4520 + }, + { + "epoch": 67.48, + "learning_rate": 0.00019410526315789473, + "loss": 0.0115, + "step": 4521 + }, + { + "epoch": 67.49, + "learning_rate": 0.00019407017543859647, + "loss": 0.2845, + "step": 4522 + }, + { + "epoch": 67.51, + "learning_rate": 0.00019403508771929822, + "loss": 0.0287, + "step": 4523 + }, + { + "epoch": 67.52, + "learning_rate": 0.00019399999999999997, + "loss": 0.0677, + "step": 4524 + }, + { + "epoch": 67.54, + "learning_rate": 0.00019396491228070175, + "loss": 0.1119, + "step": 4525 + }, + { + "epoch": 67.55, + "learning_rate": 0.0001939298245614035, + "loss": 0.0017, + "step": 4526 + }, + { + "epoch": 67.57, + "learning_rate": 0.00019389473684210525, + "loss": 0.5221, + "step": 4527 + }, + { + "epoch": 67.58, + "learning_rate": 0.00019385964912280702, + "loss": 0.2991, + "step": 4528 + }, + { + "epoch": 67.59, + "learning_rate": 0.00019382456140350877, + "loss": 0.0145, + "step": 4529 + }, + { + "epoch": 67.61, + "learning_rate": 0.00019378947368421052, + "loss": 0.0054, + "step": 4530 + }, + { + "epoch": 67.62, + "learning_rate": 0.00019375438596491224, + "loss": 0.0019, + "step": 4531 + }, + { + "epoch": 67.64, + "learning_rate": 0.00019371929824561402, + "loss": 0.002, + "step": 4532 + }, + { + "epoch": 67.65, + "learning_rate": 0.00019368421052631577, + "loss": 0.1062, + "step": 4533 + }, + { + "epoch": 67.67, + "learning_rate": 0.00019364912280701751, + "loss": 0.0275, + "step": 4534 + }, + { + "epoch": 67.68, + "learning_rate": 0.00019361403508771926, + "loss": 0.0017, + "step": 4535 + }, + { + "epoch": 67.7, + "learning_rate": 0.00019357894736842104, + "loss": 0.2389, + "step": 4536 + }, + { + "epoch": 67.71, + "learning_rate": 0.0001935438596491228, + "loss": 0.0711, + "step": 4537 + }, + { + "epoch": 67.73, + "learning_rate": 0.00019350877192982454, + "loss": 0.0034, + "step": 4538 + }, + { + "epoch": 67.74, + "learning_rate": 0.0001934736842105263, + "loss": 0.0016, + "step": 4539 + }, + { + "epoch": 67.76, + "learning_rate": 0.00019343859649122806, + "loss": 0.0042, + "step": 4540 + }, + { + "epoch": 67.77, + "learning_rate": 0.0001934035087719298, + "loss": 0.0104, + "step": 4541 + }, + { + "epoch": 67.79, + "learning_rate": 0.00019336842105263156, + "loss": 0.0176, + "step": 4542 + }, + { + "epoch": 67.8, + "learning_rate": 0.00019333333333333333, + "loss": 0.0078, + "step": 4543 + }, + { + "epoch": 67.82, + "learning_rate": 0.00019329824561403508, + "loss": 0.0097, + "step": 4544 + }, + { + "epoch": 67.83, + "learning_rate": 0.00019326315789473683, + "loss": 0.0031, + "step": 4545 + }, + { + "epoch": 67.85, + "learning_rate": 0.00019322807017543855, + "loss": 0.0199, + "step": 4546 + }, + { + "epoch": 67.86, + "learning_rate": 0.00019319298245614036, + "loss": 0.0096, + "step": 4547 + }, + { + "epoch": 67.88, + "learning_rate": 0.00019315789473684208, + "loss": 0.0373, + "step": 4548 + }, + { + "epoch": 67.89, + "learning_rate": 0.00019312280701754383, + "loss": 0.0832, + "step": 4549 + }, + { + "epoch": 67.91, + "learning_rate": 0.0001930877192982456, + "loss": 0.0047, + "step": 4550 + }, + { + "epoch": 67.92, + "learning_rate": 0.00019305263157894735, + "loss": 0.003, + "step": 4551 + }, + { + "epoch": 67.94, + "learning_rate": 0.0001930175438596491, + "loss": 0.0063, + "step": 4552 + }, + { + "epoch": 67.95, + "learning_rate": 0.00019298245614035085, + "loss": 0.0092, + "step": 4553 + }, + { + "epoch": 67.97, + "learning_rate": 0.00019294736842105263, + "loss": 0.0266, + "step": 4554 + }, + { + "epoch": 67.98, + "learning_rate": 0.00019291228070175437, + "loss": 0.0016, + "step": 4555 + }, + { + "epoch": 68.0, + "learning_rate": 0.00019287719298245612, + "loss": 0.0127, + "step": 4556 + }, + { + "epoch": 68.01, + "learning_rate": 0.00019284210526315787, + "loss": 0.0633, + "step": 4557 + }, + { + "epoch": 68.03, + "learning_rate": 0.00019280701754385965, + "loss": 0.0054, + "step": 4558 + }, + { + "epoch": 68.04, + "learning_rate": 0.0001927719298245614, + "loss": 0.003, + "step": 4559 + }, + { + "epoch": 68.06, + "learning_rate": 0.00019273684210526315, + "loss": 0.1727, + "step": 4560 + }, + { + "epoch": 68.07, + "learning_rate": 0.00019270175438596487, + "loss": 0.0038, + "step": 4561 + }, + { + "epoch": 68.09, + "learning_rate": 0.00019266666666666667, + "loss": 0.0344, + "step": 4562 + }, + { + "epoch": 68.1, + "learning_rate": 0.0001926315789473684, + "loss": 0.0019, + "step": 4563 + }, + { + "epoch": 68.12, + "learning_rate": 0.00019259649122807014, + "loss": 0.0056, + "step": 4564 + }, + { + "epoch": 68.13, + "learning_rate": 0.00019256140350877192, + "loss": 0.0031, + "step": 4565 + }, + { + "epoch": 68.15, + "learning_rate": 0.00019252631578947366, + "loss": 0.002, + "step": 4566 + }, + { + "epoch": 68.16, + "learning_rate": 0.00019249122807017541, + "loss": 0.01, + "step": 4567 + }, + { + "epoch": 68.18, + "learning_rate": 0.00019245614035087716, + "loss": 0.0028, + "step": 4568 + }, + { + "epoch": 68.19, + "learning_rate": 0.00019242105263157894, + "loss": 0.1223, + "step": 4569 + }, + { + "epoch": 68.21, + "learning_rate": 0.0001923859649122807, + "loss": 0.0052, + "step": 4570 + }, + { + "epoch": 68.22, + "learning_rate": 0.00019235087719298244, + "loss": 0.3806, + "step": 4571 + }, + { + "epoch": 68.24, + "learning_rate": 0.00019231578947368418, + "loss": 0.0019, + "step": 4572 + }, + { + "epoch": 68.25, + "learning_rate": 0.00019228070175438596, + "loss": 0.0223, + "step": 4573 + }, + { + "epoch": 68.27, + "learning_rate": 0.0001922456140350877, + "loss": 0.0017, + "step": 4574 + }, + { + "epoch": 68.28, + "learning_rate": 0.00019221052631578946, + "loss": 0.1442, + "step": 4575 + }, + { + "epoch": 68.3, + "learning_rate": 0.00019217543859649123, + "loss": 0.0333, + "step": 4576 + }, + { + "epoch": 68.31, + "learning_rate": 0.00019214035087719298, + "loss": 0.1573, + "step": 4577 + }, + { + "epoch": 68.33, + "learning_rate": 0.0001921052631578947, + "loss": 0.1816, + "step": 4578 + }, + { + "epoch": 68.34, + "learning_rate": 0.00019207017543859645, + "loss": 0.0019, + "step": 4579 + }, + { + "epoch": 68.36, + "learning_rate": 0.00019203508771929823, + "loss": 0.0022, + "step": 4580 + }, + { + "epoch": 68.37, + "learning_rate": 0.00019199999999999998, + "loss": 0.0021, + "step": 4581 + }, + { + "epoch": 68.39, + "learning_rate": 0.00019196491228070173, + "loss": 0.0018, + "step": 4582 + }, + { + "epoch": 68.4, + "learning_rate": 0.00019192982456140348, + "loss": 0.0197, + "step": 4583 + }, + { + "epoch": 68.42, + "learning_rate": 0.00019189473684210525, + "loss": 0.1204, + "step": 4584 + }, + { + "epoch": 68.43, + "learning_rate": 0.000191859649122807, + "loss": 0.0027, + "step": 4585 + }, + { + "epoch": 68.45, + "learning_rate": 0.00019182456140350875, + "loss": 0.194, + "step": 4586 + }, + { + "epoch": 68.46, + "learning_rate": 0.0001917894736842105, + "loss": 0.0022, + "step": 4587 + }, + { + "epoch": 68.48, + "learning_rate": 0.00019175438596491227, + "loss": 0.003, + "step": 4588 + }, + { + "epoch": 68.49, + "learning_rate": 0.00019171929824561402, + "loss": 0.1746, + "step": 4589 + }, + { + "epoch": 68.51, + "learning_rate": 0.00019168421052631577, + "loss": 0.2832, + "step": 4590 + }, + { + "epoch": 68.52, + "learning_rate": 0.00019164912280701755, + "loss": 0.0399, + "step": 4591 + }, + { + "epoch": 68.54, + "learning_rate": 0.0001916140350877193, + "loss": 0.0209, + "step": 4592 + }, + { + "epoch": 68.55, + "learning_rate": 0.00019157894736842104, + "loss": 0.0055, + "step": 4593 + }, + { + "epoch": 68.57, + "learning_rate": 0.00019154385964912277, + "loss": 0.0084, + "step": 4594 + }, + { + "epoch": 68.58, + "learning_rate": 0.00019150877192982457, + "loss": 0.0051, + "step": 4595 + }, + { + "epoch": 68.59, + "learning_rate": 0.0001914736842105263, + "loss": 0.1722, + "step": 4596 + }, + { + "epoch": 68.61, + "learning_rate": 0.00019143859649122804, + "loss": 0.0606, + "step": 4597 + }, + { + "epoch": 68.62, + "learning_rate": 0.0001914035087719298, + "loss": 0.0611, + "step": 4598 + }, + { + "epoch": 68.64, + "learning_rate": 0.00019136842105263156, + "loss": 0.0172, + "step": 4599 + }, + { + "epoch": 68.65, + "learning_rate": 0.0001913333333333333, + "loss": 0.0032, + "step": 4600 + }, + { + "epoch": 68.65, + "eval_accuracy": 0.846059716103769, + "eval_f1": 0.8482206471494375, + "eval_loss": 0.689896821975708, + "eval_runtime": 344.0746, + "eval_samples_per_second": 11.875, + "eval_steps_per_second": 0.744, + "step": 4600 + }, + { + "epoch": 68.67, + "learning_rate": 0.00019129824561403506, + "loss": 0.0392, + "step": 4601 + }, + { + "epoch": 68.68, + "learning_rate": 0.00019126315789473684, + "loss": 0.0019, + "step": 4602 + }, + { + "epoch": 68.7, + "learning_rate": 0.00019122807017543859, + "loss": 0.0025, + "step": 4603 + }, + { + "epoch": 68.71, + "learning_rate": 0.00019119298245614034, + "loss": 0.0398, + "step": 4604 + }, + { + "epoch": 68.73, + "learning_rate": 0.00019115789473684208, + "loss": 0.1833, + "step": 4605 + }, + { + "epoch": 68.74, + "learning_rate": 0.00019112280701754386, + "loss": 0.0022, + "step": 4606 + }, + { + "epoch": 68.76, + "learning_rate": 0.0001910877192982456, + "loss": 0.0574, + "step": 4607 + }, + { + "epoch": 68.77, + "learning_rate": 0.00019105263157894736, + "loss": 0.0379, + "step": 4608 + }, + { + "epoch": 68.79, + "learning_rate": 0.00019101754385964908, + "loss": 0.0053, + "step": 4609 + }, + { + "epoch": 68.8, + "learning_rate": 0.00019098245614035088, + "loss": 0.0207, + "step": 4610 + }, + { + "epoch": 68.82, + "learning_rate": 0.0001909473684210526, + "loss": 0.0028, + "step": 4611 + }, + { + "epoch": 68.83, + "learning_rate": 0.00019091228070175435, + "loss": 0.0186, + "step": 4612 + }, + { + "epoch": 68.85, + "learning_rate": 0.00019087719298245613, + "loss": 0.1757, + "step": 4613 + }, + { + "epoch": 68.86, + "learning_rate": 0.00019084210526315788, + "loss": 0.0196, + "step": 4614 + }, + { + "epoch": 68.88, + "learning_rate": 0.00019080701754385963, + "loss": 0.1241, + "step": 4615 + }, + { + "epoch": 68.89, + "learning_rate": 0.00019077192982456137, + "loss": 0.0025, + "step": 4616 + }, + { + "epoch": 68.91, + "learning_rate": 0.00019073684210526315, + "loss": 0.2688, + "step": 4617 + }, + { + "epoch": 68.92, + "learning_rate": 0.0001907017543859649, + "loss": 0.0955, + "step": 4618 + }, + { + "epoch": 68.94, + "learning_rate": 0.00019066666666666665, + "loss": 0.1777, + "step": 4619 + }, + { + "epoch": 68.95, + "learning_rate": 0.0001906315789473684, + "loss": 0.0022, + "step": 4620 + }, + { + "epoch": 68.97, + "learning_rate": 0.00019059649122807017, + "loss": 0.0059, + "step": 4621 + }, + { + "epoch": 68.98, + "learning_rate": 0.00019056140350877192, + "loss": 0.0378, + "step": 4622 + }, + { + "epoch": 69.0, + "learning_rate": 0.00019052631578947367, + "loss": 0.1003, + "step": 4623 + }, + { + "epoch": 69.01, + "learning_rate": 0.00019049122807017542, + "loss": 0.2193, + "step": 4624 + }, + { + "epoch": 69.03, + "learning_rate": 0.0001904561403508772, + "loss": 0.211, + "step": 4625 + }, + { + "epoch": 69.04, + "learning_rate": 0.00019042105263157892, + "loss": 0.1146, + "step": 4626 + }, + { + "epoch": 69.06, + "learning_rate": 0.00019038596491228067, + "loss": 0.0106, + "step": 4627 + }, + { + "epoch": 69.07, + "learning_rate": 0.00019035087719298244, + "loss": 0.2057, + "step": 4628 + }, + { + "epoch": 69.09, + "learning_rate": 0.0001903157894736842, + "loss": 0.011, + "step": 4629 + }, + { + "epoch": 69.1, + "learning_rate": 0.00019028070175438594, + "loss": 0.1871, + "step": 4630 + }, + { + "epoch": 69.12, + "learning_rate": 0.0001902456140350877, + "loss": 0.2694, + "step": 4631 + }, + { + "epoch": 69.13, + "learning_rate": 0.00019021052631578946, + "loss": 0.0786, + "step": 4632 + }, + { + "epoch": 69.15, + "learning_rate": 0.0001901754385964912, + "loss": 0.0609, + "step": 4633 + }, + { + "epoch": 69.16, + "learning_rate": 0.00019014035087719296, + "loss": 0.1266, + "step": 4634 + }, + { + "epoch": 69.18, + "learning_rate": 0.0001901052631578947, + "loss": 0.0047, + "step": 4635 + }, + { + "epoch": 69.19, + "learning_rate": 0.00019007017543859649, + "loss": 0.0186, + "step": 4636 + }, + { + "epoch": 69.21, + "learning_rate": 0.00019003508771929823, + "loss": 0.1044, + "step": 4637 + }, + { + "epoch": 69.22, + "learning_rate": 0.00018999999999999998, + "loss": 0.0125, + "step": 4638 + }, + { + "epoch": 69.24, + "learning_rate": 0.00018996491228070176, + "loss": 0.0122, + "step": 4639 + }, + { + "epoch": 69.25, + "learning_rate": 0.0001899298245614035, + "loss": 0.0034, + "step": 4640 + }, + { + "epoch": 69.27, + "learning_rate": 0.00018989473684210526, + "loss": 0.2012, + "step": 4641 + }, + { + "epoch": 69.28, + "learning_rate": 0.00018985964912280698, + "loss": 0.1219, + "step": 4642 + }, + { + "epoch": 69.3, + "learning_rate": 0.00018982456140350878, + "loss": 0.128, + "step": 4643 + }, + { + "epoch": 69.31, + "learning_rate": 0.0001897894736842105, + "loss": 0.2125, + "step": 4644 + }, + { + "epoch": 69.33, + "learning_rate": 0.00018975438596491225, + "loss": 0.0541, + "step": 4645 + }, + { + "epoch": 69.34, + "learning_rate": 0.000189719298245614, + "loss": 0.0034, + "step": 4646 + }, + { + "epoch": 69.36, + "learning_rate": 0.00018968421052631578, + "loss": 0.1952, + "step": 4647 + }, + { + "epoch": 69.37, + "learning_rate": 0.00018964912280701753, + "loss": 0.1402, + "step": 4648 + }, + { + "epoch": 69.39, + "learning_rate": 0.00018961403508771927, + "loss": 0.0026, + "step": 4649 + }, + { + "epoch": 69.4, + "learning_rate": 0.00018957894736842105, + "loss": 0.0261, + "step": 4650 + }, + { + "epoch": 69.42, + "learning_rate": 0.0001895438596491228, + "loss": 0.0645, + "step": 4651 + }, + { + "epoch": 69.43, + "learning_rate": 0.00018950877192982455, + "loss": 0.0089, + "step": 4652 + }, + { + "epoch": 69.45, + "learning_rate": 0.0001894736842105263, + "loss": 0.0456, + "step": 4653 + }, + { + "epoch": 69.46, + "learning_rate": 0.00018943859649122807, + "loss": 0.0451, + "step": 4654 + }, + { + "epoch": 69.48, + "learning_rate": 0.00018940350877192982, + "loss": 0.1816, + "step": 4655 + }, + { + "epoch": 69.49, + "learning_rate": 0.00018936842105263157, + "loss": 0.0079, + "step": 4656 + }, + { + "epoch": 69.51, + "learning_rate": 0.0001893333333333333, + "loss": 0.0054, + "step": 4657 + }, + { + "epoch": 69.52, + "learning_rate": 0.0001892982456140351, + "loss": 0.0098, + "step": 4658 + }, + { + "epoch": 69.54, + "learning_rate": 0.00018926315789473682, + "loss": 0.1181, + "step": 4659 + }, + { + "epoch": 69.55, + "learning_rate": 0.00018922807017543856, + "loss": 0.0056, + "step": 4660 + }, + { + "epoch": 69.57, + "learning_rate": 0.00018919298245614031, + "loss": 0.0632, + "step": 4661 + }, + { + "epoch": 69.58, + "learning_rate": 0.0001891578947368421, + "loss": 0.0759, + "step": 4662 + }, + { + "epoch": 69.59, + "learning_rate": 0.00018912280701754384, + "loss": 0.0149, + "step": 4663 + }, + { + "epoch": 69.61, + "learning_rate": 0.0001890877192982456, + "loss": 0.0175, + "step": 4664 + }, + { + "epoch": 69.62, + "learning_rate": 0.00018905263157894736, + "loss": 0.0583, + "step": 4665 + }, + { + "epoch": 69.64, + "learning_rate": 0.0001890175438596491, + "loss": 0.0057, + "step": 4666 + }, + { + "epoch": 69.65, + "learning_rate": 0.00018898245614035086, + "loss": 0.023, + "step": 4667 + }, + { + "epoch": 69.67, + "learning_rate": 0.0001889473684210526, + "loss": 0.0054, + "step": 4668 + }, + { + "epoch": 69.68, + "learning_rate": 0.00018891228070175439, + "loss": 0.0018, + "step": 4669 + }, + { + "epoch": 69.7, + "learning_rate": 0.00018887719298245613, + "loss": 0.0502, + "step": 4670 + }, + { + "epoch": 69.71, + "learning_rate": 0.00018884210526315788, + "loss": 0.0018, + "step": 4671 + }, + { + "epoch": 69.73, + "learning_rate": 0.0001888070175438596, + "loss": 0.005, + "step": 4672 + }, + { + "epoch": 69.74, + "learning_rate": 0.0001887719298245614, + "loss": 0.3566, + "step": 4673 + }, + { + "epoch": 69.76, + "learning_rate": 0.00018873684210526313, + "loss": 0.0016, + "step": 4674 + }, + { + "epoch": 69.77, + "learning_rate": 0.00018870175438596488, + "loss": 0.1829, + "step": 4675 + }, + { + "epoch": 69.79, + "learning_rate": 0.00018866666666666665, + "loss": 0.0085, + "step": 4676 + }, + { + "epoch": 69.8, + "learning_rate": 0.0001886315789473684, + "loss": 0.0765, + "step": 4677 + }, + { + "epoch": 69.82, + "learning_rate": 0.00018859649122807015, + "loss": 0.0022, + "step": 4678 + }, + { + "epoch": 69.83, + "learning_rate": 0.0001885614035087719, + "loss": 0.0162, + "step": 4679 + }, + { + "epoch": 69.85, + "learning_rate": 0.00018852631578947368, + "loss": 0.2237, + "step": 4680 + }, + { + "epoch": 69.86, + "learning_rate": 0.00018849122807017542, + "loss": 0.2743, + "step": 4681 + }, + { + "epoch": 69.88, + "learning_rate": 0.00018845614035087717, + "loss": 0.0056, + "step": 4682 + }, + { + "epoch": 69.89, + "learning_rate": 0.00018842105263157892, + "loss": 0.0636, + "step": 4683 + }, + { + "epoch": 69.91, + "learning_rate": 0.0001883859649122807, + "loss": 0.0069, + "step": 4684 + }, + { + "epoch": 69.92, + "learning_rate": 0.00018835087719298245, + "loss": 0.1107, + "step": 4685 + }, + { + "epoch": 69.94, + "learning_rate": 0.0001883157894736842, + "loss": 0.0033, + "step": 4686 + }, + { + "epoch": 69.95, + "learning_rate": 0.00018828070175438594, + "loss": 0.0026, + "step": 4687 + }, + { + "epoch": 69.97, + "learning_rate": 0.00018824561403508772, + "loss": 0.188, + "step": 4688 + }, + { + "epoch": 69.98, + "learning_rate": 0.00018821052631578947, + "loss": 0.1188, + "step": 4689 + }, + { + "epoch": 70.0, + "learning_rate": 0.0001881754385964912, + "loss": 0.0255, + "step": 4690 + }, + { + "epoch": 70.01, + "learning_rate": 0.00018814035087719297, + "loss": 0.0019, + "step": 4691 + }, + { + "epoch": 70.03, + "learning_rate": 0.00018810526315789472, + "loss": 0.2015, + "step": 4692 + }, + { + "epoch": 70.04, + "learning_rate": 0.00018807017543859646, + "loss": 0.0018, + "step": 4693 + }, + { + "epoch": 70.06, + "learning_rate": 0.0001880350877192982, + "loss": 0.0096, + "step": 4694 + }, + { + "epoch": 70.07, + "learning_rate": 0.000188, + "loss": 0.0097, + "step": 4695 + }, + { + "epoch": 70.09, + "learning_rate": 0.00018796491228070174, + "loss": 0.1639, + "step": 4696 + }, + { + "epoch": 70.1, + "learning_rate": 0.0001879298245614035, + "loss": 0.0949, + "step": 4697 + }, + { + "epoch": 70.12, + "learning_rate": 0.00018789473684210524, + "loss": 0.0125, + "step": 4698 + }, + { + "epoch": 70.13, + "learning_rate": 0.000187859649122807, + "loss": 0.0019, + "step": 4699 + }, + { + "epoch": 70.15, + "learning_rate": 0.00018782456140350876, + "loss": 0.0016, + "step": 4700 + }, + { + "epoch": 70.16, + "learning_rate": 0.0001877894736842105, + "loss": 0.0095, + "step": 4701 + }, + { + "epoch": 70.18, + "learning_rate": 0.00018775438596491228, + "loss": 0.0103, + "step": 4702 + }, + { + "epoch": 70.19, + "learning_rate": 0.00018771929824561403, + "loss": 0.0036, + "step": 4703 + }, + { + "epoch": 70.21, + "learning_rate": 0.00018768421052631578, + "loss": 0.0878, + "step": 4704 + }, + { + "epoch": 70.22, + "learning_rate": 0.0001876491228070175, + "loss": 0.1278, + "step": 4705 + }, + { + "epoch": 70.24, + "learning_rate": 0.0001876140350877193, + "loss": 0.0028, + "step": 4706 + }, + { + "epoch": 70.25, + "learning_rate": 0.00018757894736842103, + "loss": 0.0033, + "step": 4707 + }, + { + "epoch": 70.27, + "learning_rate": 0.00018754385964912278, + "loss": 0.0084, + "step": 4708 + }, + { + "epoch": 70.28, + "learning_rate": 0.00018750877192982453, + "loss": 0.002, + "step": 4709 + }, + { + "epoch": 70.3, + "learning_rate": 0.0001874736842105263, + "loss": 0.2291, + "step": 4710 + }, + { + "epoch": 70.31, + "learning_rate": 0.00018743859649122805, + "loss": 0.005, + "step": 4711 + }, + { + "epoch": 70.33, + "learning_rate": 0.0001874035087719298, + "loss": 0.133, + "step": 4712 + }, + { + "epoch": 70.34, + "learning_rate": 0.00018736842105263158, + "loss": 0.0036, + "step": 4713 + }, + { + "epoch": 70.36, + "learning_rate": 0.00018733333333333332, + "loss": 0.1317, + "step": 4714 + }, + { + "epoch": 70.37, + "learning_rate": 0.00018729824561403507, + "loss": 0.0028, + "step": 4715 + }, + { + "epoch": 70.39, + "learning_rate": 0.00018726315789473682, + "loss": 0.02, + "step": 4716 + }, + { + "epoch": 70.4, + "learning_rate": 0.0001872280701754386, + "loss": 0.0013, + "step": 4717 + }, + { + "epoch": 70.42, + "learning_rate": 0.00018719298245614035, + "loss": 0.0013, + "step": 4718 + }, + { + "epoch": 70.43, + "learning_rate": 0.0001871578947368421, + "loss": 0.0015, + "step": 4719 + }, + { + "epoch": 70.45, + "learning_rate": 0.00018712280701754382, + "loss": 0.0062, + "step": 4720 + }, + { + "epoch": 70.46, + "learning_rate": 0.00018708771929824562, + "loss": 0.3317, + "step": 4721 + }, + { + "epoch": 70.48, + "learning_rate": 0.00018705263157894734, + "loss": 0.0073, + "step": 4722 + }, + { + "epoch": 70.49, + "learning_rate": 0.0001870175438596491, + "loss": 0.0042, + "step": 4723 + }, + { + "epoch": 70.51, + "learning_rate": 0.00018698245614035084, + "loss": 0.0138, + "step": 4724 + }, + { + "epoch": 70.52, + "learning_rate": 0.00018694736842105261, + "loss": 0.1278, + "step": 4725 + }, + { + "epoch": 70.54, + "learning_rate": 0.00018691228070175436, + "loss": 0.0046, + "step": 4726 + }, + { + "epoch": 70.55, + "learning_rate": 0.0001868771929824561, + "loss": 0.003, + "step": 4727 + }, + { + "epoch": 70.57, + "learning_rate": 0.0001868421052631579, + "loss": 0.0073, + "step": 4728 + }, + { + "epoch": 70.58, + "learning_rate": 0.00018680701754385964, + "loss": 0.0081, + "step": 4729 + }, + { + "epoch": 70.59, + "learning_rate": 0.00018677192982456139, + "loss": 0.0057, + "step": 4730 + }, + { + "epoch": 70.61, + "learning_rate": 0.00018673684210526313, + "loss": 0.0133, + "step": 4731 + }, + { + "epoch": 70.62, + "learning_rate": 0.0001867017543859649, + "loss": 0.0017, + "step": 4732 + }, + { + "epoch": 70.64, + "learning_rate": 0.00018666666666666666, + "loss": 0.0448, + "step": 4733 + }, + { + "epoch": 70.65, + "learning_rate": 0.0001866315789473684, + "loss": 0.0026, + "step": 4734 + }, + { + "epoch": 70.67, + "learning_rate": 0.00018659649122807016, + "loss": 0.2009, + "step": 4735 + }, + { + "epoch": 70.68, + "learning_rate": 0.00018656140350877193, + "loss": 0.0104, + "step": 4736 + }, + { + "epoch": 70.7, + "learning_rate": 0.00018652631578947368, + "loss": 0.008, + "step": 4737 + }, + { + "epoch": 70.71, + "learning_rate": 0.0001864912280701754, + "loss": 0.0966, + "step": 4738 + }, + { + "epoch": 70.73, + "learning_rate": 0.00018645614035087718, + "loss": 0.0021, + "step": 4739 + }, + { + "epoch": 70.74, + "learning_rate": 0.00018642105263157893, + "loss": 0.0016, + "step": 4740 + }, + { + "epoch": 70.76, + "learning_rate": 0.00018638596491228068, + "loss": 0.1067, + "step": 4741 + }, + { + "epoch": 70.77, + "learning_rate": 0.00018635087719298243, + "loss": 0.0963, + "step": 4742 + }, + { + "epoch": 70.79, + "learning_rate": 0.0001863157894736842, + "loss": 0.0917, + "step": 4743 + }, + { + "epoch": 70.8, + "learning_rate": 0.00018628070175438595, + "loss": 0.0018, + "step": 4744 + }, + { + "epoch": 70.82, + "learning_rate": 0.0001862456140350877, + "loss": 0.183, + "step": 4745 + }, + { + "epoch": 70.83, + "learning_rate": 0.00018621052631578945, + "loss": 0.0012, + "step": 4746 + }, + { + "epoch": 70.85, + "learning_rate": 0.00018617543859649122, + "loss": 0.0013, + "step": 4747 + }, + { + "epoch": 70.86, + "learning_rate": 0.00018614035087719297, + "loss": 0.002, + "step": 4748 + }, + { + "epoch": 70.88, + "learning_rate": 0.00018610526315789472, + "loss": 0.0035, + "step": 4749 + }, + { + "epoch": 70.89, + "learning_rate": 0.0001860701754385965, + "loss": 0.0024, + "step": 4750 + }, + { + "epoch": 70.91, + "learning_rate": 0.00018603508771929825, + "loss": 0.0031, + "step": 4751 + }, + { + "epoch": 70.92, + "learning_rate": 0.000186, + "loss": 0.0021, + "step": 4752 + }, + { + "epoch": 70.94, + "learning_rate": 0.00018596491228070172, + "loss": 0.0054, + "step": 4753 + }, + { + "epoch": 70.95, + "learning_rate": 0.00018592982456140352, + "loss": 0.0037, + "step": 4754 + }, + { + "epoch": 70.97, + "learning_rate": 0.00018589473684210524, + "loss": 0.002, + "step": 4755 + }, + { + "epoch": 70.98, + "learning_rate": 0.000185859649122807, + "loss": 0.0839, + "step": 4756 + }, + { + "epoch": 71.0, + "learning_rate": 0.00018582456140350874, + "loss": 0.1267, + "step": 4757 + }, + { + "epoch": 71.01, + "learning_rate": 0.00018578947368421051, + "loss": 0.0022, + "step": 4758 + }, + { + "epoch": 71.03, + "learning_rate": 0.00018575438596491226, + "loss": 0.001, + "step": 4759 + }, + { + "epoch": 71.04, + "learning_rate": 0.000185719298245614, + "loss": 0.0012, + "step": 4760 + }, + { + "epoch": 71.06, + "learning_rate": 0.00018568421052631576, + "loss": 0.0075, + "step": 4761 + }, + { + "epoch": 71.07, + "learning_rate": 0.00018564912280701754, + "loss": 0.0519, + "step": 4762 + }, + { + "epoch": 71.09, + "learning_rate": 0.00018561403508771929, + "loss": 0.0016, + "step": 4763 + }, + { + "epoch": 71.1, + "learning_rate": 0.00018557894736842103, + "loss": 0.0017, + "step": 4764 + }, + { + "epoch": 71.12, + "learning_rate": 0.0001855438596491228, + "loss": 0.0289, + "step": 4765 + }, + { + "epoch": 71.13, + "learning_rate": 0.00018550877192982456, + "loss": 0.0028, + "step": 4766 + }, + { + "epoch": 71.15, + "learning_rate": 0.0001854736842105263, + "loss": 0.0536, + "step": 4767 + }, + { + "epoch": 71.16, + "learning_rate": 0.00018543859649122803, + "loss": 0.1368, + "step": 4768 + }, + { + "epoch": 71.18, + "learning_rate": 0.00018540350877192983, + "loss": 0.0013, + "step": 4769 + }, + { + "epoch": 71.19, + "learning_rate": 0.00018536842105263155, + "loss": 0.2852, + "step": 4770 + }, + { + "epoch": 71.21, + "learning_rate": 0.0001853333333333333, + "loss": 0.0144, + "step": 4771 + }, + { + "epoch": 71.22, + "learning_rate": 0.00018529824561403505, + "loss": 0.0072, + "step": 4772 + }, + { + "epoch": 71.24, + "learning_rate": 0.00018526315789473683, + "loss": 0.0088, + "step": 4773 + }, + { + "epoch": 71.25, + "learning_rate": 0.00018522807017543858, + "loss": 0.3189, + "step": 4774 + }, + { + "epoch": 71.27, + "learning_rate": 0.00018519298245614032, + "loss": 0.1993, + "step": 4775 + }, + { + "epoch": 71.28, + "learning_rate": 0.0001851578947368421, + "loss": 0.255, + "step": 4776 + }, + { + "epoch": 71.3, + "learning_rate": 0.00018512280701754385, + "loss": 0.0594, + "step": 4777 + }, + { + "epoch": 71.31, + "learning_rate": 0.0001850877192982456, + "loss": 0.0034, + "step": 4778 + }, + { + "epoch": 71.33, + "learning_rate": 0.00018505263157894735, + "loss": 0.014, + "step": 4779 + }, + { + "epoch": 71.34, + "learning_rate": 0.00018501754385964912, + "loss": 0.0016, + "step": 4780 + }, + { + "epoch": 71.36, + "learning_rate": 0.00018498245614035087, + "loss": 0.1977, + "step": 4781 + }, + { + "epoch": 71.37, + "learning_rate": 0.00018494736842105262, + "loss": 0.2658, + "step": 4782 + }, + { + "epoch": 71.39, + "learning_rate": 0.00018491228070175437, + "loss": 0.005, + "step": 4783 + }, + { + "epoch": 71.4, + "learning_rate": 0.00018487719298245615, + "loss": 0.0766, + "step": 4784 + }, + { + "epoch": 71.42, + "learning_rate": 0.00018484210526315787, + "loss": 0.084, + "step": 4785 + }, + { + "epoch": 71.43, + "learning_rate": 0.00018480701754385962, + "loss": 0.0876, + "step": 4786 + }, + { + "epoch": 71.45, + "learning_rate": 0.00018477192982456136, + "loss": 0.002, + "step": 4787 + }, + { + "epoch": 71.46, + "learning_rate": 0.00018473684210526314, + "loss": 0.0182, + "step": 4788 + }, + { + "epoch": 71.48, + "learning_rate": 0.0001847017543859649, + "loss": 0.0067, + "step": 4789 + }, + { + "epoch": 71.49, + "learning_rate": 0.00018466666666666664, + "loss": 0.0028, + "step": 4790 + }, + { + "epoch": 71.51, + "learning_rate": 0.00018463157894736841, + "loss": 0.0689, + "step": 4791 + }, + { + "epoch": 71.52, + "learning_rate": 0.00018459649122807016, + "loss": 0.3814, + "step": 4792 + }, + { + "epoch": 71.54, + "learning_rate": 0.0001845614035087719, + "loss": 0.0075, + "step": 4793 + }, + { + "epoch": 71.55, + "learning_rate": 0.00018452631578947366, + "loss": 0.0132, + "step": 4794 + }, + { + "epoch": 71.57, + "learning_rate": 0.00018449122807017544, + "loss": 0.1036, + "step": 4795 + }, + { + "epoch": 71.58, + "learning_rate": 0.00018445614035087718, + "loss": 0.0515, + "step": 4796 + }, + { + "epoch": 71.59, + "learning_rate": 0.00018442105263157893, + "loss": 0.0664, + "step": 4797 + }, + { + "epoch": 71.61, + "learning_rate": 0.00018438596491228068, + "loss": 0.0349, + "step": 4798 + }, + { + "epoch": 71.62, + "learning_rate": 0.00018435087719298246, + "loss": 0.1953, + "step": 4799 + }, + { + "epoch": 71.64, + "learning_rate": 0.0001843157894736842, + "loss": 0.0302, + "step": 4800 + }, + { + "epoch": 71.64, + "eval_accuracy": 0.8494860499265786, + "eval_f1": 0.8515287106120767, + "eval_loss": 0.6812880635261536, + "eval_runtime": 344.4321, + "eval_samples_per_second": 11.863, + "eval_steps_per_second": 0.743, + "step": 4800 + }, + { + "epoch": 71.65, + "learning_rate": 0.00018428070175438593, + "loss": 0.0782, + "step": 4801 + }, + { + "epoch": 71.67, + "learning_rate": 0.00018424561403508773, + "loss": 0.0039, + "step": 4802 + }, + { + "epoch": 71.68, + "learning_rate": 0.00018421052631578945, + "loss": 0.0041, + "step": 4803 + }, + { + "epoch": 71.7, + "learning_rate": 0.0001841754385964912, + "loss": 0.0041, + "step": 4804 + }, + { + "epoch": 71.71, + "learning_rate": 0.00018414035087719295, + "loss": 0.0499, + "step": 4805 + }, + { + "epoch": 71.73, + "learning_rate": 0.00018410526315789473, + "loss": 0.0392, + "step": 4806 + }, + { + "epoch": 71.74, + "learning_rate": 0.00018407017543859648, + "loss": 0.0955, + "step": 4807 + }, + { + "epoch": 71.76, + "learning_rate": 0.00018403508771929822, + "loss": 0.2134, + "step": 4808 + }, + { + "epoch": 71.77, + "learning_rate": 0.00018399999999999997, + "loss": 0.2199, + "step": 4809 + }, + { + "epoch": 71.79, + "learning_rate": 0.00018396491228070175, + "loss": 0.0032, + "step": 4810 + }, + { + "epoch": 71.8, + "learning_rate": 0.0001839298245614035, + "loss": 0.0542, + "step": 4811 + }, + { + "epoch": 71.82, + "learning_rate": 0.00018389473684210525, + "loss": 0.1352, + "step": 4812 + }, + { + "epoch": 71.83, + "learning_rate": 0.00018385964912280702, + "loss": 0.1094, + "step": 4813 + }, + { + "epoch": 71.85, + "learning_rate": 0.00018382456140350877, + "loss": 0.0961, + "step": 4814 + }, + { + "epoch": 71.86, + "learning_rate": 0.00018378947368421052, + "loss": 0.0033, + "step": 4815 + }, + { + "epoch": 71.88, + "learning_rate": 0.00018375438596491224, + "loss": 0.3464, + "step": 4816 + }, + { + "epoch": 71.89, + "learning_rate": 0.00018371929824561404, + "loss": 0.0021, + "step": 4817 + }, + { + "epoch": 71.91, + "learning_rate": 0.00018368421052631577, + "loss": 0.0212, + "step": 4818 + }, + { + "epoch": 71.92, + "learning_rate": 0.00018364912280701752, + "loss": 0.09, + "step": 4819 + }, + { + "epoch": 71.94, + "learning_rate": 0.00018361403508771926, + "loss": 0.1911, + "step": 4820 + }, + { + "epoch": 71.95, + "learning_rate": 0.00018357894736842104, + "loss": 0.0022, + "step": 4821 + }, + { + "epoch": 71.97, + "learning_rate": 0.0001835438596491228, + "loss": 0.0026, + "step": 4822 + }, + { + "epoch": 71.98, + "learning_rate": 0.00018350877192982454, + "loss": 0.003, + "step": 4823 + }, + { + "epoch": 72.0, + "learning_rate": 0.00018347368421052629, + "loss": 0.0518, + "step": 4824 + }, + { + "epoch": 72.01, + "learning_rate": 0.00018343859649122806, + "loss": 0.1218, + "step": 4825 + }, + { + "epoch": 72.03, + "learning_rate": 0.0001834035087719298, + "loss": 0.2565, + "step": 4826 + }, + { + "epoch": 72.04, + "learning_rate": 0.00018336842105263156, + "loss": 0.0015, + "step": 4827 + }, + { + "epoch": 72.06, + "learning_rate": 0.00018333333333333334, + "loss": 0.1193, + "step": 4828 + }, + { + "epoch": 72.07, + "learning_rate": 0.00018329824561403508, + "loss": 0.0566, + "step": 4829 + }, + { + "epoch": 72.09, + "learning_rate": 0.00018326315789473683, + "loss": 0.0022, + "step": 4830 + }, + { + "epoch": 72.1, + "learning_rate": 0.00018322807017543858, + "loss": 0.0022, + "step": 4831 + }, + { + "epoch": 72.12, + "learning_rate": 0.00018319298245614036, + "loss": 0.1068, + "step": 4832 + }, + { + "epoch": 72.13, + "learning_rate": 0.00018315789473684208, + "loss": 0.062, + "step": 4833 + }, + { + "epoch": 72.15, + "learning_rate": 0.00018312280701754383, + "loss": 0.0677, + "step": 4834 + }, + { + "epoch": 72.16, + "learning_rate": 0.00018308771929824558, + "loss": 0.0032, + "step": 4835 + }, + { + "epoch": 72.18, + "learning_rate": 0.00018305263157894735, + "loss": 0.0027, + "step": 4836 + }, + { + "epoch": 72.19, + "learning_rate": 0.0001830175438596491, + "loss": 0.0203, + "step": 4837 + }, + { + "epoch": 72.21, + "learning_rate": 0.00018298245614035085, + "loss": 0.0049, + "step": 4838 + }, + { + "epoch": 72.22, + "learning_rate": 0.00018294736842105263, + "loss": 0.0329, + "step": 4839 + }, + { + "epoch": 72.24, + "learning_rate": 0.00018291228070175437, + "loss": 0.0059, + "step": 4840 + }, + { + "epoch": 72.25, + "learning_rate": 0.00018287719298245612, + "loss": 0.0042, + "step": 4841 + }, + { + "epoch": 72.27, + "learning_rate": 0.00018284210526315787, + "loss": 0.1367, + "step": 4842 + }, + { + "epoch": 72.28, + "learning_rate": 0.00018280701754385965, + "loss": 0.0015, + "step": 4843 + }, + { + "epoch": 72.3, + "learning_rate": 0.0001827719298245614, + "loss": 0.1505, + "step": 4844 + }, + { + "epoch": 72.31, + "learning_rate": 0.00018273684210526315, + "loss": 0.0677, + "step": 4845 + }, + { + "epoch": 72.33, + "learning_rate": 0.0001827017543859649, + "loss": 0.0066, + "step": 4846 + }, + { + "epoch": 72.34, + "learning_rate": 0.00018266666666666667, + "loss": 0.0131, + "step": 4847 + }, + { + "epoch": 72.36, + "learning_rate": 0.00018263157894736842, + "loss": 0.095, + "step": 4848 + }, + { + "epoch": 72.37, + "learning_rate": 0.00018259649122807014, + "loss": 0.0308, + "step": 4849 + }, + { + "epoch": 72.39, + "learning_rate": 0.0001825614035087719, + "loss": 0.0013, + "step": 4850 + }, + { + "epoch": 72.4, + "learning_rate": 0.00018252631578947367, + "loss": 0.1971, + "step": 4851 + }, + { + "epoch": 72.42, + "learning_rate": 0.00018249122807017541, + "loss": 0.0014, + "step": 4852 + }, + { + "epoch": 72.43, + "learning_rate": 0.00018245614035087716, + "loss": 0.0013, + "step": 4853 + }, + { + "epoch": 72.45, + "learning_rate": 0.00018242105263157894, + "loss": 0.0355, + "step": 4854 + }, + { + "epoch": 72.46, + "learning_rate": 0.0001823859649122807, + "loss": 0.0038, + "step": 4855 + }, + { + "epoch": 72.48, + "learning_rate": 0.00018235087719298244, + "loss": 0.0012, + "step": 4856 + }, + { + "epoch": 72.49, + "learning_rate": 0.00018231578947368419, + "loss": 0.102, + "step": 4857 + }, + { + "epoch": 72.51, + "learning_rate": 0.00018228070175438596, + "loss": 0.0082, + "step": 4858 + }, + { + "epoch": 72.52, + "learning_rate": 0.0001822456140350877, + "loss": 0.0481, + "step": 4859 + }, + { + "epoch": 72.54, + "learning_rate": 0.00018221052631578946, + "loss": 0.0025, + "step": 4860 + }, + { + "epoch": 72.55, + "learning_rate": 0.0001821754385964912, + "loss": 0.0018, + "step": 4861 + }, + { + "epoch": 72.57, + "learning_rate": 0.00018214035087719298, + "loss": 0.1083, + "step": 4862 + }, + { + "epoch": 72.58, + "learning_rate": 0.00018210526315789473, + "loss": 0.0437, + "step": 4863 + }, + { + "epoch": 72.59, + "learning_rate": 0.00018207017543859645, + "loss": 0.0088, + "step": 4864 + }, + { + "epoch": 72.61, + "learning_rate": 0.00018203508771929826, + "loss": 0.1357, + "step": 4865 + }, + { + "epoch": 72.62, + "learning_rate": 0.00018199999999999998, + "loss": 0.0158, + "step": 4866 + }, + { + "epoch": 72.64, + "learning_rate": 0.00018196491228070173, + "loss": 0.0188, + "step": 4867 + }, + { + "epoch": 72.65, + "learning_rate": 0.00018192982456140348, + "loss": 0.0024, + "step": 4868 + }, + { + "epoch": 72.67, + "learning_rate": 0.00018189473684210525, + "loss": 0.005, + "step": 4869 + }, + { + "epoch": 72.68, + "learning_rate": 0.000181859649122807, + "loss": 0.0025, + "step": 4870 + }, + { + "epoch": 72.7, + "learning_rate": 0.00018182456140350875, + "loss": 0.0077, + "step": 4871 + }, + { + "epoch": 72.71, + "learning_rate": 0.0001817894736842105, + "loss": 0.0025, + "step": 4872 + }, + { + "epoch": 72.73, + "learning_rate": 0.00018175438596491227, + "loss": 0.058, + "step": 4873 + }, + { + "epoch": 72.74, + "learning_rate": 0.00018171929824561402, + "loss": 0.1914, + "step": 4874 + }, + { + "epoch": 72.76, + "learning_rate": 0.00018168421052631577, + "loss": 0.1722, + "step": 4875 + }, + { + "epoch": 72.77, + "learning_rate": 0.00018164912280701755, + "loss": 0.0266, + "step": 4876 + }, + { + "epoch": 72.79, + "learning_rate": 0.0001816140350877193, + "loss": 0.0278, + "step": 4877 + }, + { + "epoch": 72.8, + "learning_rate": 0.00018157894736842105, + "loss": 0.0495, + "step": 4878 + }, + { + "epoch": 72.82, + "learning_rate": 0.00018154385964912277, + "loss": 0.1768, + "step": 4879 + }, + { + "epoch": 72.83, + "learning_rate": 0.00018150877192982457, + "loss": 0.0619, + "step": 4880 + }, + { + "epoch": 72.85, + "learning_rate": 0.0001814736842105263, + "loss": 0.1561, + "step": 4881 + }, + { + "epoch": 72.86, + "learning_rate": 0.00018143859649122804, + "loss": 0.0668, + "step": 4882 + }, + { + "epoch": 72.88, + "learning_rate": 0.0001814035087719298, + "loss": 0.0052, + "step": 4883 + }, + { + "epoch": 72.89, + "learning_rate": 0.00018136842105263157, + "loss": 0.0246, + "step": 4884 + }, + { + "epoch": 72.91, + "learning_rate": 0.00018133333333333331, + "loss": 0.2206, + "step": 4885 + }, + { + "epoch": 72.92, + "learning_rate": 0.00018129824561403506, + "loss": 0.2973, + "step": 4886 + }, + { + "epoch": 72.94, + "learning_rate": 0.0001812631578947368, + "loss": 0.3662, + "step": 4887 + }, + { + "epoch": 72.95, + "learning_rate": 0.0001812280701754386, + "loss": 0.2203, + "step": 4888 + }, + { + "epoch": 72.97, + "learning_rate": 0.00018119298245614034, + "loss": 0.0179, + "step": 4889 + }, + { + "epoch": 72.98, + "learning_rate": 0.00018115789473684208, + "loss": 0.0022, + "step": 4890 + }, + { + "epoch": 73.0, + "learning_rate": 0.00018112280701754386, + "loss": 0.0622, + "step": 4891 + }, + { + "epoch": 73.01, + "learning_rate": 0.0001810877192982456, + "loss": 0.0017, + "step": 4892 + }, + { + "epoch": 73.03, + "learning_rate": 0.00018105263157894736, + "loss": 0.0018, + "step": 4893 + }, + { + "epoch": 73.04, + "learning_rate": 0.0001810175438596491, + "loss": 0.0528, + "step": 4894 + }, + { + "epoch": 73.06, + "learning_rate": 0.00018098245614035088, + "loss": 0.0023, + "step": 4895 + }, + { + "epoch": 73.07, + "learning_rate": 0.00018094736842105263, + "loss": 0.0043, + "step": 4896 + }, + { + "epoch": 73.09, + "learning_rate": 0.00018091228070175435, + "loss": 0.0218, + "step": 4897 + }, + { + "epoch": 73.1, + "learning_rate": 0.0001808771929824561, + "loss": 0.2016, + "step": 4898 + }, + { + "epoch": 73.12, + "learning_rate": 0.00018084210526315788, + "loss": 0.1458, + "step": 4899 + }, + { + "epoch": 73.13, + "learning_rate": 0.00018080701754385963, + "loss": 0.0088, + "step": 4900 + }, + { + "epoch": 73.15, + "learning_rate": 0.00018077192982456138, + "loss": 0.0022, + "step": 4901 + }, + { + "epoch": 73.16, + "learning_rate": 0.00018073684210526315, + "loss": 0.0228, + "step": 4902 + }, + { + "epoch": 73.18, + "learning_rate": 0.0001807017543859649, + "loss": 0.0058, + "step": 4903 + }, + { + "epoch": 73.19, + "learning_rate": 0.00018066666666666665, + "loss": 0.0074, + "step": 4904 + }, + { + "epoch": 73.21, + "learning_rate": 0.0001806315789473684, + "loss": 0.0103, + "step": 4905 + }, + { + "epoch": 73.22, + "learning_rate": 0.00018059649122807017, + "loss": 0.1562, + "step": 4906 + }, + { + "epoch": 73.24, + "learning_rate": 0.00018056140350877192, + "loss": 0.004, + "step": 4907 + }, + { + "epoch": 73.25, + "learning_rate": 0.00018052631578947367, + "loss": 0.0222, + "step": 4908 + }, + { + "epoch": 73.27, + "learning_rate": 0.00018049122807017542, + "loss": 0.0036, + "step": 4909 + }, + { + "epoch": 73.28, + "learning_rate": 0.0001804561403508772, + "loss": 0.0019, + "step": 4910 + }, + { + "epoch": 73.3, + "learning_rate": 0.00018042105263157894, + "loss": 0.0021, + "step": 4911 + }, + { + "epoch": 73.31, + "learning_rate": 0.00018038596491228067, + "loss": 0.2216, + "step": 4912 + }, + { + "epoch": 73.33, + "learning_rate": 0.00018035087719298247, + "loss": 0.1907, + "step": 4913 + }, + { + "epoch": 73.34, + "learning_rate": 0.0001803157894736842, + "loss": 0.0057, + "step": 4914 + }, + { + "epoch": 73.36, + "learning_rate": 0.00018028070175438594, + "loss": 0.0024, + "step": 4915 + }, + { + "epoch": 73.37, + "learning_rate": 0.0001802456140350877, + "loss": 0.2023, + "step": 4916 + }, + { + "epoch": 73.39, + "learning_rate": 0.00018021052631578946, + "loss": 0.0057, + "step": 4917 + }, + { + "epoch": 73.4, + "learning_rate": 0.0001801754385964912, + "loss": 0.0878, + "step": 4918 + }, + { + "epoch": 73.42, + "learning_rate": 0.00018014035087719296, + "loss": 0.0024, + "step": 4919 + }, + { + "epoch": 73.43, + "learning_rate": 0.0001801052631578947, + "loss": 0.0191, + "step": 4920 + }, + { + "epoch": 73.45, + "learning_rate": 0.0001800701754385965, + "loss": 0.0756, + "step": 4921 + }, + { + "epoch": 73.46, + "learning_rate": 0.00018003508771929824, + "loss": 0.0028, + "step": 4922 + }, + { + "epoch": 73.48, + "learning_rate": 0.00017999999999999998, + "loss": 0.2512, + "step": 4923 + }, + { + "epoch": 73.49, + "learning_rate": 0.00017996491228070173, + "loss": 0.1202, + "step": 4924 + }, + { + "epoch": 73.51, + "learning_rate": 0.0001799298245614035, + "loss": 0.1339, + "step": 4925 + }, + { + "epoch": 73.52, + "learning_rate": 0.00017989473684210526, + "loss": 0.0635, + "step": 4926 + }, + { + "epoch": 73.54, + "learning_rate": 0.00017985964912280698, + "loss": 0.0029, + "step": 4927 + }, + { + "epoch": 73.55, + "learning_rate": 0.00017982456140350878, + "loss": 0.0032, + "step": 4928 + }, + { + "epoch": 73.57, + "learning_rate": 0.0001797894736842105, + "loss": 0.0057, + "step": 4929 + }, + { + "epoch": 73.58, + "learning_rate": 0.00017975438596491225, + "loss": 0.267, + "step": 4930 + }, + { + "epoch": 73.59, + "learning_rate": 0.000179719298245614, + "loss": 0.1825, + "step": 4931 + }, + { + "epoch": 73.61, + "learning_rate": 0.00017968421052631578, + "loss": 0.1034, + "step": 4932 + }, + { + "epoch": 73.62, + "learning_rate": 0.00017964912280701753, + "loss": 0.0088, + "step": 4933 + }, + { + "epoch": 73.64, + "learning_rate": 0.00017961403508771927, + "loss": 0.27, + "step": 4934 + }, + { + "epoch": 73.65, + "learning_rate": 0.00017957894736842102, + "loss": 0.0016, + "step": 4935 + }, + { + "epoch": 73.67, + "learning_rate": 0.0001795438596491228, + "loss": 0.0059, + "step": 4936 + }, + { + "epoch": 73.68, + "learning_rate": 0.00017950877192982455, + "loss": 0.3191, + "step": 4937 + }, + { + "epoch": 73.7, + "learning_rate": 0.0001794736842105263, + "loss": 0.0091, + "step": 4938 + }, + { + "epoch": 73.71, + "learning_rate": 0.00017943859649122807, + "loss": 0.037, + "step": 4939 + }, + { + "epoch": 73.73, + "learning_rate": 0.00017940350877192982, + "loss": 0.0173, + "step": 4940 + }, + { + "epoch": 73.74, + "learning_rate": 0.00017936842105263157, + "loss": 0.0028, + "step": 4941 + }, + { + "epoch": 73.76, + "learning_rate": 0.00017933333333333332, + "loss": 0.0125, + "step": 4942 + }, + { + "epoch": 73.77, + "learning_rate": 0.0001792982456140351, + "loss": 0.0519, + "step": 4943 + }, + { + "epoch": 73.79, + "learning_rate": 0.00017926315789473684, + "loss": 0.3839, + "step": 4944 + }, + { + "epoch": 73.8, + "learning_rate": 0.00017922807017543857, + "loss": 0.0054, + "step": 4945 + }, + { + "epoch": 73.82, + "learning_rate": 0.00017919298245614031, + "loss": 0.036, + "step": 4946 + }, + { + "epoch": 73.83, + "learning_rate": 0.0001791578947368421, + "loss": 0.0084, + "step": 4947 + }, + { + "epoch": 73.85, + "learning_rate": 0.00017912280701754384, + "loss": 0.0497, + "step": 4948 + }, + { + "epoch": 73.86, + "learning_rate": 0.0001790877192982456, + "loss": 0.0131, + "step": 4949 + }, + { + "epoch": 73.88, + "learning_rate": 0.00017905263157894734, + "loss": 0.2431, + "step": 4950 + }, + { + "epoch": 73.89, + "learning_rate": 0.0001790175438596491, + "loss": 0.215, + "step": 4951 + }, + { + "epoch": 73.91, + "learning_rate": 0.00017898245614035086, + "loss": 0.011, + "step": 4952 + }, + { + "epoch": 73.92, + "learning_rate": 0.0001789473684210526, + "loss": 0.0145, + "step": 4953 + }, + { + "epoch": 73.94, + "learning_rate": 0.00017891228070175439, + "loss": 0.0019, + "step": 4954 + }, + { + "epoch": 73.95, + "learning_rate": 0.00017887719298245613, + "loss": 0.0811, + "step": 4955 + }, + { + "epoch": 73.97, + "learning_rate": 0.00017884210526315788, + "loss": 0.008, + "step": 4956 + }, + { + "epoch": 73.98, + "learning_rate": 0.00017880701754385963, + "loss": 0.2649, + "step": 4957 + }, + { + "epoch": 74.0, + "learning_rate": 0.0001787719298245614, + "loss": 0.1351, + "step": 4958 + }, + { + "epoch": 74.01, + "learning_rate": 0.00017873684210526316, + "loss": 0.0034, + "step": 4959 + }, + { + "epoch": 74.03, + "learning_rate": 0.00017870175438596488, + "loss": 0.012, + "step": 4960 + }, + { + "epoch": 74.04, + "learning_rate": 0.00017866666666666663, + "loss": 0.0023, + "step": 4961 + }, + { + "epoch": 74.06, + "learning_rate": 0.0001786315789473684, + "loss": 0.1564, + "step": 4962 + }, + { + "epoch": 74.07, + "learning_rate": 0.00017859649122807015, + "loss": 0.1394, + "step": 4963 + }, + { + "epoch": 74.09, + "learning_rate": 0.0001785614035087719, + "loss": 0.0147, + "step": 4964 + }, + { + "epoch": 74.1, + "learning_rate": 0.00017852631578947368, + "loss": 0.0139, + "step": 4965 + }, + { + "epoch": 74.12, + "learning_rate": 0.00017849122807017543, + "loss": 0.028, + "step": 4966 + }, + { + "epoch": 74.13, + "learning_rate": 0.00017845614035087717, + "loss": 0.0709, + "step": 4967 + }, + { + "epoch": 74.15, + "learning_rate": 0.00017842105263157892, + "loss": 0.0028, + "step": 4968 + }, + { + "epoch": 74.16, + "learning_rate": 0.0001783859649122807, + "loss": 0.0064, + "step": 4969 + }, + { + "epoch": 74.18, + "learning_rate": 0.00017835087719298245, + "loss": 0.005, + "step": 4970 + }, + { + "epoch": 74.19, + "learning_rate": 0.0001783157894736842, + "loss": 0.0279, + "step": 4971 + }, + { + "epoch": 74.21, + "learning_rate": 0.00017828070175438595, + "loss": 0.0143, + "step": 4972 + }, + { + "epoch": 74.22, + "learning_rate": 0.00017824561403508772, + "loss": 0.0258, + "step": 4973 + }, + { + "epoch": 74.24, + "learning_rate": 0.00017821052631578947, + "loss": 0.0602, + "step": 4974 + }, + { + "epoch": 74.25, + "learning_rate": 0.0001781754385964912, + "loss": 0.1954, + "step": 4975 + }, + { + "epoch": 74.27, + "learning_rate": 0.000178140350877193, + "loss": 0.0018, + "step": 4976 + }, + { + "epoch": 74.28, + "learning_rate": 0.00017810526315789472, + "loss": 0.0082, + "step": 4977 + }, + { + "epoch": 74.3, + "learning_rate": 0.00017807017543859647, + "loss": 0.1672, + "step": 4978 + }, + { + "epoch": 74.31, + "learning_rate": 0.00017803508771929821, + "loss": 0.0016, + "step": 4979 + }, + { + "epoch": 74.33, + "learning_rate": 0.000178, + "loss": 0.0091, + "step": 4980 + }, + { + "epoch": 74.34, + "learning_rate": 0.00017796491228070174, + "loss": 0.0312, + "step": 4981 + }, + { + "epoch": 74.36, + "learning_rate": 0.0001779298245614035, + "loss": 0.2499, + "step": 4982 + }, + { + "epoch": 74.37, + "learning_rate": 0.00017789473684210524, + "loss": 0.1338, + "step": 4983 + }, + { + "epoch": 74.39, + "learning_rate": 0.000177859649122807, + "loss": 0.011, + "step": 4984 + }, + { + "epoch": 74.4, + "learning_rate": 0.00017782456140350876, + "loss": 0.0024, + "step": 4985 + }, + { + "epoch": 74.42, + "learning_rate": 0.0001777894736842105, + "loss": 0.0041, + "step": 4986 + }, + { + "epoch": 74.43, + "learning_rate": 0.00017775438596491226, + "loss": 0.0016, + "step": 4987 + }, + { + "epoch": 74.45, + "learning_rate": 0.00017771929824561403, + "loss": 0.0135, + "step": 4988 + }, + { + "epoch": 74.46, + "learning_rate": 0.00017768421052631578, + "loss": 0.0148, + "step": 4989 + }, + { + "epoch": 74.48, + "learning_rate": 0.00017764912280701753, + "loss": 0.0708, + "step": 4990 + }, + { + "epoch": 74.49, + "learning_rate": 0.0001776140350877193, + "loss": 0.083, + "step": 4991 + }, + { + "epoch": 74.51, + "learning_rate": 0.00017757894736842103, + "loss": 0.0161, + "step": 4992 + }, + { + "epoch": 74.52, + "learning_rate": 0.00017754385964912278, + "loss": 0.0177, + "step": 4993 + }, + { + "epoch": 74.54, + "learning_rate": 0.00017750877192982453, + "loss": 0.0022, + "step": 4994 + }, + { + "epoch": 74.55, + "learning_rate": 0.0001774736842105263, + "loss": 0.0189, + "step": 4995 + }, + { + "epoch": 74.57, + "learning_rate": 0.00017743859649122805, + "loss": 0.0014, + "step": 4996 + }, + { + "epoch": 74.58, + "learning_rate": 0.0001774035087719298, + "loss": 0.0093, + "step": 4997 + }, + { + "epoch": 74.59, + "learning_rate": 0.00017736842105263155, + "loss": 0.0283, + "step": 4998 + }, + { + "epoch": 74.61, + "learning_rate": 0.00017733333333333333, + "loss": 0.139, + "step": 4999 + }, + { + "epoch": 74.62, + "learning_rate": 0.00017729824561403507, + "loss": 0.0027, + "step": 5000 + }, + { + "epoch": 74.62, + "eval_accuracy": 0.8529123837493882, + "eval_f1": 0.8530140782623041, + "eval_loss": 0.7162572145462036, + "eval_runtime": 343.7711, + "eval_samples_per_second": 11.886, + "eval_steps_per_second": 0.745, + "step": 5000 + }, + { + "epoch": 74.64, + "learning_rate": 0.00017726315789473682, + "loss": 0.0074, + "step": 5001 + }, + { + "epoch": 74.65, + "learning_rate": 0.0001772280701754386, + "loss": 0.0027, + "step": 5002 + }, + { + "epoch": 74.67, + "learning_rate": 0.00017719298245614035, + "loss": 0.0679, + "step": 5003 + }, + { + "epoch": 74.68, + "learning_rate": 0.0001771578947368421, + "loss": 0.0013, + "step": 5004 + }, + { + "epoch": 74.7, + "learning_rate": 0.00017712280701754384, + "loss": 0.0307, + "step": 5005 + }, + { + "epoch": 74.71, + "learning_rate": 0.00017708771929824562, + "loss": 0.0014, + "step": 5006 + }, + { + "epoch": 74.73, + "learning_rate": 0.00017705263157894737, + "loss": 0.0016, + "step": 5007 + }, + { + "epoch": 74.74, + "learning_rate": 0.00017705263157894737, + "loss": 0.3708, + "step": 5008 + }, + { + "epoch": 74.76, + "learning_rate": 0.0001770175438596491, + "loss": 0.0718, + "step": 5009 + }, + { + "epoch": 74.77, + "learning_rate": 0.00017698245614035084, + "loss": 0.0015, + "step": 5010 + }, + { + "epoch": 74.79, + "learning_rate": 0.00017694736842105262, + "loss": 0.0336, + "step": 5011 + }, + { + "epoch": 74.8, + "learning_rate": 0.00017691228070175436, + "loss": 0.0311, + "step": 5012 + }, + { + "epoch": 74.82, + "learning_rate": 0.0001768771929824561, + "loss": 0.0123, + "step": 5013 + }, + { + "epoch": 74.83, + "learning_rate": 0.00017684210526315786, + "loss": 0.0049, + "step": 5014 + }, + { + "epoch": 74.85, + "learning_rate": 0.00017680701754385964, + "loss": 0.0137, + "step": 5015 + }, + { + "epoch": 74.86, + "learning_rate": 0.0001767719298245614, + "loss": 0.0619, + "step": 5016 + }, + { + "epoch": 74.88, + "learning_rate": 0.00017673684210526314, + "loss": 0.1754, + "step": 5017 + }, + { + "epoch": 74.89, + "learning_rate": 0.0001767017543859649, + "loss": 0.0045, + "step": 5018 + }, + { + "epoch": 74.91, + "learning_rate": 0.00017666666666666666, + "loss": 0.0024, + "step": 5019 + }, + { + "epoch": 74.92, + "learning_rate": 0.0001766315789473684, + "loss": 0.0026, + "step": 5020 + }, + { + "epoch": 74.94, + "learning_rate": 0.00017659649122807016, + "loss": 0.1055, + "step": 5021 + }, + { + "epoch": 74.95, + "learning_rate": 0.00017656140350877193, + "loss": 0.004, + "step": 5022 + }, + { + "epoch": 74.97, + "learning_rate": 0.00017652631578947368, + "loss": 0.0023, + "step": 5023 + }, + { + "epoch": 74.98, + "learning_rate": 0.0001764912280701754, + "loss": 0.0035, + "step": 5024 + }, + { + "epoch": 75.0, + "learning_rate": 0.00017645614035087715, + "loss": 0.0064, + "step": 5025 + }, + { + "epoch": 75.01, + "learning_rate": 0.00017642105263157893, + "loss": 0.0107, + "step": 5026 + }, + { + "epoch": 75.03, + "learning_rate": 0.00017638596491228068, + "loss": 0.0037, + "step": 5027 + }, + { + "epoch": 75.04, + "learning_rate": 0.00017635087719298243, + "loss": 0.0015, + "step": 5028 + }, + { + "epoch": 75.06, + "learning_rate": 0.0001763157894736842, + "loss": 0.0016, + "step": 5029 + }, + { + "epoch": 75.07, + "learning_rate": 0.00017628070175438595, + "loss": 0.1804, + "step": 5030 + }, + { + "epoch": 75.09, + "learning_rate": 0.0001762456140350877, + "loss": 0.0013, + "step": 5031 + }, + { + "epoch": 75.1, + "learning_rate": 0.00017621052631578945, + "loss": 0.0021, + "step": 5032 + }, + { + "epoch": 75.12, + "learning_rate": 0.00017617543859649122, + "loss": 0.0022, + "step": 5033 + }, + { + "epoch": 75.13, + "learning_rate": 0.00017614035087719297, + "loss": 0.0011, + "step": 5034 + }, + { + "epoch": 75.15, + "learning_rate": 0.00017610526315789472, + "loss": 0.2328, + "step": 5035 + }, + { + "epoch": 75.16, + "learning_rate": 0.00017607017543859647, + "loss": 0.2112, + "step": 5036 + }, + { + "epoch": 75.18, + "learning_rate": 0.00017603508771929825, + "loss": 0.0083, + "step": 5037 + }, + { + "epoch": 75.19, + "learning_rate": 0.000176, + "loss": 0.2025, + "step": 5038 + }, + { + "epoch": 75.21, + "learning_rate": 0.00017596491228070174, + "loss": 0.0229, + "step": 5039 + }, + { + "epoch": 75.22, + "learning_rate": 0.00017592982456140352, + "loss": 0.0018, + "step": 5040 + }, + { + "epoch": 75.24, + "learning_rate": 0.00017589473684210524, + "loss": 0.0022, + "step": 5041 + }, + { + "epoch": 75.25, + "learning_rate": 0.000175859649122807, + "loss": 0.0017, + "step": 5042 + }, + { + "epoch": 75.27, + "learning_rate": 0.00017582456140350874, + "loss": 0.0017, + "step": 5043 + }, + { + "epoch": 75.28, + "learning_rate": 0.00017578947368421052, + "loss": 0.1587, + "step": 5044 + }, + { + "epoch": 75.3, + "learning_rate": 0.00017575438596491226, + "loss": 0.0028, + "step": 5045 + }, + { + "epoch": 75.31, + "learning_rate": 0.000175719298245614, + "loss": 0.0041, + "step": 5046 + }, + { + "epoch": 75.33, + "learning_rate": 0.00017568421052631576, + "loss": 0.0012, + "step": 5047 + }, + { + "epoch": 75.34, + "learning_rate": 0.00017564912280701754, + "loss": 0.0014, + "step": 5048 + }, + { + "epoch": 75.36, + "learning_rate": 0.00017561403508771929, + "loss": 0.1035, + "step": 5049 + }, + { + "epoch": 75.37, + "learning_rate": 0.00017557894736842103, + "loss": 0.0893, + "step": 5050 + }, + { + "epoch": 75.39, + "learning_rate": 0.00017554385964912278, + "loss": 0.162, + "step": 5051 + }, + { + "epoch": 75.4, + "learning_rate": 0.00017550877192982456, + "loss": 0.0009, + "step": 5052 + }, + { + "epoch": 75.42, + "learning_rate": 0.0001754736842105263, + "loss": 0.001, + "step": 5053 + }, + { + "epoch": 75.43, + "learning_rate": 0.00017543859649122806, + "loss": 0.001, + "step": 5054 + }, + { + "epoch": 75.45, + "learning_rate": 0.00017540350877192983, + "loss": 0.0011, + "step": 5055 + }, + { + "epoch": 75.46, + "learning_rate": 0.00017536842105263158, + "loss": 0.1141, + "step": 5056 + }, + { + "epoch": 75.48, + "learning_rate": 0.0001753333333333333, + "loss": 0.0036, + "step": 5057 + }, + { + "epoch": 75.49, + "learning_rate": 0.00017529824561403505, + "loss": 0.001, + "step": 5058 + }, + { + "epoch": 75.51, + "learning_rate": 0.00017526315789473683, + "loss": 0.16, + "step": 5059 + }, + { + "epoch": 75.52, + "learning_rate": 0.00017522807017543858, + "loss": 0.2575, + "step": 5060 + }, + { + "epoch": 75.54, + "learning_rate": 0.00017519298245614033, + "loss": 0.0013, + "step": 5061 + }, + { + "epoch": 75.55, + "learning_rate": 0.00017515789473684207, + "loss": 0.1232, + "step": 5062 + }, + { + "epoch": 75.57, + "learning_rate": 0.00017512280701754385, + "loss": 0.0014, + "step": 5063 + }, + { + "epoch": 75.58, + "learning_rate": 0.0001750877192982456, + "loss": 0.0102, + "step": 5064 + }, + { + "epoch": 75.59, + "learning_rate": 0.00017505263157894735, + "loss": 0.1149, + "step": 5065 + }, + { + "epoch": 75.61, + "learning_rate": 0.00017501754385964912, + "loss": 0.1935, + "step": 5066 + }, + { + "epoch": 75.62, + "learning_rate": 0.00017498245614035087, + "loss": 0.0018, + "step": 5067 + }, + { + "epoch": 75.64, + "learning_rate": 0.00017494736842105262, + "loss": 0.1331, + "step": 5068 + }, + { + "epoch": 75.65, + "learning_rate": 0.00017491228070175437, + "loss": 0.1212, + "step": 5069 + }, + { + "epoch": 75.67, + "learning_rate": 0.00017487719298245615, + "loss": 0.1023, + "step": 5070 + }, + { + "epoch": 75.68, + "learning_rate": 0.0001748421052631579, + "loss": 0.1183, + "step": 5071 + }, + { + "epoch": 75.7, + "learning_rate": 0.00017480701754385962, + "loss": 0.145, + "step": 5072 + }, + { + "epoch": 75.71, + "learning_rate": 0.00017477192982456137, + "loss": 0.0044, + "step": 5073 + }, + { + "epoch": 75.73, + "learning_rate": 0.00017473684210526314, + "loss": 0.0016, + "step": 5074 + }, + { + "epoch": 75.74, + "learning_rate": 0.0001747017543859649, + "loss": 0.0024, + "step": 5075 + }, + { + "epoch": 75.76, + "learning_rate": 0.00017466666666666664, + "loss": 0.0338, + "step": 5076 + }, + { + "epoch": 75.77, + "learning_rate": 0.00017463157894736841, + "loss": 0.0053, + "step": 5077 + }, + { + "epoch": 75.79, + "learning_rate": 0.00017459649122807016, + "loss": 0.0742, + "step": 5078 + }, + { + "epoch": 75.8, + "learning_rate": 0.0001745614035087719, + "loss": 0.0017, + "step": 5079 + }, + { + "epoch": 75.82, + "learning_rate": 0.00017452631578947366, + "loss": 0.0229, + "step": 5080 + }, + { + "epoch": 75.83, + "learning_rate": 0.00017449122807017544, + "loss": 0.0323, + "step": 5081 + }, + { + "epoch": 75.85, + "learning_rate": 0.00017445614035087719, + "loss": 0.009, + "step": 5082 + }, + { + "epoch": 75.86, + "learning_rate": 0.00017442105263157893, + "loss": 0.0308, + "step": 5083 + }, + { + "epoch": 75.88, + "learning_rate": 0.00017438596491228068, + "loss": 0.1732, + "step": 5084 + }, + { + "epoch": 75.89, + "learning_rate": 0.00017435087719298246, + "loss": 0.0183, + "step": 5085 + }, + { + "epoch": 75.91, + "learning_rate": 0.0001743157894736842, + "loss": 0.0027, + "step": 5086 + }, + { + "epoch": 75.92, + "learning_rate": 0.00017428070175438593, + "loss": 0.0059, + "step": 5087 + }, + { + "epoch": 75.94, + "learning_rate": 0.00017424561403508768, + "loss": 0.0011, + "step": 5088 + }, + { + "epoch": 75.95, + "learning_rate": 0.00017421052631578945, + "loss": 0.0154, + "step": 5089 + }, + { + "epoch": 75.97, + "learning_rate": 0.0001741754385964912, + "loss": 0.2746, + "step": 5090 + }, + { + "epoch": 75.98, + "learning_rate": 0.00017414035087719295, + "loss": 0.009, + "step": 5091 + }, + { + "epoch": 76.0, + "learning_rate": 0.00017410526315789473, + "loss": 0.1161, + "step": 5092 + }, + { + "epoch": 76.01, + "learning_rate": 0.00017407017543859648, + "loss": 0.0044, + "step": 5093 + }, + { + "epoch": 76.03, + "learning_rate": 0.00017403508771929823, + "loss": 0.0122, + "step": 5094 + }, + { + "epoch": 76.04, + "learning_rate": 0.00017399999999999997, + "loss": 0.0024, + "step": 5095 + }, + { + "epoch": 76.06, + "learning_rate": 0.00017396491228070175, + "loss": 0.017, + "step": 5096 + }, + { + "epoch": 76.07, + "learning_rate": 0.0001739298245614035, + "loss": 0.3844, + "step": 5097 + }, + { + "epoch": 76.09, + "learning_rate": 0.00017389473684210525, + "loss": 0.0014, + "step": 5098 + }, + { + "epoch": 76.1, + "learning_rate": 0.000173859649122807, + "loss": 0.0806, + "step": 5099 + }, + { + "epoch": 76.12, + "learning_rate": 0.00017382456140350877, + "loss": 0.0194, + "step": 5100 + }, + { + "epoch": 76.13, + "learning_rate": 0.00017378947368421052, + "loss": 0.1099, + "step": 5101 + }, + { + "epoch": 76.15, + "learning_rate": 0.00017375438596491227, + "loss": 0.0016, + "step": 5102 + }, + { + "epoch": 76.16, + "learning_rate": 0.00017371929824561405, + "loss": 0.0011, + "step": 5103 + }, + { + "epoch": 76.18, + "learning_rate": 0.0001736842105263158, + "loss": 0.0017, + "step": 5104 + }, + { + "epoch": 76.19, + "learning_rate": 0.00017364912280701752, + "loss": 0.0024, + "step": 5105 + }, + { + "epoch": 76.21, + "learning_rate": 0.00017361403508771926, + "loss": 0.0012, + "step": 5106 + }, + { + "epoch": 76.22, + "learning_rate": 0.00017357894736842104, + "loss": 0.0047, + "step": 5107 + }, + { + "epoch": 76.24, + "learning_rate": 0.0001735438596491228, + "loss": 0.001, + "step": 5108 + }, + { + "epoch": 76.25, + "learning_rate": 0.00017350877192982454, + "loss": 0.3858, + "step": 5109 + }, + { + "epoch": 76.27, + "learning_rate": 0.0001734736842105263, + "loss": 0.0133, + "step": 5110 + }, + { + "epoch": 76.28, + "learning_rate": 0.00017343859649122806, + "loss": 0.0053, + "step": 5111 + }, + { + "epoch": 76.3, + "learning_rate": 0.0001734035087719298, + "loss": 0.0933, + "step": 5112 + }, + { + "epoch": 76.31, + "learning_rate": 0.00017336842105263156, + "loss": 0.023, + "step": 5113 + }, + { + "epoch": 76.33, + "learning_rate": 0.0001733333333333333, + "loss": 0.0009, + "step": 5114 + }, + { + "epoch": 76.34, + "learning_rate": 0.00017329824561403508, + "loss": 0.0033, + "step": 5115 + }, + { + "epoch": 76.36, + "learning_rate": 0.00017326315789473683, + "loss": 0.0283, + "step": 5116 + }, + { + "epoch": 76.37, + "learning_rate": 0.00017322807017543858, + "loss": 0.0076, + "step": 5117 + }, + { + "epoch": 76.39, + "learning_rate": 0.00017319298245614036, + "loss": 0.0467, + "step": 5118 + }, + { + "epoch": 76.4, + "learning_rate": 0.0001731578947368421, + "loss": 0.0849, + "step": 5119 + }, + { + "epoch": 76.42, + "learning_rate": 0.00017312280701754383, + "loss": 0.0441, + "step": 5120 + }, + { + "epoch": 76.43, + "learning_rate": 0.00017308771929824558, + "loss": 0.0016, + "step": 5121 + }, + { + "epoch": 76.45, + "learning_rate": 0.00017305263157894735, + "loss": 0.0011, + "step": 5122 + }, + { + "epoch": 76.46, + "learning_rate": 0.0001730175438596491, + "loss": 0.1175, + "step": 5123 + }, + { + "epoch": 76.48, + "learning_rate": 0.00017298245614035085, + "loss": 0.0439, + "step": 5124 + }, + { + "epoch": 76.49, + "learning_rate": 0.0001729473684210526, + "loss": 0.002, + "step": 5125 + }, + { + "epoch": 76.51, + "learning_rate": 0.00017291228070175438, + "loss": 0.2141, + "step": 5126 + }, + { + "epoch": 76.52, + "learning_rate": 0.00017287719298245612, + "loss": 0.0033, + "step": 5127 + }, + { + "epoch": 76.54, + "learning_rate": 0.00017284210526315787, + "loss": 0.0052, + "step": 5128 + }, + { + "epoch": 76.55, + "learning_rate": 0.00017280701754385965, + "loss": 0.001, + "step": 5129 + }, + { + "epoch": 76.57, + "learning_rate": 0.0001727719298245614, + "loss": 0.1329, + "step": 5130 + }, + { + "epoch": 76.58, + "learning_rate": 0.00017273684210526315, + "loss": 0.1306, + "step": 5131 + }, + { + "epoch": 76.59, + "learning_rate": 0.0001727017543859649, + "loss": 0.0259, + "step": 5132 + }, + { + "epoch": 76.61, + "learning_rate": 0.00017266666666666667, + "loss": 0.0014, + "step": 5133 + }, + { + "epoch": 76.62, + "learning_rate": 0.00017263157894736842, + "loss": 0.0735, + "step": 5134 + }, + { + "epoch": 76.64, + "learning_rate": 0.00017259649122807014, + "loss": 0.001, + "step": 5135 + }, + { + "epoch": 76.65, + "learning_rate": 0.0001725614035087719, + "loss": 0.0068, + "step": 5136 + }, + { + "epoch": 76.67, + "learning_rate": 0.00017252631578947367, + "loss": 0.113, + "step": 5137 + }, + { + "epoch": 76.68, + "learning_rate": 0.00017249122807017542, + "loss": 0.1599, + "step": 5138 + }, + { + "epoch": 76.7, + "learning_rate": 0.00017245614035087716, + "loss": 0.0022, + "step": 5139 + }, + { + "epoch": 76.71, + "learning_rate": 0.00017242105263157894, + "loss": 0.0012, + "step": 5140 + }, + { + "epoch": 76.73, + "learning_rate": 0.0001723859649122807, + "loss": 0.0692, + "step": 5141 + }, + { + "epoch": 76.74, + "learning_rate": 0.00017235087719298244, + "loss": 0.0012, + "step": 5142 + }, + { + "epoch": 76.76, + "learning_rate": 0.00017231578947368419, + "loss": 0.0655, + "step": 5143 + }, + { + "epoch": 76.77, + "learning_rate": 0.00017228070175438596, + "loss": 0.1018, + "step": 5144 + }, + { + "epoch": 76.79, + "learning_rate": 0.0001722456140350877, + "loss": 0.0011, + "step": 5145 + }, + { + "epoch": 76.8, + "learning_rate": 0.00017221052631578946, + "loss": 0.0603, + "step": 5146 + }, + { + "epoch": 76.82, + "learning_rate": 0.0001721754385964912, + "loss": 0.1936, + "step": 5147 + }, + { + "epoch": 76.83, + "learning_rate": 0.00017214035087719298, + "loss": 0.0032, + "step": 5148 + }, + { + "epoch": 76.85, + "learning_rate": 0.00017210526315789473, + "loss": 0.003, + "step": 5149 + }, + { + "epoch": 76.86, + "learning_rate": 0.00017207017543859648, + "loss": 0.0038, + "step": 5150 + }, + { + "epoch": 76.88, + "learning_rate": 0.0001720350877192982, + "loss": 0.1508, + "step": 5151 + }, + { + "epoch": 76.89, + "learning_rate": 0.000172, + "loss": 0.001, + "step": 5152 + }, + { + "epoch": 76.91, + "learning_rate": 0.00017196491228070173, + "loss": 0.0362, + "step": 5153 + }, + { + "epoch": 76.92, + "learning_rate": 0.00017192982456140348, + "loss": 0.0024, + "step": 5154 + }, + { + "epoch": 76.94, + "learning_rate": 0.00017189473684210525, + "loss": 0.0012, + "step": 5155 + }, + { + "epoch": 76.95, + "learning_rate": 0.000171859649122807, + "loss": 0.0012, + "step": 5156 + }, + { + "epoch": 76.97, + "learning_rate": 0.00017182456140350875, + "loss": 0.0092, + "step": 5157 + }, + { + "epoch": 76.98, + "learning_rate": 0.0001717894736842105, + "loss": 0.0012, + "step": 5158 + }, + { + "epoch": 77.0, + "learning_rate": 0.00017175438596491228, + "loss": 0.0331, + "step": 5159 + }, + { + "epoch": 77.01, + "learning_rate": 0.00017171929824561402, + "loss": 0.0255, + "step": 5160 + }, + { + "epoch": 77.03, + "learning_rate": 0.00017168421052631577, + "loss": 0.0015, + "step": 5161 + }, + { + "epoch": 77.04, + "learning_rate": 0.00017164912280701752, + "loss": 0.1901, + "step": 5162 + }, + { + "epoch": 77.06, + "learning_rate": 0.0001716140350877193, + "loss": 0.0017, + "step": 5163 + }, + { + "epoch": 77.07, + "learning_rate": 0.00017157894736842105, + "loss": 0.013, + "step": 5164 + }, + { + "epoch": 77.09, + "learning_rate": 0.0001715438596491228, + "loss": 0.0037, + "step": 5165 + }, + { + "epoch": 77.1, + "learning_rate": 0.00017150877192982457, + "loss": 0.0039, + "step": 5166 + }, + { + "epoch": 77.12, + "learning_rate": 0.00017147368421052632, + "loss": 0.0437, + "step": 5167 + }, + { + "epoch": 77.13, + "learning_rate": 0.00017143859649122804, + "loss": 0.1028, + "step": 5168 + }, + { + "epoch": 77.15, + "learning_rate": 0.0001714035087719298, + "loss": 0.0809, + "step": 5169 + }, + { + "epoch": 77.16, + "learning_rate": 0.00017136842105263157, + "loss": 0.0092, + "step": 5170 + }, + { + "epoch": 77.18, + "learning_rate": 0.00017133333333333331, + "loss": 0.0021, + "step": 5171 + }, + { + "epoch": 77.19, + "learning_rate": 0.00017129824561403506, + "loss": 0.0011, + "step": 5172 + }, + { + "epoch": 77.21, + "learning_rate": 0.0001712631578947368, + "loss": 0.1418, + "step": 5173 + }, + { + "epoch": 77.22, + "learning_rate": 0.0001712280701754386, + "loss": 0.0012, + "step": 5174 + }, + { + "epoch": 77.24, + "learning_rate": 0.00017119298245614034, + "loss": 0.0011, + "step": 5175 + }, + { + "epoch": 77.25, + "learning_rate": 0.00017115789473684209, + "loss": 0.1556, + "step": 5176 + }, + { + "epoch": 77.27, + "learning_rate": 0.00017112280701754383, + "loss": 0.02, + "step": 5177 + }, + { + "epoch": 77.28, + "learning_rate": 0.0001710877192982456, + "loss": 0.0541, + "step": 5178 + }, + { + "epoch": 77.3, + "learning_rate": 0.00017105263157894736, + "loss": 0.0024, + "step": 5179 + }, + { + "epoch": 77.31, + "learning_rate": 0.0001710175438596491, + "loss": 0.0144, + "step": 5180 + }, + { + "epoch": 77.33, + "learning_rate": 0.00017098245614035088, + "loss": 0.0672, + "step": 5181 + }, + { + "epoch": 77.34, + "learning_rate": 0.00017094736842105263, + "loss": 0.0022, + "step": 5182 + }, + { + "epoch": 77.36, + "learning_rate": 0.00017091228070175435, + "loss": 0.1882, + "step": 5183 + }, + { + "epoch": 77.37, + "learning_rate": 0.0001708771929824561, + "loss": 0.0506, + "step": 5184 + }, + { + "epoch": 77.39, + "learning_rate": 0.00017084210526315788, + "loss": 0.0947, + "step": 5185 + }, + { + "epoch": 77.4, + "learning_rate": 0.00017080701754385963, + "loss": 0.0038, + "step": 5186 + }, + { + "epoch": 77.42, + "learning_rate": 0.00017077192982456138, + "loss": 0.1022, + "step": 5187 + }, + { + "epoch": 77.43, + "learning_rate": 0.00017073684210526313, + "loss": 0.0033, + "step": 5188 + }, + { + "epoch": 77.45, + "learning_rate": 0.0001707017543859649, + "loss": 0.0016, + "step": 5189 + }, + { + "epoch": 77.46, + "learning_rate": 0.00017066666666666665, + "loss": 0.0544, + "step": 5190 + }, + { + "epoch": 77.48, + "learning_rate": 0.0001706315789473684, + "loss": 0.0466, + "step": 5191 + }, + { + "epoch": 77.49, + "learning_rate": 0.00017059649122807017, + "loss": 0.0066, + "step": 5192 + }, + { + "epoch": 77.51, + "learning_rate": 0.00017056140350877192, + "loss": 0.1766, + "step": 5193 + }, + { + "epoch": 77.52, + "learning_rate": 0.00017052631578947367, + "loss": 0.0044, + "step": 5194 + }, + { + "epoch": 77.54, + "learning_rate": 0.00017049122807017542, + "loss": 0.1594, + "step": 5195 + }, + { + "epoch": 77.55, + "learning_rate": 0.0001704561403508772, + "loss": 0.002, + "step": 5196 + }, + { + "epoch": 77.57, + "learning_rate": 0.00017042105263157895, + "loss": 0.5177, + "step": 5197 + }, + { + "epoch": 77.58, + "learning_rate": 0.0001703859649122807, + "loss": 0.007, + "step": 5198 + }, + { + "epoch": 77.59, + "learning_rate": 0.00017035087719298242, + "loss": 0.0725, + "step": 5199 + }, + { + "epoch": 77.61, + "learning_rate": 0.00017031578947368422, + "loss": 0.1165, + "step": 5200 + }, + { + "epoch": 77.61, + "eval_accuracy": 0.8595203132648067, + "eval_f1": 0.860270309660957, + "eval_loss": 0.6248674988746643, + "eval_runtime": 345.784, + "eval_samples_per_second": 11.817, + "eval_steps_per_second": 0.74, + "step": 5200 + }, + { + "epoch": 77.62, + "learning_rate": 0.00017028070175438594, + "loss": 0.011, + "step": 5201 + }, + { + "epoch": 77.64, + "learning_rate": 0.0001702456140350877, + "loss": 0.0015, + "step": 5202 + }, + { + "epoch": 77.65, + "learning_rate": 0.00017021052631578947, + "loss": 0.0022, + "step": 5203 + }, + { + "epoch": 77.67, + "learning_rate": 0.00017017543859649121, + "loss": 0.0012, + "step": 5204 + }, + { + "epoch": 77.68, + "learning_rate": 0.00017014035087719296, + "loss": 0.0163, + "step": 5205 + }, + { + "epoch": 77.7, + "learning_rate": 0.0001701052631578947, + "loss": 0.0082, + "step": 5206 + }, + { + "epoch": 77.71, + "learning_rate": 0.0001700701754385965, + "loss": 0.0025, + "step": 5207 + }, + { + "epoch": 77.73, + "learning_rate": 0.00017003508771929824, + "loss": 0.0022, + "step": 5208 + }, + { + "epoch": 77.74, + "learning_rate": 0.00016999999999999999, + "loss": 0.0288, + "step": 5209 + }, + { + "epoch": 77.76, + "learning_rate": 0.00016996491228070173, + "loss": 0.1087, + "step": 5210 + }, + { + "epoch": 77.77, + "learning_rate": 0.0001699298245614035, + "loss": 0.0049, + "step": 5211 + }, + { + "epoch": 77.79, + "learning_rate": 0.00016989473684210526, + "loss": 0.0052, + "step": 5212 + }, + { + "epoch": 77.8, + "learning_rate": 0.000169859649122807, + "loss": 0.0126, + "step": 5213 + }, + { + "epoch": 77.82, + "learning_rate": 0.00016982456140350873, + "loss": 0.0048, + "step": 5214 + }, + { + "epoch": 77.83, + "learning_rate": 0.00016978947368421053, + "loss": 0.0034, + "step": 5215 + }, + { + "epoch": 77.85, + "learning_rate": 0.00016975438596491225, + "loss": 0.1069, + "step": 5216 + }, + { + "epoch": 77.86, + "learning_rate": 0.000169719298245614, + "loss": 0.0046, + "step": 5217 + }, + { + "epoch": 77.88, + "learning_rate": 0.00016968421052631578, + "loss": 0.0169, + "step": 5218 + }, + { + "epoch": 77.89, + "learning_rate": 0.00016964912280701753, + "loss": 0.2265, + "step": 5219 + }, + { + "epoch": 77.91, + "learning_rate": 0.00016961403508771928, + "loss": 0.002, + "step": 5220 + }, + { + "epoch": 77.92, + "learning_rate": 0.00016957894736842102, + "loss": 0.0123, + "step": 5221 + }, + { + "epoch": 77.94, + "learning_rate": 0.0001695438596491228, + "loss": 0.0356, + "step": 5222 + }, + { + "epoch": 77.95, + "learning_rate": 0.00016950877192982455, + "loss": 0.0099, + "step": 5223 + }, + { + "epoch": 77.97, + "learning_rate": 0.0001694736842105263, + "loss": 0.003, + "step": 5224 + }, + { + "epoch": 77.98, + "learning_rate": 0.00016943859649122805, + "loss": 0.0033, + "step": 5225 + }, + { + "epoch": 78.0, + "learning_rate": 0.00016940350877192982, + "loss": 0.1931, + "step": 5226 + }, + { + "epoch": 78.01, + "learning_rate": 0.00016936842105263157, + "loss": 0.2121, + "step": 5227 + }, + { + "epoch": 78.03, + "learning_rate": 0.00016933333333333332, + "loss": 0.0295, + "step": 5228 + }, + { + "epoch": 78.04, + "learning_rate": 0.0001692982456140351, + "loss": 0.0095, + "step": 5229 + }, + { + "epoch": 78.06, + "learning_rate": 0.00016926315789473684, + "loss": 0.1848, + "step": 5230 + }, + { + "epoch": 78.07, + "learning_rate": 0.00016922807017543857, + "loss": 0.179, + "step": 5231 + }, + { + "epoch": 78.09, + "learning_rate": 0.00016919298245614032, + "loss": 0.002, + "step": 5232 + }, + { + "epoch": 78.1, + "learning_rate": 0.0001691578947368421, + "loss": 0.0021, + "step": 5233 + }, + { + "epoch": 78.12, + "learning_rate": 0.00016912280701754384, + "loss": 0.001, + "step": 5234 + }, + { + "epoch": 78.13, + "learning_rate": 0.0001690877192982456, + "loss": 0.0018, + "step": 5235 + }, + { + "epoch": 78.15, + "learning_rate": 0.00016905263157894734, + "loss": 0.0209, + "step": 5236 + }, + { + "epoch": 78.16, + "learning_rate": 0.0001690175438596491, + "loss": 0.003, + "step": 5237 + }, + { + "epoch": 78.18, + "learning_rate": 0.00016898245614035086, + "loss": 0.0016, + "step": 5238 + }, + { + "epoch": 78.19, + "learning_rate": 0.0001689473684210526, + "loss": 0.0861, + "step": 5239 + }, + { + "epoch": 78.21, + "learning_rate": 0.0001689122807017544, + "loss": 0.3714, + "step": 5240 + }, + { + "epoch": 78.22, + "learning_rate": 0.00016887719298245614, + "loss": 0.0318, + "step": 5241 + }, + { + "epoch": 78.24, + "learning_rate": 0.00016884210526315788, + "loss": 0.0144, + "step": 5242 + }, + { + "epoch": 78.25, + "learning_rate": 0.00016880701754385963, + "loss": 0.0178, + "step": 5243 + }, + { + "epoch": 78.27, + "learning_rate": 0.0001687719298245614, + "loss": 0.0019, + "step": 5244 + }, + { + "epoch": 78.28, + "learning_rate": 0.00016873684210526316, + "loss": 0.0023, + "step": 5245 + }, + { + "epoch": 78.3, + "learning_rate": 0.0001687017543859649, + "loss": 0.1122, + "step": 5246 + }, + { + "epoch": 78.31, + "learning_rate": 0.00016866666666666663, + "loss": 0.0778, + "step": 5247 + }, + { + "epoch": 78.33, + "learning_rate": 0.0001686315789473684, + "loss": 0.0029, + "step": 5248 + }, + { + "epoch": 78.34, + "learning_rate": 0.00016859649122807015, + "loss": 0.0016, + "step": 5249 + }, + { + "epoch": 78.36, + "learning_rate": 0.0001685614035087719, + "loss": 0.1838, + "step": 5250 + }, + { + "epoch": 78.37, + "learning_rate": 0.00016852631578947365, + "loss": 0.0189, + "step": 5251 + }, + { + "epoch": 78.39, + "learning_rate": 0.00016849122807017543, + "loss": 0.0798, + "step": 5252 + }, + { + "epoch": 78.4, + "learning_rate": 0.00016845614035087718, + "loss": 0.0018, + "step": 5253 + }, + { + "epoch": 78.42, + "learning_rate": 0.00016842105263157892, + "loss": 0.0105, + "step": 5254 + }, + { + "epoch": 78.43, + "learning_rate": 0.0001683859649122807, + "loss": 0.0016, + "step": 5255 + }, + { + "epoch": 78.45, + "learning_rate": 0.00016835087719298245, + "loss": 0.0053, + "step": 5256 + }, + { + "epoch": 78.46, + "learning_rate": 0.0001683157894736842, + "loss": 0.0009, + "step": 5257 + }, + { + "epoch": 78.48, + "learning_rate": 0.00016828070175438595, + "loss": 0.0009, + "step": 5258 + }, + { + "epoch": 78.49, + "learning_rate": 0.00016824561403508772, + "loss": 0.0011, + "step": 5259 + }, + { + "epoch": 78.51, + "learning_rate": 0.00016821052631578947, + "loss": 0.1651, + "step": 5260 + }, + { + "epoch": 78.52, + "learning_rate": 0.00016817543859649122, + "loss": 0.0027, + "step": 5261 + }, + { + "epoch": 78.54, + "learning_rate": 0.00016814035087719294, + "loss": 0.0896, + "step": 5262 + }, + { + "epoch": 78.55, + "learning_rate": 0.00016810526315789474, + "loss": 0.1429, + "step": 5263 + }, + { + "epoch": 78.57, + "learning_rate": 0.00016807017543859647, + "loss": 0.0048, + "step": 5264 + }, + { + "epoch": 78.58, + "learning_rate": 0.00016803508771929821, + "loss": 0.0124, + "step": 5265 + }, + { + "epoch": 78.59, + "learning_rate": 0.000168, + "loss": 0.0183, + "step": 5266 + }, + { + "epoch": 78.61, + "learning_rate": 0.00016796491228070174, + "loss": 0.0027, + "step": 5267 + }, + { + "epoch": 78.62, + "learning_rate": 0.0001679298245614035, + "loss": 0.0021, + "step": 5268 + }, + { + "epoch": 78.64, + "learning_rate": 0.00016789473684210524, + "loss": 0.0305, + "step": 5269 + }, + { + "epoch": 78.65, + "learning_rate": 0.000167859649122807, + "loss": 0.0382, + "step": 5270 + }, + { + "epoch": 78.67, + "learning_rate": 0.00016782456140350876, + "loss": 0.0012, + "step": 5271 + }, + { + "epoch": 78.68, + "learning_rate": 0.0001677894736842105, + "loss": 0.0018, + "step": 5272 + }, + { + "epoch": 78.7, + "learning_rate": 0.00016775438596491226, + "loss": 0.0016, + "step": 5273 + }, + { + "epoch": 78.71, + "learning_rate": 0.00016771929824561404, + "loss": 0.0149, + "step": 5274 + }, + { + "epoch": 78.73, + "learning_rate": 0.00016768421052631578, + "loss": 0.0011, + "step": 5275 + }, + { + "epoch": 78.74, + "learning_rate": 0.00016764912280701753, + "loss": 0.2443, + "step": 5276 + }, + { + "epoch": 78.76, + "learning_rate": 0.00016761403508771925, + "loss": 0.0069, + "step": 5277 + }, + { + "epoch": 78.77, + "learning_rate": 0.00016757894736842106, + "loss": 0.0438, + "step": 5278 + }, + { + "epoch": 78.79, + "learning_rate": 0.00016754385964912278, + "loss": 0.002, + "step": 5279 + }, + { + "epoch": 78.8, + "learning_rate": 0.00016750877192982453, + "loss": 0.0701, + "step": 5280 + }, + { + "epoch": 78.82, + "learning_rate": 0.0001674736842105263, + "loss": 0.0552, + "step": 5281 + }, + { + "epoch": 78.83, + "learning_rate": 0.00016743859649122805, + "loss": 0.1124, + "step": 5282 + }, + { + "epoch": 78.85, + "learning_rate": 0.0001674035087719298, + "loss": 0.0138, + "step": 5283 + }, + { + "epoch": 78.86, + "learning_rate": 0.00016736842105263155, + "loss": 0.0011, + "step": 5284 + }, + { + "epoch": 78.88, + "learning_rate": 0.00016733333333333333, + "loss": 0.0077, + "step": 5285 + }, + { + "epoch": 78.89, + "learning_rate": 0.00016729824561403507, + "loss": 0.0048, + "step": 5286 + }, + { + "epoch": 78.91, + "learning_rate": 0.00016726315789473682, + "loss": 0.0017, + "step": 5287 + }, + { + "epoch": 78.92, + "learning_rate": 0.00016722807017543857, + "loss": 0.1648, + "step": 5288 + }, + { + "epoch": 78.94, + "learning_rate": 0.00016719298245614035, + "loss": 0.1637, + "step": 5289 + }, + { + "epoch": 78.95, + "learning_rate": 0.0001671578947368421, + "loss": 0.0021, + "step": 5290 + }, + { + "epoch": 78.97, + "learning_rate": 0.00016712280701754385, + "loss": 0.0633, + "step": 5291 + }, + { + "epoch": 78.98, + "learning_rate": 0.00016708771929824562, + "loss": 0.0014, + "step": 5292 + }, + { + "epoch": 79.0, + "learning_rate": 0.00016705263157894737, + "loss": 0.0012, + "step": 5293 + }, + { + "epoch": 79.01, + "learning_rate": 0.00016701754385964912, + "loss": 0.0053, + "step": 5294 + }, + { + "epoch": 79.03, + "learning_rate": 0.00016698245614035084, + "loss": 0.0032, + "step": 5295 + }, + { + "epoch": 79.04, + "learning_rate": 0.00016694736842105262, + "loss": 0.0259, + "step": 5296 + }, + { + "epoch": 79.06, + "learning_rate": 0.00016691228070175437, + "loss": 0.0016, + "step": 5297 + }, + { + "epoch": 79.07, + "learning_rate": 0.00016687719298245611, + "loss": 0.0043, + "step": 5298 + }, + { + "epoch": 79.09, + "learning_rate": 0.00016684210526315786, + "loss": 0.099, + "step": 5299 + }, + { + "epoch": 79.1, + "learning_rate": 0.00016680701754385964, + "loss": 0.0156, + "step": 5300 + }, + { + "epoch": 79.12, + "learning_rate": 0.0001667719298245614, + "loss": 0.0023, + "step": 5301 + }, + { + "epoch": 79.13, + "learning_rate": 0.00016673684210526314, + "loss": 0.0013, + "step": 5302 + }, + { + "epoch": 79.15, + "learning_rate": 0.0001667017543859649, + "loss": 0.147, + "step": 5303 + }, + { + "epoch": 79.16, + "learning_rate": 0.00016666666666666666, + "loss": 0.0026, + "step": 5304 + }, + { + "epoch": 79.18, + "learning_rate": 0.0001666315789473684, + "loss": 0.1411, + "step": 5305 + }, + { + "epoch": 79.19, + "learning_rate": 0.00016659649122807016, + "loss": 0.0008, + "step": 5306 + }, + { + "epoch": 79.21, + "learning_rate": 0.00016656140350877193, + "loss": 0.002, + "step": 5307 + }, + { + "epoch": 79.22, + "learning_rate": 0.00016652631578947368, + "loss": 0.0238, + "step": 5308 + }, + { + "epoch": 79.24, + "learning_rate": 0.00016649122807017543, + "loss": 0.0234, + "step": 5309 + }, + { + "epoch": 79.25, + "learning_rate": 0.00016645614035087715, + "loss": 0.0023, + "step": 5310 + }, + { + "epoch": 79.27, + "learning_rate": 0.00016642105263157896, + "loss": 0.0031, + "step": 5311 + }, + { + "epoch": 79.28, + "learning_rate": 0.00016638596491228068, + "loss": 0.0033, + "step": 5312 + }, + { + "epoch": 79.3, + "learning_rate": 0.00016635087719298243, + "loss": 0.0009, + "step": 5313 + }, + { + "epoch": 79.31, + "learning_rate": 0.00016631578947368418, + "loss": 0.0379, + "step": 5314 + }, + { + "epoch": 79.33, + "learning_rate": 0.00016628070175438595, + "loss": 0.001, + "step": 5315 + }, + { + "epoch": 79.34, + "learning_rate": 0.0001662456140350877, + "loss": 0.0029, + "step": 5316 + }, + { + "epoch": 79.36, + "learning_rate": 0.00016621052631578945, + "loss": 0.0009, + "step": 5317 + }, + { + "epoch": 79.37, + "learning_rate": 0.00016617543859649123, + "loss": 0.0497, + "step": 5318 + }, + { + "epoch": 79.39, + "learning_rate": 0.00016614035087719297, + "loss": 0.0067, + "step": 5319 + }, + { + "epoch": 79.4, + "learning_rate": 0.00016610526315789472, + "loss": 0.001, + "step": 5320 + }, + { + "epoch": 79.42, + "learning_rate": 0.00016607017543859647, + "loss": 0.0771, + "step": 5321 + }, + { + "epoch": 79.43, + "learning_rate": 0.00016603508771929825, + "loss": 0.0084, + "step": 5322 + }, + { + "epoch": 79.45, + "learning_rate": 0.000166, + "loss": 0.0012, + "step": 5323 + }, + { + "epoch": 79.46, + "learning_rate": 0.00016596491228070175, + "loss": 0.0063, + "step": 5324 + }, + { + "epoch": 79.48, + "learning_rate": 0.00016592982456140347, + "loss": 0.0082, + "step": 5325 + }, + { + "epoch": 79.49, + "learning_rate": 0.00016589473684210527, + "loss": 0.0008, + "step": 5326 + }, + { + "epoch": 79.51, + "learning_rate": 0.000165859649122807, + "loss": 0.0708, + "step": 5327 + }, + { + "epoch": 79.52, + "learning_rate": 0.00016582456140350874, + "loss": 0.2766, + "step": 5328 + }, + { + "epoch": 79.54, + "learning_rate": 0.00016578947368421052, + "loss": 0.1932, + "step": 5329 + }, + { + "epoch": 79.55, + "learning_rate": 0.00016575438596491226, + "loss": 0.0024, + "step": 5330 + }, + { + "epoch": 79.57, + "learning_rate": 0.00016571929824561401, + "loss": 0.001, + "step": 5331 + }, + { + "epoch": 79.58, + "learning_rate": 0.00016568421052631576, + "loss": 0.026, + "step": 5332 + }, + { + "epoch": 79.59, + "learning_rate": 0.00016564912280701754, + "loss": 0.0014, + "step": 5333 + }, + { + "epoch": 79.61, + "learning_rate": 0.0001656140350877193, + "loss": 0.0027, + "step": 5334 + }, + { + "epoch": 79.62, + "learning_rate": 0.00016557894736842104, + "loss": 0.0038, + "step": 5335 + }, + { + "epoch": 79.64, + "learning_rate": 0.00016554385964912278, + "loss": 0.0042, + "step": 5336 + }, + { + "epoch": 79.65, + "learning_rate": 0.00016550877192982456, + "loss": 0.0009, + "step": 5337 + }, + { + "epoch": 79.67, + "learning_rate": 0.0001654736842105263, + "loss": 0.0024, + "step": 5338 + }, + { + "epoch": 79.68, + "learning_rate": 0.00016543859649122806, + "loss": 0.0009, + "step": 5339 + }, + { + "epoch": 79.7, + "learning_rate": 0.0001654035087719298, + "loss": 0.2441, + "step": 5340 + }, + { + "epoch": 79.71, + "learning_rate": 0.00016536842105263158, + "loss": 0.0387, + "step": 5341 + }, + { + "epoch": 79.73, + "learning_rate": 0.0001653333333333333, + "loss": 0.001, + "step": 5342 + }, + { + "epoch": 79.74, + "learning_rate": 0.00016529824561403505, + "loss": 0.0024, + "step": 5343 + }, + { + "epoch": 79.76, + "learning_rate": 0.00016526315789473683, + "loss": 0.0043, + "step": 5344 + }, + { + "epoch": 79.77, + "learning_rate": 0.00016522807017543858, + "loss": 0.0826, + "step": 5345 + }, + { + "epoch": 79.79, + "learning_rate": 0.00016519298245614033, + "loss": 0.0019, + "step": 5346 + }, + { + "epoch": 79.8, + "learning_rate": 0.00016515789473684208, + "loss": 0.0067, + "step": 5347 + }, + { + "epoch": 79.82, + "learning_rate": 0.00016512280701754385, + "loss": 0.1593, + "step": 5348 + }, + { + "epoch": 79.83, + "learning_rate": 0.0001650877192982456, + "loss": 0.0102, + "step": 5349 + }, + { + "epoch": 79.85, + "learning_rate": 0.00016505263157894735, + "loss": 0.0048, + "step": 5350 + }, + { + "epoch": 79.86, + "learning_rate": 0.0001650175438596491, + "loss": 0.0009, + "step": 5351 + }, + { + "epoch": 79.88, + "learning_rate": 0.00016498245614035087, + "loss": 0.1238, + "step": 5352 + }, + { + "epoch": 79.89, + "learning_rate": 0.00016494736842105262, + "loss": 0.0311, + "step": 5353 + }, + { + "epoch": 79.91, + "learning_rate": 0.00016491228070175437, + "loss": 0.0013, + "step": 5354 + }, + { + "epoch": 79.92, + "learning_rate": 0.00016487719298245615, + "loss": 0.0785, + "step": 5355 + }, + { + "epoch": 79.94, + "learning_rate": 0.0001648421052631579, + "loss": 0.2155, + "step": 5356 + }, + { + "epoch": 79.95, + "learning_rate": 0.00016480701754385964, + "loss": 0.2146, + "step": 5357 + }, + { + "epoch": 79.97, + "learning_rate": 0.00016477192982456137, + "loss": 0.1953, + "step": 5358 + }, + { + "epoch": 79.98, + "learning_rate": 0.00016473684210526317, + "loss": 0.0021, + "step": 5359 + }, + { + "epoch": 80.0, + "learning_rate": 0.0001647017543859649, + "loss": 0.0479, + "step": 5360 + }, + { + "epoch": 80.01, + "learning_rate": 0.00016466666666666664, + "loss": 0.0028, + "step": 5361 + }, + { + "epoch": 80.03, + "learning_rate": 0.0001646315789473684, + "loss": 0.001, + "step": 5362 + }, + { + "epoch": 80.04, + "learning_rate": 0.00016459649122807016, + "loss": 0.1739, + "step": 5363 + }, + { + "epoch": 80.06, + "learning_rate": 0.0001645614035087719, + "loss": 0.1624, + "step": 5364 + }, + { + "epoch": 80.07, + "learning_rate": 0.00016452631578947366, + "loss": 0.1035, + "step": 5365 + }, + { + "epoch": 80.09, + "learning_rate": 0.00016449122807017544, + "loss": 0.0018, + "step": 5366 + }, + { + "epoch": 80.1, + "learning_rate": 0.0001644561403508772, + "loss": 0.1251, + "step": 5367 + }, + { + "epoch": 80.12, + "learning_rate": 0.00016442105263157894, + "loss": 0.0059, + "step": 5368 + }, + { + "epoch": 80.13, + "learning_rate": 0.00016438596491228068, + "loss": 0.3782, + "step": 5369 + }, + { + "epoch": 80.15, + "learning_rate": 0.00016435087719298246, + "loss": 0.0023, + "step": 5370 + }, + { + "epoch": 80.16, + "learning_rate": 0.0001643157894736842, + "loss": 0.0096, + "step": 5371 + }, + { + "epoch": 80.18, + "learning_rate": 0.00016428070175438596, + "loss": 0.3213, + "step": 5372 + }, + { + "epoch": 80.19, + "learning_rate": 0.00016424561403508768, + "loss": 0.0009, + "step": 5373 + }, + { + "epoch": 80.21, + "learning_rate": 0.00016421052631578948, + "loss": 0.0033, + "step": 5374 + }, + { + "epoch": 80.22, + "learning_rate": 0.0001641754385964912, + "loss": 0.001, + "step": 5375 + }, + { + "epoch": 80.24, + "learning_rate": 0.00016414035087719295, + "loss": 0.0185, + "step": 5376 + }, + { + "epoch": 80.25, + "learning_rate": 0.0001641052631578947, + "loss": 0.0059, + "step": 5377 + }, + { + "epoch": 80.27, + "learning_rate": 0.00016407017543859648, + "loss": 0.0022, + "step": 5378 + }, + { + "epoch": 80.28, + "learning_rate": 0.00016403508771929823, + "loss": 0.0538, + "step": 5379 + }, + { + "epoch": 80.3, + "learning_rate": 0.00016399999999999997, + "loss": 0.0096, + "step": 5380 + }, + { + "epoch": 80.31, + "learning_rate": 0.00016396491228070175, + "loss": 0.0257, + "step": 5381 + }, + { + "epoch": 80.33, + "learning_rate": 0.0001639298245614035, + "loss": 0.0072, + "step": 5382 + }, + { + "epoch": 80.34, + "learning_rate": 0.00016389473684210525, + "loss": 0.1744, + "step": 5383 + }, + { + "epoch": 80.36, + "learning_rate": 0.000163859649122807, + "loss": 0.0038, + "step": 5384 + }, + { + "epoch": 80.37, + "learning_rate": 0.00016382456140350877, + "loss": 0.0013, + "step": 5385 + }, + { + "epoch": 80.39, + "learning_rate": 0.00016378947368421052, + "loss": 0.032, + "step": 5386 + }, + { + "epoch": 80.4, + "learning_rate": 0.00016375438596491227, + "loss": 0.001, + "step": 5387 + }, + { + "epoch": 80.42, + "learning_rate": 0.000163719298245614, + "loss": 0.0909, + "step": 5388 + }, + { + "epoch": 80.43, + "learning_rate": 0.0001636842105263158, + "loss": 0.0019, + "step": 5389 + }, + { + "epoch": 80.45, + "learning_rate": 0.00016364912280701752, + "loss": 0.0483, + "step": 5390 + }, + { + "epoch": 80.46, + "learning_rate": 0.00016361403508771927, + "loss": 0.2005, + "step": 5391 + }, + { + "epoch": 80.48, + "learning_rate": 0.00016357894736842104, + "loss": 0.0697, + "step": 5392 + }, + { + "epoch": 80.49, + "learning_rate": 0.0001635438596491228, + "loss": 0.001, + "step": 5393 + }, + { + "epoch": 80.51, + "learning_rate": 0.00016350877192982454, + "loss": 0.0186, + "step": 5394 + }, + { + "epoch": 80.52, + "learning_rate": 0.0001634736842105263, + "loss": 0.1185, + "step": 5395 + }, + { + "epoch": 80.54, + "learning_rate": 0.00016343859649122806, + "loss": 0.243, + "step": 5396 + }, + { + "epoch": 80.55, + "learning_rate": 0.0001634035087719298, + "loss": 0.0291, + "step": 5397 + }, + { + "epoch": 80.57, + "learning_rate": 0.00016336842105263156, + "loss": 0.0045, + "step": 5398 + }, + { + "epoch": 80.58, + "learning_rate": 0.0001633333333333333, + "loss": 0.0055, + "step": 5399 + }, + { + "epoch": 80.59, + "learning_rate": 0.00016329824561403509, + "loss": 0.0021, + "step": 5400 + }, + { + "epoch": 80.59, + "eval_accuracy": 0.8578071463534018, + "eval_f1": 0.8587756014640304, + "eval_loss": 0.6747012138366699, + "eval_runtime": 344.6737, + "eval_samples_per_second": 11.855, + "eval_steps_per_second": 0.743, + "step": 5400 + }, + { + "epoch": 80.61, + "learning_rate": 0.00016326315789473683, + "loss": 0.0016, + "step": 5401 + }, + { + "epoch": 80.62, + "learning_rate": 0.00016322807017543858, + "loss": 0.0357, + "step": 5402 + }, + { + "epoch": 80.64, + "learning_rate": 0.00016319298245614036, + "loss": 0.0355, + "step": 5403 + }, + { + "epoch": 80.65, + "learning_rate": 0.0001631578947368421, + "loss": 0.0025, + "step": 5404 + }, + { + "epoch": 80.67, + "learning_rate": 0.00016312280701754386, + "loss": 0.005, + "step": 5405 + }, + { + "epoch": 80.68, + "learning_rate": 0.00016308771929824558, + "loss": 0.002, + "step": 5406 + }, + { + "epoch": 80.7, + "learning_rate": 0.00016305263157894738, + "loss": 0.0022, + "step": 5407 + }, + { + "epoch": 80.71, + "learning_rate": 0.0001630175438596491, + "loss": 0.2577, + "step": 5408 + }, + { + "epoch": 80.73, + "learning_rate": 0.00016298245614035085, + "loss": 0.0013, + "step": 5409 + }, + { + "epoch": 80.74, + "learning_rate": 0.0001629473684210526, + "loss": 0.007, + "step": 5410 + }, + { + "epoch": 80.76, + "learning_rate": 0.00016291228070175438, + "loss": 0.0017, + "step": 5411 + }, + { + "epoch": 80.77, + "learning_rate": 0.00016287719298245613, + "loss": 0.0038, + "step": 5412 + }, + { + "epoch": 80.79, + "learning_rate": 0.00016284210526315787, + "loss": 0.2622, + "step": 5413 + }, + { + "epoch": 80.8, + "learning_rate": 0.00016280701754385962, + "loss": 0.0013, + "step": 5414 + }, + { + "epoch": 80.82, + "learning_rate": 0.0001627719298245614, + "loss": 0.1833, + "step": 5415 + }, + { + "epoch": 80.83, + "learning_rate": 0.00016273684210526315, + "loss": 0.066, + "step": 5416 + }, + { + "epoch": 80.85, + "learning_rate": 0.0001627017543859649, + "loss": 0.0032, + "step": 5417 + }, + { + "epoch": 80.86, + "learning_rate": 0.00016266666666666667, + "loss": 0.0973, + "step": 5418 + }, + { + "epoch": 80.88, + "learning_rate": 0.00016263157894736842, + "loss": 0.0088, + "step": 5419 + }, + { + "epoch": 80.89, + "learning_rate": 0.00016259649122807017, + "loss": 0.001, + "step": 5420 + }, + { + "epoch": 80.91, + "learning_rate": 0.0001625614035087719, + "loss": 0.1095, + "step": 5421 + }, + { + "epoch": 80.92, + "learning_rate": 0.0001625263157894737, + "loss": 0.0488, + "step": 5422 + }, + { + "epoch": 80.94, + "learning_rate": 0.00016249122807017542, + "loss": 0.0919, + "step": 5423 + }, + { + "epoch": 80.95, + "learning_rate": 0.00016245614035087716, + "loss": 0.1362, + "step": 5424 + }, + { + "epoch": 80.97, + "learning_rate": 0.00016242105263157891, + "loss": 0.0035, + "step": 5425 + }, + { + "epoch": 80.98, + "learning_rate": 0.0001623859649122807, + "loss": 0.0581, + "step": 5426 + }, + { + "epoch": 81.0, + "learning_rate": 0.00016235087719298244, + "loss": 0.2841, + "step": 5427 + }, + { + "epoch": 81.01, + "learning_rate": 0.0001623157894736842, + "loss": 0.3199, + "step": 5428 + }, + { + "epoch": 81.03, + "learning_rate": 0.00016228070175438596, + "loss": 0.0018, + "step": 5429 + }, + { + "epoch": 81.04, + "learning_rate": 0.0001622456140350877, + "loss": 0.1194, + "step": 5430 + }, + { + "epoch": 81.06, + "learning_rate": 0.00016221052631578946, + "loss": 0.2508, + "step": 5431 + }, + { + "epoch": 81.07, + "learning_rate": 0.0001621754385964912, + "loss": 0.0017, + "step": 5432 + }, + { + "epoch": 81.09, + "learning_rate": 0.00016214035087719299, + "loss": 0.0014, + "step": 5433 + }, + { + "epoch": 81.1, + "learning_rate": 0.00016210526315789473, + "loss": 0.0689, + "step": 5434 + }, + { + "epoch": 81.12, + "learning_rate": 0.00016207017543859648, + "loss": 0.0054, + "step": 5435 + }, + { + "epoch": 81.13, + "learning_rate": 0.0001620350877192982, + "loss": 0.0099, + "step": 5436 + }, + { + "epoch": 81.15, + "learning_rate": 0.000162, + "loss": 0.122, + "step": 5437 + }, + { + "epoch": 81.16, + "learning_rate": 0.00016196491228070173, + "loss": 0.0013, + "step": 5438 + }, + { + "epoch": 81.18, + "learning_rate": 0.00016192982456140348, + "loss": 0.0066, + "step": 5439 + }, + { + "epoch": 81.19, + "learning_rate": 0.00016189473684210523, + "loss": 0.013, + "step": 5440 + }, + { + "epoch": 81.21, + "learning_rate": 0.000161859649122807, + "loss": 0.1842, + "step": 5441 + }, + { + "epoch": 81.22, + "learning_rate": 0.00016182456140350875, + "loss": 0.0014, + "step": 5442 + }, + { + "epoch": 81.24, + "learning_rate": 0.0001617894736842105, + "loss": 0.0029, + "step": 5443 + }, + { + "epoch": 81.25, + "learning_rate": 0.00016175438596491228, + "loss": 0.001, + "step": 5444 + }, + { + "epoch": 81.27, + "learning_rate": 0.00016171929824561402, + "loss": 0.0012, + "step": 5445 + }, + { + "epoch": 81.28, + "learning_rate": 0.00016168421052631577, + "loss": 0.002, + "step": 5446 + }, + { + "epoch": 81.3, + "learning_rate": 0.00016164912280701752, + "loss": 0.004, + "step": 5447 + }, + { + "epoch": 81.31, + "learning_rate": 0.0001616140350877193, + "loss": 0.0059, + "step": 5448 + }, + { + "epoch": 81.33, + "learning_rate": 0.00016157894736842105, + "loss": 0.1543, + "step": 5449 + }, + { + "epoch": 81.34, + "learning_rate": 0.0001615438596491228, + "loss": 0.0173, + "step": 5450 + }, + { + "epoch": 81.36, + "learning_rate": 0.00016150877192982454, + "loss": 0.0082, + "step": 5451 + }, + { + "epoch": 81.37, + "learning_rate": 0.00016147368421052632, + "loss": 0.107, + "step": 5452 + }, + { + "epoch": 81.39, + "learning_rate": 0.00016143859649122807, + "loss": 0.001, + "step": 5453 + }, + { + "epoch": 81.4, + "learning_rate": 0.0001614035087719298, + "loss": 0.0013, + "step": 5454 + }, + { + "epoch": 81.42, + "learning_rate": 0.00016136842105263157, + "loss": 0.0149, + "step": 5455 + }, + { + "epoch": 81.43, + "learning_rate": 0.00016133333333333332, + "loss": 0.0018, + "step": 5456 + }, + { + "epoch": 81.45, + "learning_rate": 0.00016129824561403506, + "loss": 0.0016, + "step": 5457 + }, + { + "epoch": 81.46, + "learning_rate": 0.0001612631578947368, + "loss": 0.0064, + "step": 5458 + }, + { + "epoch": 81.48, + "learning_rate": 0.0001612280701754386, + "loss": 0.0017, + "step": 5459 + }, + { + "epoch": 81.49, + "learning_rate": 0.00016119298245614034, + "loss": 0.0033, + "step": 5460 + }, + { + "epoch": 81.51, + "learning_rate": 0.0001611578947368421, + "loss": 0.0052, + "step": 5461 + }, + { + "epoch": 81.52, + "learning_rate": 0.00016112280701754384, + "loss": 0.2818, + "step": 5462 + }, + { + "epoch": 81.54, + "learning_rate": 0.0001610877192982456, + "loss": 0.0061, + "step": 5463 + }, + { + "epoch": 81.55, + "learning_rate": 0.00016105263157894736, + "loss": 0.0035, + "step": 5464 + }, + { + "epoch": 81.57, + "learning_rate": 0.0001610175438596491, + "loss": 0.0019, + "step": 5465 + }, + { + "epoch": 81.58, + "learning_rate": 0.00016098245614035088, + "loss": 0.1041, + "step": 5466 + }, + { + "epoch": 81.59, + "learning_rate": 0.00016094736842105263, + "loss": 0.0522, + "step": 5467 + }, + { + "epoch": 81.61, + "learning_rate": 0.00016091228070175438, + "loss": 0.0039, + "step": 5468 + }, + { + "epoch": 81.62, + "learning_rate": 0.0001608771929824561, + "loss": 0.0032, + "step": 5469 + }, + { + "epoch": 81.64, + "learning_rate": 0.0001608421052631579, + "loss": 0.001, + "step": 5470 + }, + { + "epoch": 81.65, + "learning_rate": 0.00016080701754385963, + "loss": 0.0013, + "step": 5471 + }, + { + "epoch": 81.67, + "learning_rate": 0.00016077192982456138, + "loss": 0.0008, + "step": 5472 + }, + { + "epoch": 81.68, + "learning_rate": 0.00016073684210526313, + "loss": 0.0009, + "step": 5473 + }, + { + "epoch": 81.7, + "learning_rate": 0.0001607017543859649, + "loss": 0.0013, + "step": 5474 + }, + { + "epoch": 81.71, + "learning_rate": 0.00016066666666666665, + "loss": 0.001, + "step": 5475 + }, + { + "epoch": 81.73, + "learning_rate": 0.0001606315789473684, + "loss": 0.0009, + "step": 5476 + }, + { + "epoch": 81.74, + "learning_rate": 0.00016059649122807015, + "loss": 0.0008, + "step": 5477 + }, + { + "epoch": 81.76, + "learning_rate": 0.00016056140350877192, + "loss": 0.0014, + "step": 5478 + }, + { + "epoch": 81.77, + "learning_rate": 0.00016052631578947367, + "loss": 0.0065, + "step": 5479 + }, + { + "epoch": 81.79, + "learning_rate": 0.00016049122807017542, + "loss": 0.0279, + "step": 5480 + }, + { + "epoch": 81.8, + "learning_rate": 0.0001604561403508772, + "loss": 0.0008, + "step": 5481 + }, + { + "epoch": 81.82, + "learning_rate": 0.00016042105263157895, + "loss": 0.001, + "step": 5482 + }, + { + "epoch": 81.83, + "learning_rate": 0.0001603859649122807, + "loss": 0.0019, + "step": 5483 + }, + { + "epoch": 81.85, + "learning_rate": 0.00016035087719298242, + "loss": 0.0048, + "step": 5484 + }, + { + "epoch": 81.86, + "learning_rate": 0.00016031578947368422, + "loss": 0.0362, + "step": 5485 + }, + { + "epoch": 81.88, + "learning_rate": 0.00016028070175438594, + "loss": 0.009, + "step": 5486 + }, + { + "epoch": 81.89, + "learning_rate": 0.0001602456140350877, + "loss": 0.0007, + "step": 5487 + }, + { + "epoch": 81.91, + "learning_rate": 0.00016021052631578944, + "loss": 0.0149, + "step": 5488 + }, + { + "epoch": 81.92, + "learning_rate": 0.00016017543859649121, + "loss": 0.0784, + "step": 5489 + }, + { + "epoch": 81.94, + "learning_rate": 0.00016014035087719296, + "loss": 0.0009, + "step": 5490 + }, + { + "epoch": 81.95, + "learning_rate": 0.0001601052631578947, + "loss": 0.005, + "step": 5491 + }, + { + "epoch": 81.97, + "learning_rate": 0.0001600701754385965, + "loss": 0.0018, + "step": 5492 + }, + { + "epoch": 81.98, + "learning_rate": 0.00016003508771929824, + "loss": 0.0299, + "step": 5493 + }, + { + "epoch": 82.0, + "learning_rate": 0.00015999999999999999, + "loss": 0.2323, + "step": 5494 + }, + { + "epoch": 82.01, + "learning_rate": 0.00015996491228070173, + "loss": 0.0014, + "step": 5495 + }, + { + "epoch": 82.03, + "learning_rate": 0.0001599298245614035, + "loss": 0.0014, + "step": 5496 + }, + { + "epoch": 82.04, + "learning_rate": 0.00015989473684210526, + "loss": 0.0885, + "step": 5497 + }, + { + "epoch": 82.06, + "learning_rate": 0.000159859649122807, + "loss": 0.0017, + "step": 5498 + }, + { + "epoch": 82.07, + "learning_rate": 0.00015982456140350876, + "loss": 0.0009, + "step": 5499 + }, + { + "epoch": 82.09, + "learning_rate": 0.00015978947368421053, + "loss": 0.0017, + "step": 5500 + }, + { + "epoch": 82.1, + "learning_rate": 0.00015975438596491228, + "loss": 0.0016, + "step": 5501 + }, + { + "epoch": 82.12, + "learning_rate": 0.000159719298245614, + "loss": 0.1006, + "step": 5502 + }, + { + "epoch": 82.13, + "learning_rate": 0.00015968421052631575, + "loss": 0.0051, + "step": 5503 + }, + { + "epoch": 82.15, + "learning_rate": 0.00015964912280701753, + "loss": 0.0014, + "step": 5504 + }, + { + "epoch": 82.16, + "learning_rate": 0.00015961403508771928, + "loss": 0.0016, + "step": 5505 + }, + { + "epoch": 82.18, + "learning_rate": 0.00015957894736842103, + "loss": 0.0018, + "step": 5506 + }, + { + "epoch": 82.19, + "learning_rate": 0.0001595438596491228, + "loss": 0.0018, + "step": 5507 + }, + { + "epoch": 82.21, + "learning_rate": 0.00015950877192982455, + "loss": 0.0027, + "step": 5508 + }, + { + "epoch": 82.22, + "learning_rate": 0.0001594736842105263, + "loss": 0.0009, + "step": 5509 + }, + { + "epoch": 82.24, + "learning_rate": 0.00015943859649122805, + "loss": 0.0029, + "step": 5510 + }, + { + "epoch": 82.25, + "learning_rate": 0.00015940350877192982, + "loss": 0.0521, + "step": 5511 + }, + { + "epoch": 82.27, + "learning_rate": 0.00015936842105263157, + "loss": 0.019, + "step": 5512 + }, + { + "epoch": 82.28, + "learning_rate": 0.00015933333333333332, + "loss": 0.0095, + "step": 5513 + }, + { + "epoch": 82.3, + "learning_rate": 0.00015929824561403507, + "loss": 0.0915, + "step": 5514 + }, + { + "epoch": 82.31, + "learning_rate": 0.00015926315789473685, + "loss": 0.0017, + "step": 5515 + }, + { + "epoch": 82.33, + "learning_rate": 0.0001592280701754386, + "loss": 0.0024, + "step": 5516 + }, + { + "epoch": 82.34, + "learning_rate": 0.00015919298245614032, + "loss": 0.0162, + "step": 5517 + }, + { + "epoch": 82.36, + "learning_rate": 0.00015915789473684212, + "loss": 0.0009, + "step": 5518 + }, + { + "epoch": 82.37, + "learning_rate": 0.00015912280701754384, + "loss": 0.0009, + "step": 5519 + }, + { + "epoch": 82.39, + "learning_rate": 0.0001590877192982456, + "loss": 0.0008, + "step": 5520 + }, + { + "epoch": 82.4, + "learning_rate": 0.00015905263157894734, + "loss": 0.0008, + "step": 5521 + }, + { + "epoch": 82.42, + "learning_rate": 0.00015901754385964911, + "loss": 0.0081, + "step": 5522 + }, + { + "epoch": 82.43, + "learning_rate": 0.00015898245614035086, + "loss": 0.001, + "step": 5523 + }, + { + "epoch": 82.45, + "learning_rate": 0.0001589473684210526, + "loss": 0.0013, + "step": 5524 + }, + { + "epoch": 82.46, + "learning_rate": 0.00015891228070175436, + "loss": 0.0824, + "step": 5525 + }, + { + "epoch": 82.48, + "learning_rate": 0.00015887719298245614, + "loss": 0.0008, + "step": 5526 + }, + { + "epoch": 82.49, + "learning_rate": 0.00015884210526315789, + "loss": 0.0013, + "step": 5527 + }, + { + "epoch": 82.51, + "learning_rate": 0.00015880701754385963, + "loss": 0.0014, + "step": 5528 + }, + { + "epoch": 82.52, + "learning_rate": 0.0001587719298245614, + "loss": 0.0571, + "step": 5529 + }, + { + "epoch": 82.54, + "learning_rate": 0.00015873684210526316, + "loss": 0.0397, + "step": 5530 + }, + { + "epoch": 82.55, + "learning_rate": 0.0001587017543859649, + "loss": 0.0006, + "step": 5531 + }, + { + "epoch": 82.57, + "learning_rate": 0.00015866666666666663, + "loss": 0.106, + "step": 5532 + }, + { + "epoch": 82.58, + "learning_rate": 0.00015863157894736843, + "loss": 0.0012, + "step": 5533 + }, + { + "epoch": 82.59, + "learning_rate": 0.00015859649122807015, + "loss": 0.0023, + "step": 5534 + }, + { + "epoch": 82.61, + "learning_rate": 0.0001585614035087719, + "loss": 0.0007, + "step": 5535 + }, + { + "epoch": 82.62, + "learning_rate": 0.00015852631578947365, + "loss": 0.0007, + "step": 5536 + }, + { + "epoch": 82.64, + "learning_rate": 0.00015849122807017543, + "loss": 0.0006, + "step": 5537 + }, + { + "epoch": 82.65, + "learning_rate": 0.00015845614035087718, + "loss": 0.0947, + "step": 5538 + }, + { + "epoch": 82.67, + "learning_rate": 0.00015842105263157892, + "loss": 0.0804, + "step": 5539 + }, + { + "epoch": 82.68, + "learning_rate": 0.00015838596491228067, + "loss": 0.0006, + "step": 5540 + }, + { + "epoch": 82.7, + "learning_rate": 0.00015835087719298245, + "loss": 0.002, + "step": 5541 + }, + { + "epoch": 82.71, + "learning_rate": 0.0001583157894736842, + "loss": 0.0091, + "step": 5542 + }, + { + "epoch": 82.73, + "learning_rate": 0.00015828070175438595, + "loss": 0.0725, + "step": 5543 + }, + { + "epoch": 82.74, + "learning_rate": 0.00015824561403508772, + "loss": 0.0369, + "step": 5544 + }, + { + "epoch": 82.76, + "learning_rate": 0.00015821052631578947, + "loss": 0.0008, + "step": 5545 + }, + { + "epoch": 82.77, + "learning_rate": 0.00015817543859649122, + "loss": 0.2917, + "step": 5546 + }, + { + "epoch": 82.79, + "learning_rate": 0.00015814035087719297, + "loss": 0.3772, + "step": 5547 + }, + { + "epoch": 82.8, + "learning_rate": 0.00015810526315789475, + "loss": 0.0007, + "step": 5548 + }, + { + "epoch": 82.82, + "learning_rate": 0.00015807017543859647, + "loss": 0.1434, + "step": 5549 + }, + { + "epoch": 82.83, + "learning_rate": 0.00015803508771929822, + "loss": 0.0369, + "step": 5550 + }, + { + "epoch": 82.85, + "learning_rate": 0.00015799999999999996, + "loss": 0.0007, + "step": 5551 + }, + { + "epoch": 82.86, + "learning_rate": 0.00015796491228070174, + "loss": 0.0007, + "step": 5552 + }, + { + "epoch": 82.88, + "learning_rate": 0.0001579298245614035, + "loss": 0.0009, + "step": 5553 + }, + { + "epoch": 82.89, + "learning_rate": 0.00015789473684210524, + "loss": 0.0011, + "step": 5554 + }, + { + "epoch": 82.91, + "learning_rate": 0.00015785964912280701, + "loss": 0.0009, + "step": 5555 + }, + { + "epoch": 82.92, + "learning_rate": 0.00015782456140350876, + "loss": 0.0047, + "step": 5556 + }, + { + "epoch": 82.94, + "learning_rate": 0.0001577894736842105, + "loss": 0.0011, + "step": 5557 + }, + { + "epoch": 82.95, + "learning_rate": 0.00015775438596491226, + "loss": 0.0142, + "step": 5558 + }, + { + "epoch": 82.97, + "learning_rate": 0.00015771929824561404, + "loss": 0.0384, + "step": 5559 + }, + { + "epoch": 82.98, + "learning_rate": 0.00015768421052631578, + "loss": 0.0017, + "step": 5560 + }, + { + "epoch": 83.0, + "learning_rate": 0.00015764912280701753, + "loss": 0.0011, + "step": 5561 + }, + { + "epoch": 83.01, + "learning_rate": 0.00015761403508771928, + "loss": 0.0043, + "step": 5562 + }, + { + "epoch": 83.03, + "learning_rate": 0.00015757894736842106, + "loss": 0.0012, + "step": 5563 + }, + { + "epoch": 83.04, + "learning_rate": 0.0001575438596491228, + "loss": 0.1857, + "step": 5564 + }, + { + "epoch": 83.06, + "learning_rate": 0.00015750877192982453, + "loss": 0.1551, + "step": 5565 + }, + { + "epoch": 83.07, + "learning_rate": 0.00015747368421052633, + "loss": 0.0013, + "step": 5566 + }, + { + "epoch": 83.09, + "learning_rate": 0.00015743859649122805, + "loss": 0.0018, + "step": 5567 + }, + { + "epoch": 83.1, + "learning_rate": 0.0001574035087719298, + "loss": 0.0416, + "step": 5568 + }, + { + "epoch": 83.12, + "learning_rate": 0.00015736842105263155, + "loss": 0.0016, + "step": 5569 + }, + { + "epoch": 83.13, + "learning_rate": 0.00015733333333333333, + "loss": 0.0565, + "step": 5570 + }, + { + "epoch": 83.15, + "learning_rate": 0.00015729824561403508, + "loss": 0.0044, + "step": 5571 + }, + { + "epoch": 83.16, + "learning_rate": 0.00015726315789473682, + "loss": 0.0031, + "step": 5572 + }, + { + "epoch": 83.18, + "learning_rate": 0.00015722807017543857, + "loss": 0.0062, + "step": 5573 + }, + { + "epoch": 83.19, + "learning_rate": 0.00015719298245614035, + "loss": 0.0015, + "step": 5574 + }, + { + "epoch": 83.21, + "learning_rate": 0.0001571578947368421, + "loss": 0.0162, + "step": 5575 + }, + { + "epoch": 83.22, + "learning_rate": 0.00015712280701754385, + "loss": 0.0023, + "step": 5576 + }, + { + "epoch": 83.24, + "learning_rate": 0.0001570877192982456, + "loss": 0.1509, + "step": 5577 + }, + { + "epoch": 83.25, + "learning_rate": 0.00015705263157894737, + "loss": 0.0032, + "step": 5578 + }, + { + "epoch": 83.27, + "learning_rate": 0.00015701754385964912, + "loss": 0.0601, + "step": 5579 + }, + { + "epoch": 83.28, + "learning_rate": 0.00015698245614035084, + "loss": 0.0017, + "step": 5580 + }, + { + "epoch": 83.3, + "learning_rate": 0.00015694736842105264, + "loss": 0.0017, + "step": 5581 + }, + { + "epoch": 83.31, + "learning_rate": 0.00015691228070175437, + "loss": 0.0074, + "step": 5582 + }, + { + "epoch": 83.33, + "learning_rate": 0.00015687719298245612, + "loss": 0.2237, + "step": 5583 + }, + { + "epoch": 83.34, + "learning_rate": 0.00015684210526315786, + "loss": 0.0026, + "step": 5584 + }, + { + "epoch": 83.36, + "learning_rate": 0.00015680701754385964, + "loss": 0.0321, + "step": 5585 + }, + { + "epoch": 83.37, + "learning_rate": 0.0001567719298245614, + "loss": 0.0013, + "step": 5586 + }, + { + "epoch": 83.39, + "learning_rate": 0.00015673684210526314, + "loss": 0.0012, + "step": 5587 + }, + { + "epoch": 83.4, + "learning_rate": 0.00015670175438596489, + "loss": 0.0011, + "step": 5588 + }, + { + "epoch": 83.42, + "learning_rate": 0.00015666666666666666, + "loss": 0.0089, + "step": 5589 + }, + { + "epoch": 83.43, + "learning_rate": 0.0001566315789473684, + "loss": 0.0007, + "step": 5590 + }, + { + "epoch": 83.45, + "learning_rate": 0.00015659649122807016, + "loss": 0.0008, + "step": 5591 + }, + { + "epoch": 83.46, + "learning_rate": 0.00015656140350877194, + "loss": 0.0024, + "step": 5592 + }, + { + "epoch": 83.48, + "learning_rate": 0.00015652631578947368, + "loss": 0.0008, + "step": 5593 + }, + { + "epoch": 83.49, + "learning_rate": 0.00015649122807017543, + "loss": 0.0019, + "step": 5594 + }, + { + "epoch": 83.51, + "learning_rate": 0.00015645614035087718, + "loss": 0.2801, + "step": 5595 + }, + { + "epoch": 83.52, + "learning_rate": 0.00015642105263157896, + "loss": 0.0012, + "step": 5596 + }, + { + "epoch": 83.54, + "learning_rate": 0.00015638596491228068, + "loss": 0.0034, + "step": 5597 + }, + { + "epoch": 83.55, + "learning_rate": 0.00015635087719298243, + "loss": 0.0846, + "step": 5598 + }, + { + "epoch": 83.57, + "learning_rate": 0.00015631578947368418, + "loss": 0.0549, + "step": 5599 + }, + { + "epoch": 83.58, + "learning_rate": 0.00015628070175438595, + "loss": 0.2558, + "step": 5600 + }, + { + "epoch": 83.58, + "eval_accuracy": 0.8580518844836026, + "eval_f1": 0.8581217984766242, + "eval_loss": 0.7513839602470398, + "eval_runtime": 345.0011, + "eval_samples_per_second": 11.843, + "eval_steps_per_second": 0.742, + "step": 5600 + }, + { + "epoch": 83.59, + "learning_rate": 0.0001562456140350877, + "loss": 0.0009, + "step": 5601 + }, + { + "epoch": 83.61, + "learning_rate": 0.00015621052631578945, + "loss": 0.0996, + "step": 5602 + }, + { + "epoch": 83.62, + "learning_rate": 0.0001561754385964912, + "loss": 0.0023, + "step": 5603 + }, + { + "epoch": 83.64, + "learning_rate": 0.00015614035087719297, + "loss": 0.0013, + "step": 5604 + }, + { + "epoch": 83.65, + "learning_rate": 0.00015610526315789472, + "loss": 0.0011, + "step": 5605 + }, + { + "epoch": 83.67, + "learning_rate": 0.00015607017543859647, + "loss": 0.0021, + "step": 5606 + }, + { + "epoch": 83.68, + "learning_rate": 0.00015603508771929825, + "loss": 0.267, + "step": 5607 + }, + { + "epoch": 83.7, + "learning_rate": 0.000156, + "loss": 0.0036, + "step": 5608 + }, + { + "epoch": 83.71, + "learning_rate": 0.00015596491228070175, + "loss": 0.0013, + "step": 5609 + }, + { + "epoch": 83.73, + "learning_rate": 0.0001559298245614035, + "loss": 0.0077, + "step": 5610 + }, + { + "epoch": 83.74, + "learning_rate": 0.00015589473684210527, + "loss": 0.0034, + "step": 5611 + }, + { + "epoch": 83.76, + "learning_rate": 0.00015585964912280702, + "loss": 0.0024, + "step": 5612 + }, + { + "epoch": 83.77, + "learning_rate": 0.00015582456140350874, + "loss": 0.1785, + "step": 5613 + }, + { + "epoch": 83.79, + "learning_rate": 0.0001557894736842105, + "loss": 0.0032, + "step": 5614 + }, + { + "epoch": 83.8, + "learning_rate": 0.00015575438596491227, + "loss": 0.0021, + "step": 5615 + }, + { + "epoch": 83.82, + "learning_rate": 0.00015571929824561401, + "loss": 0.4422, + "step": 5616 + }, + { + "epoch": 83.83, + "learning_rate": 0.00015568421052631576, + "loss": 0.0062, + "step": 5617 + }, + { + "epoch": 83.85, + "learning_rate": 0.00015564912280701754, + "loss": 0.3256, + "step": 5618 + }, + { + "epoch": 83.86, + "learning_rate": 0.0001556140350877193, + "loss": 0.0058, + "step": 5619 + }, + { + "epoch": 83.88, + "learning_rate": 0.00015557894736842104, + "loss": 0.0015, + "step": 5620 + }, + { + "epoch": 83.89, + "learning_rate": 0.00015554385964912279, + "loss": 0.0021, + "step": 5621 + }, + { + "epoch": 83.91, + "learning_rate": 0.00015550877192982456, + "loss": 0.3244, + "step": 5622 + }, + { + "epoch": 83.92, + "learning_rate": 0.0001554736842105263, + "loss": 0.002, + "step": 5623 + }, + { + "epoch": 83.94, + "learning_rate": 0.00015543859649122806, + "loss": 0.0073, + "step": 5624 + }, + { + "epoch": 83.95, + "learning_rate": 0.0001554035087719298, + "loss": 0.034, + "step": 5625 + }, + { + "epoch": 83.97, + "learning_rate": 0.00015536842105263158, + "loss": 0.0015, + "step": 5626 + }, + { + "epoch": 83.98, + "learning_rate": 0.00015533333333333333, + "loss": 0.0368, + "step": 5627 + }, + { + "epoch": 84.0, + "learning_rate": 0.00015529824561403505, + "loss": 0.0769, + "step": 5628 + }, + { + "epoch": 84.01, + "learning_rate": 0.00015526315789473686, + "loss": 0.0027, + "step": 5629 + }, + { + "epoch": 84.03, + "learning_rate": 0.00015522807017543858, + "loss": 0.1014, + "step": 5630 + }, + { + "epoch": 84.04, + "learning_rate": 0.00015519298245614033, + "loss": 0.0016, + "step": 5631 + }, + { + "epoch": 84.06, + "learning_rate": 0.00015515789473684208, + "loss": 0.0019, + "step": 5632 + }, + { + "epoch": 84.07, + "learning_rate": 0.00015512280701754385, + "loss": 0.0227, + "step": 5633 + }, + { + "epoch": 84.09, + "learning_rate": 0.0001550877192982456, + "loss": 0.0014, + "step": 5634 + }, + { + "epoch": 84.1, + "learning_rate": 0.00015505263157894735, + "loss": 0.0755, + "step": 5635 + }, + { + "epoch": 84.12, + "learning_rate": 0.0001550175438596491, + "loss": 0.0014, + "step": 5636 + }, + { + "epoch": 84.13, + "learning_rate": 0.00015498245614035087, + "loss": 0.0031, + "step": 5637 + }, + { + "epoch": 84.15, + "learning_rate": 0.00015494736842105262, + "loss": 0.107, + "step": 5638 + }, + { + "epoch": 84.16, + "learning_rate": 0.00015491228070175437, + "loss": 0.3357, + "step": 5639 + }, + { + "epoch": 84.18, + "learning_rate": 0.00015487719298245612, + "loss": 0.0045, + "step": 5640 + }, + { + "epoch": 84.19, + "learning_rate": 0.0001548421052631579, + "loss": 0.0018, + "step": 5641 + }, + { + "epoch": 84.21, + "learning_rate": 0.00015480701754385965, + "loss": 0.0163, + "step": 5642 + }, + { + "epoch": 84.22, + "learning_rate": 0.00015477192982456137, + "loss": 0.3467, + "step": 5643 + }, + { + "epoch": 84.24, + "learning_rate": 0.00015473684210526317, + "loss": 0.0168, + "step": 5644 + }, + { + "epoch": 84.25, + "learning_rate": 0.0001547017543859649, + "loss": 0.0943, + "step": 5645 + }, + { + "epoch": 84.27, + "learning_rate": 0.00015466666666666664, + "loss": 0.002, + "step": 5646 + }, + { + "epoch": 84.28, + "learning_rate": 0.0001546315789473684, + "loss": 0.193, + "step": 5647 + }, + { + "epoch": 84.3, + "learning_rate": 0.00015459649122807017, + "loss": 0.0045, + "step": 5648 + }, + { + "epoch": 84.31, + "learning_rate": 0.00015456140350877191, + "loss": 0.0017, + "step": 5649 + }, + { + "epoch": 84.33, + "learning_rate": 0.00015452631578947366, + "loss": 0.4212, + "step": 5650 + }, + { + "epoch": 84.34, + "learning_rate": 0.0001544912280701754, + "loss": 0.0033, + "step": 5651 + }, + { + "epoch": 84.36, + "learning_rate": 0.0001544561403508772, + "loss": 0.0022, + "step": 5652 + }, + { + "epoch": 84.37, + "learning_rate": 0.00015442105263157894, + "loss": 0.0021, + "step": 5653 + }, + { + "epoch": 84.39, + "learning_rate": 0.00015438596491228068, + "loss": 0.0087, + "step": 5654 + }, + { + "epoch": 84.4, + "learning_rate": 0.00015435087719298246, + "loss": 0.0464, + "step": 5655 + }, + { + "epoch": 84.42, + "learning_rate": 0.0001543157894736842, + "loss": 0.0063, + "step": 5656 + }, + { + "epoch": 84.43, + "learning_rate": 0.00015428070175438596, + "loss": 0.0176, + "step": 5657 + }, + { + "epoch": 84.45, + "learning_rate": 0.0001542456140350877, + "loss": 0.3616, + "step": 5658 + }, + { + "epoch": 84.46, + "learning_rate": 0.00015421052631578948, + "loss": 0.0088, + "step": 5659 + }, + { + "epoch": 84.48, + "learning_rate": 0.00015417543859649123, + "loss": 0.0016, + "step": 5660 + }, + { + "epoch": 84.49, + "learning_rate": 0.00015414035087719295, + "loss": 0.0014, + "step": 5661 + }, + { + "epoch": 84.51, + "learning_rate": 0.0001541052631578947, + "loss": 0.0018, + "step": 5662 + }, + { + "epoch": 84.52, + "learning_rate": 0.00015407017543859648, + "loss": 0.0014, + "step": 5663 + }, + { + "epoch": 84.54, + "learning_rate": 0.00015403508771929823, + "loss": 0.0066, + "step": 5664 + }, + { + "epoch": 84.55, + "learning_rate": 0.00015399999999999998, + "loss": 0.0012, + "step": 5665 + }, + { + "epoch": 84.57, + "learning_rate": 0.00015396491228070172, + "loss": 0.0026, + "step": 5666 + }, + { + "epoch": 84.58, + "learning_rate": 0.0001539298245614035, + "loss": 0.0016, + "step": 5667 + }, + { + "epoch": 84.59, + "learning_rate": 0.00015389473684210525, + "loss": 0.0063, + "step": 5668 + }, + { + "epoch": 84.61, + "learning_rate": 0.000153859649122807, + "loss": 0.0022, + "step": 5669 + }, + { + "epoch": 84.62, + "learning_rate": 0.00015382456140350877, + "loss": 0.0018, + "step": 5670 + }, + { + "epoch": 84.64, + "learning_rate": 0.00015378947368421052, + "loss": 0.1457, + "step": 5671 + }, + { + "epoch": 84.65, + "learning_rate": 0.00015375438596491227, + "loss": 0.0018, + "step": 5672 + }, + { + "epoch": 84.67, + "learning_rate": 0.00015371929824561402, + "loss": 0.0831, + "step": 5673 + }, + { + "epoch": 84.68, + "learning_rate": 0.0001536842105263158, + "loss": 0.0848, + "step": 5674 + }, + { + "epoch": 84.7, + "learning_rate": 0.00015364912280701754, + "loss": 0.1043, + "step": 5675 + }, + { + "epoch": 84.71, + "learning_rate": 0.00015361403508771927, + "loss": 0.01, + "step": 5676 + }, + { + "epoch": 84.73, + "learning_rate": 0.00015357894736842102, + "loss": 0.0043, + "step": 5677 + }, + { + "epoch": 84.74, + "learning_rate": 0.0001535438596491228, + "loss": 0.0017, + "step": 5678 + }, + { + "epoch": 84.76, + "learning_rate": 0.00015350877192982454, + "loss": 0.0034, + "step": 5679 + }, + { + "epoch": 84.77, + "learning_rate": 0.0001534736842105263, + "loss": 0.0042, + "step": 5680 + }, + { + "epoch": 84.79, + "learning_rate": 0.00015343859649122806, + "loss": 0.0725, + "step": 5681 + }, + { + "epoch": 84.8, + "learning_rate": 0.0001534035087719298, + "loss": 0.0264, + "step": 5682 + }, + { + "epoch": 84.82, + "learning_rate": 0.00015336842105263156, + "loss": 0.0028, + "step": 5683 + }, + { + "epoch": 84.83, + "learning_rate": 0.0001533333333333333, + "loss": 0.6024, + "step": 5684 + }, + { + "epoch": 84.85, + "learning_rate": 0.0001532982456140351, + "loss": 0.0025, + "step": 5685 + }, + { + "epoch": 84.86, + "learning_rate": 0.00015326315789473684, + "loss": 0.0011, + "step": 5686 + }, + { + "epoch": 84.88, + "learning_rate": 0.00015322807017543858, + "loss": 0.0021, + "step": 5687 + }, + { + "epoch": 84.89, + "learning_rate": 0.00015319298245614033, + "loss": 0.001, + "step": 5688 + }, + { + "epoch": 84.91, + "learning_rate": 0.0001531578947368421, + "loss": 0.0023, + "step": 5689 + }, + { + "epoch": 84.92, + "learning_rate": 0.00015312280701754386, + "loss": 0.0969, + "step": 5690 + }, + { + "epoch": 84.94, + "learning_rate": 0.00015308771929824558, + "loss": 0.0009, + "step": 5691 + }, + { + "epoch": 84.95, + "learning_rate": 0.00015305263157894738, + "loss": 0.0018, + "step": 5692 + }, + { + "epoch": 84.97, + "learning_rate": 0.0001530175438596491, + "loss": 0.0348, + "step": 5693 + }, + { + "epoch": 84.98, + "learning_rate": 0.00015298245614035085, + "loss": 0.006, + "step": 5694 + }, + { + "epoch": 85.0, + "learning_rate": 0.0001529473684210526, + "loss": 0.0012, + "step": 5695 + }, + { + "epoch": 85.01, + "learning_rate": 0.00015291228070175438, + "loss": 0.0085, + "step": 5696 + }, + { + "epoch": 85.03, + "learning_rate": 0.00015287719298245613, + "loss": 0.0019, + "step": 5697 + }, + { + "epoch": 85.04, + "learning_rate": 0.00015284210526315788, + "loss": 0.0055, + "step": 5698 + }, + { + "epoch": 85.06, + "learning_rate": 0.00015280701754385962, + "loss": 0.1146, + "step": 5699 + }, + { + "epoch": 85.07, + "learning_rate": 0.0001527719298245614, + "loss": 0.0261, + "step": 5700 + }, + { + "epoch": 85.09, + "learning_rate": 0.00015273684210526315, + "loss": 0.0082, + "step": 5701 + }, + { + "epoch": 85.1, + "learning_rate": 0.0001527017543859649, + "loss": 0.001, + "step": 5702 + }, + { + "epoch": 85.12, + "learning_rate": 0.00015266666666666665, + "loss": 0.0011, + "step": 5703 + }, + { + "epoch": 85.13, + "learning_rate": 0.00015263157894736842, + "loss": 0.0084, + "step": 5704 + }, + { + "epoch": 85.15, + "learning_rate": 0.00015259649122807017, + "loss": 0.005, + "step": 5705 + }, + { + "epoch": 85.16, + "learning_rate": 0.00015256140350877192, + "loss": 0.0011, + "step": 5706 + }, + { + "epoch": 85.18, + "learning_rate": 0.0001525263157894737, + "loss": 0.0007, + "step": 5707 + }, + { + "epoch": 85.19, + "learning_rate": 0.00015249122807017544, + "loss": 0.0018, + "step": 5708 + }, + { + "epoch": 85.21, + "learning_rate": 0.00015245614035087717, + "loss": 0.0009, + "step": 5709 + }, + { + "epoch": 85.22, + "learning_rate": 0.00015242105263157891, + "loss": 0.001, + "step": 5710 + }, + { + "epoch": 85.24, + "learning_rate": 0.0001523859649122807, + "loss": 0.0112, + "step": 5711 + }, + { + "epoch": 85.25, + "learning_rate": 0.00015235087719298244, + "loss": 0.0023, + "step": 5712 + }, + { + "epoch": 85.27, + "learning_rate": 0.0001523157894736842, + "loss": 0.0017, + "step": 5713 + }, + { + "epoch": 85.28, + "learning_rate": 0.00015228070175438594, + "loss": 0.0008, + "step": 5714 + }, + { + "epoch": 85.3, + "learning_rate": 0.0001522456140350877, + "loss": 0.0268, + "step": 5715 + }, + { + "epoch": 85.31, + "learning_rate": 0.00015221052631578946, + "loss": 0.0033, + "step": 5716 + }, + { + "epoch": 85.33, + "learning_rate": 0.0001521754385964912, + "loss": 0.0422, + "step": 5717 + }, + { + "epoch": 85.34, + "learning_rate": 0.00015214035087719299, + "loss": 0.0048, + "step": 5718 + }, + { + "epoch": 85.36, + "learning_rate": 0.00015210526315789473, + "loss": 0.005, + "step": 5719 + }, + { + "epoch": 85.37, + "learning_rate": 0.00015207017543859648, + "loss": 0.0054, + "step": 5720 + }, + { + "epoch": 85.39, + "learning_rate": 0.00015203508771929823, + "loss": 0.1974, + "step": 5721 + }, + { + "epoch": 85.4, + "learning_rate": 0.000152, + "loss": 0.0008, + "step": 5722 + }, + { + "epoch": 85.42, + "learning_rate": 0.00015196491228070176, + "loss": 0.0007, + "step": 5723 + }, + { + "epoch": 85.43, + "learning_rate": 0.00015192982456140348, + "loss": 0.0016, + "step": 5724 + }, + { + "epoch": 85.45, + "learning_rate": 0.00015189473684210523, + "loss": 0.0007, + "step": 5725 + }, + { + "epoch": 85.46, + "learning_rate": 0.000151859649122807, + "loss": 0.0115, + "step": 5726 + }, + { + "epoch": 85.48, + "learning_rate": 0.00015182456140350875, + "loss": 0.0009, + "step": 5727 + }, + { + "epoch": 85.49, + "learning_rate": 0.0001517894736842105, + "loss": 0.0008, + "step": 5728 + }, + { + "epoch": 85.51, + "learning_rate": 0.00015175438596491228, + "loss": 0.001, + "step": 5729 + }, + { + "epoch": 85.52, + "learning_rate": 0.00015171929824561403, + "loss": 0.0024, + "step": 5730 + }, + { + "epoch": 85.54, + "learning_rate": 0.00015168421052631577, + "loss": 0.0352, + "step": 5731 + }, + { + "epoch": 85.55, + "learning_rate": 0.00015164912280701752, + "loss": 0.0773, + "step": 5732 + }, + { + "epoch": 85.57, + "learning_rate": 0.0001516140350877193, + "loss": 0.0067, + "step": 5733 + }, + { + "epoch": 85.58, + "learning_rate": 0.00015157894736842105, + "loss": 0.001, + "step": 5734 + }, + { + "epoch": 85.59, + "learning_rate": 0.0001515438596491228, + "loss": 0.0658, + "step": 5735 + }, + { + "epoch": 85.61, + "learning_rate": 0.00015150877192982455, + "loss": 0.0008, + "step": 5736 + }, + { + "epoch": 85.62, + "learning_rate": 0.00015147368421052632, + "loss": 0.0021, + "step": 5737 + }, + { + "epoch": 85.64, + "learning_rate": 0.00015143859649122807, + "loss": 0.0005, + "step": 5738 + }, + { + "epoch": 85.65, + "learning_rate": 0.0001514035087719298, + "loss": 0.0008, + "step": 5739 + }, + { + "epoch": 85.67, + "learning_rate": 0.00015136842105263154, + "loss": 0.0009, + "step": 5740 + }, + { + "epoch": 85.68, + "learning_rate": 0.00015133333333333332, + "loss": 0.0006, + "step": 5741 + }, + { + "epoch": 85.7, + "learning_rate": 0.00015129824561403507, + "loss": 0.0015, + "step": 5742 + }, + { + "epoch": 85.71, + "learning_rate": 0.00015126315789473681, + "loss": 0.0008, + "step": 5743 + }, + { + "epoch": 85.73, + "learning_rate": 0.0001512280701754386, + "loss": 0.0008, + "step": 5744 + }, + { + "epoch": 85.74, + "learning_rate": 0.00015119298245614034, + "loss": 0.0009, + "step": 5745 + }, + { + "epoch": 85.76, + "learning_rate": 0.0001511578947368421, + "loss": 0.0076, + "step": 5746 + }, + { + "epoch": 85.77, + "learning_rate": 0.00015112280701754384, + "loss": 0.0253, + "step": 5747 + }, + { + "epoch": 85.79, + "learning_rate": 0.0001510877192982456, + "loss": 0.0017, + "step": 5748 + }, + { + "epoch": 85.8, + "learning_rate": 0.00015105263157894736, + "loss": 0.0044, + "step": 5749 + }, + { + "epoch": 85.82, + "learning_rate": 0.0001510175438596491, + "loss": 0.0056, + "step": 5750 + }, + { + "epoch": 85.83, + "learning_rate": 0.00015098245614035086, + "loss": 0.0009, + "step": 5751 + }, + { + "epoch": 85.85, + "learning_rate": 0.00015094736842105263, + "loss": 0.0039, + "step": 5752 + }, + { + "epoch": 85.86, + "learning_rate": 0.00015091228070175438, + "loss": 0.0027, + "step": 5753 + }, + { + "epoch": 85.88, + "learning_rate": 0.00015087719298245613, + "loss": 0.0222, + "step": 5754 + }, + { + "epoch": 85.89, + "learning_rate": 0.0001508421052631579, + "loss": 0.1151, + "step": 5755 + }, + { + "epoch": 85.91, + "learning_rate": 0.00015080701754385963, + "loss": 0.322, + "step": 5756 + }, + { + "epoch": 85.92, + "learning_rate": 0.00015077192982456138, + "loss": 0.0006, + "step": 5757 + }, + { + "epoch": 85.94, + "learning_rate": 0.00015073684210526313, + "loss": 0.0007, + "step": 5758 + }, + { + "epoch": 85.95, + "learning_rate": 0.0001507017543859649, + "loss": 0.0091, + "step": 5759 + }, + { + "epoch": 85.97, + "learning_rate": 0.00015066666666666665, + "loss": 0.1359, + "step": 5760 + }, + { + "epoch": 85.98, + "learning_rate": 0.0001506315789473684, + "loss": 0.0006, + "step": 5761 + }, + { + "epoch": 86.0, + "learning_rate": 0.00015059649122807015, + "loss": 0.0034, + "step": 5762 + }, + { + "epoch": 86.01, + "learning_rate": 0.00015056140350877193, + "loss": 0.0012, + "step": 5763 + }, + { + "epoch": 86.03, + "learning_rate": 0.00015052631578947367, + "loss": 0.0997, + "step": 5764 + }, + { + "epoch": 86.04, + "learning_rate": 0.00015049122807017542, + "loss": 0.2033, + "step": 5765 + }, + { + "epoch": 86.06, + "learning_rate": 0.00015045614035087717, + "loss": 0.0006, + "step": 5766 + }, + { + "epoch": 86.07, + "learning_rate": 0.00015042105263157895, + "loss": 0.4448, + "step": 5767 + }, + { + "epoch": 86.09, + "learning_rate": 0.0001503859649122807, + "loss": 0.0009, + "step": 5768 + }, + { + "epoch": 86.1, + "learning_rate": 0.00015035087719298244, + "loss": 0.0669, + "step": 5769 + }, + { + "epoch": 86.12, + "learning_rate": 0.00015031578947368422, + "loss": 0.055, + "step": 5770 + }, + { + "epoch": 86.13, + "learning_rate": 0.00015028070175438597, + "loss": 0.0017, + "step": 5771 + }, + { + "epoch": 86.15, + "learning_rate": 0.0001502456140350877, + "loss": 0.0013, + "step": 5772 + }, + { + "epoch": 86.16, + "learning_rate": 0.00015021052631578944, + "loss": 0.0008, + "step": 5773 + }, + { + "epoch": 86.18, + "learning_rate": 0.00015017543859649122, + "loss": 0.0009, + "step": 5774 + }, + { + "epoch": 86.19, + "learning_rate": 0.00015014035087719296, + "loss": 0.0008, + "step": 5775 + }, + { + "epoch": 86.21, + "learning_rate": 0.0001501052631578947, + "loss": 0.1733, + "step": 5776 + }, + { + "epoch": 86.22, + "learning_rate": 0.00015007017543859646, + "loss": 0.0011, + "step": 5777 + }, + { + "epoch": 86.24, + "learning_rate": 0.00015003508771929824, + "loss": 0.0011, + "step": 5778 + }, + { + "epoch": 86.25, + "learning_rate": 0.00015, + "loss": 0.1676, + "step": 5779 + }, + { + "epoch": 86.27, + "learning_rate": 0.00014996491228070174, + "loss": 0.0156, + "step": 5780 + }, + { + "epoch": 86.28, + "learning_rate": 0.00014992982456140348, + "loss": 0.0034, + "step": 5781 + }, + { + "epoch": 86.3, + "learning_rate": 0.00014989473684210526, + "loss": 0.0028, + "step": 5782 + }, + { + "epoch": 86.31, + "learning_rate": 0.000149859649122807, + "loss": 0.0805, + "step": 5783 + }, + { + "epoch": 86.33, + "learning_rate": 0.00014982456140350876, + "loss": 0.0009, + "step": 5784 + }, + { + "epoch": 86.34, + "learning_rate": 0.0001497894736842105, + "loss": 0.001, + "step": 5785 + }, + { + "epoch": 86.36, + "learning_rate": 0.00014975438596491228, + "loss": 0.0014, + "step": 5786 + }, + { + "epoch": 86.37, + "learning_rate": 0.000149719298245614, + "loss": 0.0013, + "step": 5787 + }, + { + "epoch": 86.39, + "learning_rate": 0.00014968421052631578, + "loss": 0.004, + "step": 5788 + }, + { + "epoch": 86.4, + "learning_rate": 0.00014964912280701753, + "loss": 0.0011, + "step": 5789 + }, + { + "epoch": 86.42, + "learning_rate": 0.00014961403508771928, + "loss": 0.3142, + "step": 5790 + }, + { + "epoch": 86.43, + "learning_rate": 0.00014957894736842103, + "loss": 0.0419, + "step": 5791 + }, + { + "epoch": 86.45, + "learning_rate": 0.0001495438596491228, + "loss": 0.0111, + "step": 5792 + }, + { + "epoch": 86.46, + "learning_rate": 0.00014950877192982455, + "loss": 0.008, + "step": 5793 + }, + { + "epoch": 86.48, + "learning_rate": 0.0001494736842105263, + "loss": 0.0049, + "step": 5794 + }, + { + "epoch": 86.49, + "learning_rate": 0.00014943859649122808, + "loss": 0.1487, + "step": 5795 + }, + { + "epoch": 86.51, + "learning_rate": 0.0001494035087719298, + "loss": 0.0008, + "step": 5796 + }, + { + "epoch": 86.52, + "learning_rate": 0.00014936842105263157, + "loss": 0.0008, + "step": 5797 + }, + { + "epoch": 86.54, + "learning_rate": 0.00014933333333333332, + "loss": 0.001, + "step": 5798 + }, + { + "epoch": 86.55, + "learning_rate": 0.00014929824561403507, + "loss": 0.0466, + "step": 5799 + }, + { + "epoch": 86.57, + "learning_rate": 0.00014926315789473682, + "loss": 0.0162, + "step": 5800 + }, + { + "epoch": 86.57, + "eval_accuracy": 0.8663729809104258, + "eval_f1": 0.8667231212452812, + "eval_loss": 0.6782434582710266, + "eval_runtime": 344.5523, + "eval_samples_per_second": 11.859, + "eval_steps_per_second": 0.743, + "step": 5800 + }, + { + "epoch": 86.58, + "learning_rate": 0.0001492280701754386, + "loss": 0.1697, + "step": 5801 + }, + { + "epoch": 86.59, + "learning_rate": 0.00014919298245614034, + "loss": 0.0398, + "step": 5802 + }, + { + "epoch": 86.61, + "learning_rate": 0.0001491578947368421, + "loss": 0.0209, + "step": 5803 + }, + { + "epoch": 86.62, + "learning_rate": 0.00014912280701754384, + "loss": 0.001, + "step": 5804 + }, + { + "epoch": 86.64, + "learning_rate": 0.0001490877192982456, + "loss": 0.0625, + "step": 5805 + }, + { + "epoch": 86.65, + "learning_rate": 0.00014905263157894737, + "loss": 0.0015, + "step": 5806 + }, + { + "epoch": 86.67, + "learning_rate": 0.00014901754385964912, + "loss": 0.1041, + "step": 5807 + }, + { + "epoch": 86.68, + "learning_rate": 0.00014898245614035086, + "loss": 0.0991, + "step": 5808 + }, + { + "epoch": 86.7, + "learning_rate": 0.0001489473684210526, + "loss": 0.0031, + "step": 5809 + }, + { + "epoch": 86.71, + "learning_rate": 0.0001489122807017544, + "loss": 0.0017, + "step": 5810 + }, + { + "epoch": 86.73, + "learning_rate": 0.0001488771929824561, + "loss": 0.0084, + "step": 5811 + }, + { + "epoch": 86.74, + "learning_rate": 0.00014884210526315789, + "loss": 0.0009, + "step": 5812 + }, + { + "epoch": 86.76, + "learning_rate": 0.00014880701754385964, + "loss": 0.0012, + "step": 5813 + }, + { + "epoch": 86.77, + "learning_rate": 0.00014877192982456138, + "loss": 0.0019, + "step": 5814 + }, + { + "epoch": 86.79, + "learning_rate": 0.00014873684210526313, + "loss": 0.0017, + "step": 5815 + }, + { + "epoch": 86.8, + "learning_rate": 0.0001487017543859649, + "loss": 0.0127, + "step": 5816 + }, + { + "epoch": 86.82, + "learning_rate": 0.00014866666666666666, + "loss": 0.0011, + "step": 5817 + }, + { + "epoch": 86.83, + "learning_rate": 0.0001486315789473684, + "loss": 0.0011, + "step": 5818 + }, + { + "epoch": 86.85, + "learning_rate": 0.00014859649122807018, + "loss": 0.2392, + "step": 5819 + }, + { + "epoch": 86.86, + "learning_rate": 0.0001485614035087719, + "loss": 0.0805, + "step": 5820 + }, + { + "epoch": 86.88, + "learning_rate": 0.00014852631578947368, + "loss": 0.0014, + "step": 5821 + }, + { + "epoch": 86.89, + "learning_rate": 0.00014849122807017543, + "loss": 0.0017, + "step": 5822 + }, + { + "epoch": 86.91, + "learning_rate": 0.00014845614035087718, + "loss": 0.3464, + "step": 5823 + }, + { + "epoch": 86.92, + "learning_rate": 0.00014842105263157893, + "loss": 0.0017, + "step": 5824 + }, + { + "epoch": 86.94, + "learning_rate": 0.0001483859649122807, + "loss": 0.0026, + "step": 5825 + }, + { + "epoch": 86.95, + "learning_rate": 0.00014835087719298242, + "loss": 0.0041, + "step": 5826 + }, + { + "epoch": 86.97, + "learning_rate": 0.0001483157894736842, + "loss": 0.0068, + "step": 5827 + }, + { + "epoch": 86.98, + "learning_rate": 0.00014828070175438595, + "loss": 0.0137, + "step": 5828 + }, + { + "epoch": 87.0, + "learning_rate": 0.0001482456140350877, + "loss": 0.1182, + "step": 5829 + }, + { + "epoch": 87.01, + "learning_rate": 0.00014821052631578947, + "loss": 0.0045, + "step": 5830 + }, + { + "epoch": 87.03, + "learning_rate": 0.00014817543859649122, + "loss": 0.0103, + "step": 5831 + }, + { + "epoch": 87.04, + "learning_rate": 0.00014814035087719297, + "loss": 0.0048, + "step": 5832 + }, + { + "epoch": 87.06, + "learning_rate": 0.00014810526315789472, + "loss": 0.0839, + "step": 5833 + }, + { + "epoch": 87.07, + "learning_rate": 0.0001480701754385965, + "loss": 0.0019, + "step": 5834 + }, + { + "epoch": 87.09, + "learning_rate": 0.00014803508771929822, + "loss": 0.0032, + "step": 5835 + }, + { + "epoch": 87.1, + "learning_rate": 0.000148, + "loss": 0.0013, + "step": 5836 + }, + { + "epoch": 87.12, + "learning_rate": 0.00014796491228070174, + "loss": 0.0122, + "step": 5837 + }, + { + "epoch": 87.13, + "learning_rate": 0.0001479298245614035, + "loss": 0.024, + "step": 5838 + }, + { + "epoch": 87.15, + "learning_rate": 0.00014789473684210524, + "loss": 0.1766, + "step": 5839 + }, + { + "epoch": 87.16, + "learning_rate": 0.00014785964912280701, + "loss": 0.0011, + "step": 5840 + }, + { + "epoch": 87.18, + "learning_rate": 0.00014782456140350876, + "loss": 0.1159, + "step": 5841 + }, + { + "epoch": 87.19, + "learning_rate": 0.0001477894736842105, + "loss": 0.0019, + "step": 5842 + }, + { + "epoch": 87.21, + "learning_rate": 0.0001477543859649123, + "loss": 0.0011, + "step": 5843 + }, + { + "epoch": 87.22, + "learning_rate": 0.000147719298245614, + "loss": 0.0013, + "step": 5844 + }, + { + "epoch": 87.24, + "learning_rate": 0.00014768421052631579, + "loss": 0.0013, + "step": 5845 + }, + { + "epoch": 87.25, + "learning_rate": 0.00014764912280701753, + "loss": 0.0241, + "step": 5846 + }, + { + "epoch": 87.27, + "learning_rate": 0.00014761403508771928, + "loss": 0.0051, + "step": 5847 + }, + { + "epoch": 87.28, + "learning_rate": 0.00014757894736842103, + "loss": 0.0034, + "step": 5848 + }, + { + "epoch": 87.3, + "learning_rate": 0.0001475438596491228, + "loss": 0.0059, + "step": 5849 + }, + { + "epoch": 87.31, + "learning_rate": 0.00014750877192982453, + "loss": 0.002, + "step": 5850 + }, + { + "epoch": 87.33, + "learning_rate": 0.0001474736842105263, + "loss": 0.079, + "step": 5851 + }, + { + "epoch": 87.34, + "learning_rate": 0.00014743859649122805, + "loss": 0.0011, + "step": 5852 + }, + { + "epoch": 87.36, + "learning_rate": 0.0001474035087719298, + "loss": 0.0013, + "step": 5853 + }, + { + "epoch": 87.37, + "learning_rate": 0.00014736842105263155, + "loss": 0.004, + "step": 5854 + }, + { + "epoch": 87.39, + "learning_rate": 0.00014733333333333333, + "loss": 0.23, + "step": 5855 + }, + { + "epoch": 87.4, + "learning_rate": 0.00014729824561403508, + "loss": 0.0115, + "step": 5856 + }, + { + "epoch": 87.42, + "learning_rate": 0.00014726315789473683, + "loss": 0.0328, + "step": 5857 + }, + { + "epoch": 87.43, + "learning_rate": 0.0001472280701754386, + "loss": 0.0018, + "step": 5858 + }, + { + "epoch": 87.45, + "learning_rate": 0.00014719298245614032, + "loss": 0.0015, + "step": 5859 + }, + { + "epoch": 87.46, + "learning_rate": 0.0001471578947368421, + "loss": 0.0015, + "step": 5860 + }, + { + "epoch": 87.48, + "learning_rate": 0.00014712280701754385, + "loss": 0.0011, + "step": 5861 + }, + { + "epoch": 87.49, + "learning_rate": 0.0001470877192982456, + "loss": 0.0487, + "step": 5862 + }, + { + "epoch": 87.51, + "learning_rate": 0.00014705263157894734, + "loss": 0.0065, + "step": 5863 + }, + { + "epoch": 87.52, + "learning_rate": 0.00014701754385964912, + "loss": 0.0877, + "step": 5864 + }, + { + "epoch": 87.54, + "learning_rate": 0.00014698245614035087, + "loss": 0.0022, + "step": 5865 + }, + { + "epoch": 87.55, + "learning_rate": 0.00014694736842105262, + "loss": 0.0031, + "step": 5866 + }, + { + "epoch": 87.57, + "learning_rate": 0.0001469122807017544, + "loss": 0.2098, + "step": 5867 + }, + { + "epoch": 87.58, + "learning_rate": 0.00014687719298245612, + "loss": 0.002, + "step": 5868 + }, + { + "epoch": 87.59, + "learning_rate": 0.0001468421052631579, + "loss": 0.0015, + "step": 5869 + }, + { + "epoch": 87.61, + "learning_rate": 0.00014680701754385964, + "loss": 0.0051, + "step": 5870 + }, + { + "epoch": 87.62, + "learning_rate": 0.0001467719298245614, + "loss": 0.0025, + "step": 5871 + }, + { + "epoch": 87.64, + "learning_rate": 0.00014673684210526314, + "loss": 0.0078, + "step": 5872 + }, + { + "epoch": 87.65, + "learning_rate": 0.00014670175438596491, + "loss": 0.0011, + "step": 5873 + }, + { + "epoch": 87.67, + "learning_rate": 0.00014666666666666664, + "loss": 0.0079, + "step": 5874 + }, + { + "epoch": 87.68, + "learning_rate": 0.0001466315789473684, + "loss": 0.0046, + "step": 5875 + }, + { + "epoch": 87.7, + "learning_rate": 0.00014659649122807016, + "loss": 0.0021, + "step": 5876 + }, + { + "epoch": 87.71, + "learning_rate": 0.0001465614035087719, + "loss": 0.0013, + "step": 5877 + }, + { + "epoch": 87.73, + "learning_rate": 0.00014652631578947366, + "loss": 0.0009, + "step": 5878 + }, + { + "epoch": 87.74, + "learning_rate": 0.00014649122807017543, + "loss": 0.0011, + "step": 5879 + }, + { + "epoch": 87.76, + "learning_rate": 0.00014645614035087718, + "loss": 0.0012, + "step": 5880 + }, + { + "epoch": 87.77, + "learning_rate": 0.00014642105263157893, + "loss": 0.0009, + "step": 5881 + }, + { + "epoch": 87.79, + "learning_rate": 0.0001463859649122807, + "loss": 0.0008, + "step": 5882 + }, + { + "epoch": 87.8, + "learning_rate": 0.00014635087719298243, + "loss": 0.0012, + "step": 5883 + }, + { + "epoch": 87.82, + "learning_rate": 0.0001463157894736842, + "loss": 0.075, + "step": 5884 + }, + { + "epoch": 87.83, + "learning_rate": 0.00014628070175438595, + "loss": 0.003, + "step": 5885 + }, + { + "epoch": 87.85, + "learning_rate": 0.0001462456140350877, + "loss": 0.0025, + "step": 5886 + }, + { + "epoch": 87.86, + "learning_rate": 0.00014621052631578945, + "loss": 0.001, + "step": 5887 + }, + { + "epoch": 87.88, + "learning_rate": 0.00014617543859649123, + "loss": 0.0007, + "step": 5888 + }, + { + "epoch": 87.89, + "learning_rate": 0.00014614035087719298, + "loss": 0.0009, + "step": 5889 + }, + { + "epoch": 87.91, + "learning_rate": 0.00014610526315789472, + "loss": 0.0394, + "step": 5890 + }, + { + "epoch": 87.92, + "learning_rate": 0.00014607017543859647, + "loss": 0.0007, + "step": 5891 + }, + { + "epoch": 87.94, + "learning_rate": 0.00014603508771929822, + "loss": 0.0369, + "step": 5892 + }, + { + "epoch": 87.95, + "learning_rate": 0.000146, + "loss": 0.0059, + "step": 5893 + }, + { + "epoch": 87.97, + "learning_rate": 0.00014596491228070175, + "loss": 0.0013, + "step": 5894 + }, + { + "epoch": 87.98, + "learning_rate": 0.0001459298245614035, + "loss": 0.2778, + "step": 5895 + }, + { + "epoch": 88.0, + "learning_rate": 0.00014589473684210524, + "loss": 0.0017, + "step": 5896 + }, + { + "epoch": 88.01, + "learning_rate": 0.00014585964912280702, + "loss": 0.0066, + "step": 5897 + }, + { + "epoch": 88.03, + "learning_rate": 0.00014582456140350874, + "loss": 0.0257, + "step": 5898 + }, + { + "epoch": 88.04, + "learning_rate": 0.00014578947368421052, + "loss": 0.0007, + "step": 5899 + }, + { + "epoch": 88.06, + "learning_rate": 0.00014575438596491227, + "loss": 0.1894, + "step": 5900 + }, + { + "epoch": 88.07, + "learning_rate": 0.00014571929824561402, + "loss": 0.1666, + "step": 5901 + }, + { + "epoch": 88.09, + "learning_rate": 0.00014568421052631576, + "loss": 0.0976, + "step": 5902 + }, + { + "epoch": 88.1, + "learning_rate": 0.00014564912280701754, + "loss": 0.0007, + "step": 5903 + }, + { + "epoch": 88.12, + "learning_rate": 0.0001456140350877193, + "loss": 0.0009, + "step": 5904 + }, + { + "epoch": 88.13, + "learning_rate": 0.00014557894736842104, + "loss": 0.0785, + "step": 5905 + }, + { + "epoch": 88.15, + "learning_rate": 0.0001455438596491228, + "loss": 0.1406, + "step": 5906 + }, + { + "epoch": 88.16, + "learning_rate": 0.00014550877192982454, + "loss": 0.0008, + "step": 5907 + }, + { + "epoch": 88.18, + "learning_rate": 0.0001454736842105263, + "loss": 0.0007, + "step": 5908 + }, + { + "epoch": 88.19, + "learning_rate": 0.00014543859649122806, + "loss": 0.0007, + "step": 5909 + }, + { + "epoch": 88.21, + "learning_rate": 0.0001454035087719298, + "loss": 0.1557, + "step": 5910 + }, + { + "epoch": 88.22, + "learning_rate": 0.00014536842105263156, + "loss": 0.0011, + "step": 5911 + }, + { + "epoch": 88.24, + "learning_rate": 0.00014533333333333333, + "loss": 0.2107, + "step": 5912 + }, + { + "epoch": 88.25, + "learning_rate": 0.00014529824561403508, + "loss": 0.2468, + "step": 5913 + }, + { + "epoch": 88.27, + "learning_rate": 0.00014526315789473683, + "loss": 0.0007, + "step": 5914 + }, + { + "epoch": 88.28, + "learning_rate": 0.00014522807017543858, + "loss": 0.0032, + "step": 5915 + }, + { + "epoch": 88.3, + "learning_rate": 0.00014519298245614033, + "loss": 0.002, + "step": 5916 + }, + { + "epoch": 88.31, + "learning_rate": 0.0001451578947368421, + "loss": 0.0035, + "step": 5917 + }, + { + "epoch": 88.33, + "learning_rate": 0.00014512280701754385, + "loss": 0.0014, + "step": 5918 + }, + { + "epoch": 88.34, + "learning_rate": 0.0001450877192982456, + "loss": 0.0042, + "step": 5919 + }, + { + "epoch": 88.36, + "learning_rate": 0.00014505263157894735, + "loss": 0.0015, + "step": 5920 + }, + { + "epoch": 88.37, + "learning_rate": 0.00014501754385964913, + "loss": 0.0027, + "step": 5921 + }, + { + "epoch": 88.39, + "learning_rate": 0.00014498245614035085, + "loss": 0.2759, + "step": 5922 + }, + { + "epoch": 88.4, + "learning_rate": 0.00014494736842105262, + "loss": 0.0025, + "step": 5923 + }, + { + "epoch": 88.42, + "learning_rate": 0.00014491228070175437, + "loss": 0.0016, + "step": 5924 + }, + { + "epoch": 88.43, + "learning_rate": 0.00014487719298245612, + "loss": 0.0013, + "step": 5925 + }, + { + "epoch": 88.45, + "learning_rate": 0.00014484210526315787, + "loss": 0.0033, + "step": 5926 + }, + { + "epoch": 88.46, + "learning_rate": 0.00014480701754385965, + "loss": 0.0038, + "step": 5927 + }, + { + "epoch": 88.48, + "learning_rate": 0.0001447719298245614, + "loss": 0.1503, + "step": 5928 + }, + { + "epoch": 88.49, + "learning_rate": 0.00014473684210526314, + "loss": 0.0013, + "step": 5929 + }, + { + "epoch": 88.51, + "learning_rate": 0.00014470175438596492, + "loss": 0.0019, + "step": 5930 + }, + { + "epoch": 88.52, + "learning_rate": 0.00014466666666666664, + "loss": 0.0517, + "step": 5931 + }, + { + "epoch": 88.54, + "learning_rate": 0.00014463157894736842, + "loss": 0.0095, + "step": 5932 + }, + { + "epoch": 88.55, + "learning_rate": 0.00014459649122807017, + "loss": 0.0308, + "step": 5933 + }, + { + "epoch": 88.57, + "learning_rate": 0.00014456140350877191, + "loss": 0.0025, + "step": 5934 + }, + { + "epoch": 88.58, + "learning_rate": 0.00014452631578947366, + "loss": 0.0047, + "step": 5935 + }, + { + "epoch": 88.59, + "learning_rate": 0.00014449122807017544, + "loss": 0.0013, + "step": 5936 + }, + { + "epoch": 88.61, + "learning_rate": 0.0001444561403508772, + "loss": 0.004, + "step": 5937 + }, + { + "epoch": 88.62, + "learning_rate": 0.00014442105263157894, + "loss": 0.0042, + "step": 5938 + }, + { + "epoch": 88.64, + "learning_rate": 0.00014438596491228069, + "loss": 0.2497, + "step": 5939 + }, + { + "epoch": 88.65, + "learning_rate": 0.00014435087719298243, + "loss": 0.0088, + "step": 5940 + }, + { + "epoch": 88.67, + "learning_rate": 0.00014431578947368418, + "loss": 0.0128, + "step": 5941 + }, + { + "epoch": 88.68, + "learning_rate": 0.00014428070175438596, + "loss": 0.0015, + "step": 5942 + }, + { + "epoch": 88.7, + "learning_rate": 0.0001442456140350877, + "loss": 0.0049, + "step": 5943 + }, + { + "epoch": 88.71, + "learning_rate": 0.00014421052631578946, + "loss": 0.0013, + "step": 5944 + }, + { + "epoch": 88.73, + "learning_rate": 0.00014417543859649123, + "loss": 0.0062, + "step": 5945 + }, + { + "epoch": 88.74, + "learning_rate": 0.00014414035087719295, + "loss": 0.0102, + "step": 5946 + }, + { + "epoch": 88.76, + "learning_rate": 0.00014410526315789473, + "loss": 0.1041, + "step": 5947 + }, + { + "epoch": 88.77, + "learning_rate": 0.00014407017543859648, + "loss": 0.0318, + "step": 5948 + }, + { + "epoch": 88.79, + "learning_rate": 0.00014403508771929823, + "loss": 0.1817, + "step": 5949 + }, + { + "epoch": 88.8, + "learning_rate": 0.00014399999999999998, + "loss": 0.0012, + "step": 5950 + }, + { + "epoch": 88.82, + "learning_rate": 0.00014396491228070175, + "loss": 0.2596, + "step": 5951 + }, + { + "epoch": 88.83, + "learning_rate": 0.0001439298245614035, + "loss": 0.0012, + "step": 5952 + }, + { + "epoch": 88.85, + "learning_rate": 0.00014389473684210525, + "loss": 0.162, + "step": 5953 + }, + { + "epoch": 88.86, + "learning_rate": 0.000143859649122807, + "loss": 0.0988, + "step": 5954 + }, + { + "epoch": 88.88, + "learning_rate": 0.00014382456140350875, + "loss": 0.0008, + "step": 5955 + }, + { + "epoch": 88.89, + "learning_rate": 0.00014378947368421052, + "loss": 0.0078, + "step": 5956 + }, + { + "epoch": 88.91, + "learning_rate": 0.00014375438596491227, + "loss": 0.0186, + "step": 5957 + }, + { + "epoch": 88.92, + "learning_rate": 0.00014371929824561402, + "loss": 0.0009, + "step": 5958 + }, + { + "epoch": 88.94, + "learning_rate": 0.00014368421052631577, + "loss": 0.0046, + "step": 5959 + }, + { + "epoch": 88.95, + "learning_rate": 0.00014364912280701755, + "loss": 0.0011, + "step": 5960 + }, + { + "epoch": 88.97, + "learning_rate": 0.0001436140350877193, + "loss": 0.001, + "step": 5961 + }, + { + "epoch": 88.98, + "learning_rate": 0.00014357894736842104, + "loss": 0.0008, + "step": 5962 + }, + { + "epoch": 89.0, + "learning_rate": 0.0001435438596491228, + "loss": 0.0851, + "step": 5963 + }, + { + "epoch": 89.01, + "learning_rate": 0.00014350877192982454, + "loss": 0.0031, + "step": 5964 + }, + { + "epoch": 89.03, + "learning_rate": 0.0001434736842105263, + "loss": 0.0015, + "step": 5965 + }, + { + "epoch": 89.04, + "learning_rate": 0.00014343859649122807, + "loss": 0.2782, + "step": 5966 + }, + { + "epoch": 89.06, + "learning_rate": 0.00014340350877192981, + "loss": 0.0386, + "step": 5967 + }, + { + "epoch": 89.07, + "learning_rate": 0.00014336842105263156, + "loss": 0.2128, + "step": 5968 + }, + { + "epoch": 89.09, + "learning_rate": 0.00014333333333333334, + "loss": 0.001, + "step": 5969 + }, + { + "epoch": 89.1, + "learning_rate": 0.00014329824561403506, + "loss": 0.0344, + "step": 5970 + }, + { + "epoch": 89.12, + "learning_rate": 0.00014326315789473684, + "loss": 0.001, + "step": 5971 + }, + { + "epoch": 89.13, + "learning_rate": 0.00014322807017543859, + "loss": 0.0008, + "step": 5972 + }, + { + "epoch": 89.15, + "learning_rate": 0.00014319298245614033, + "loss": 0.0195, + "step": 5973 + }, + { + "epoch": 89.16, + "learning_rate": 0.00014315789473684208, + "loss": 0.0012, + "step": 5974 + }, + { + "epoch": 89.18, + "learning_rate": 0.00014312280701754386, + "loss": 0.0016, + "step": 5975 + }, + { + "epoch": 89.19, + "learning_rate": 0.0001430877192982456, + "loss": 0.0053, + "step": 5976 + }, + { + "epoch": 89.21, + "learning_rate": 0.00014305263157894736, + "loss": 0.0014, + "step": 5977 + }, + { + "epoch": 89.22, + "learning_rate": 0.0001430175438596491, + "loss": 0.1234, + "step": 5978 + }, + { + "epoch": 89.24, + "learning_rate": 0.00014298245614035085, + "loss": 0.0009, + "step": 5979 + }, + { + "epoch": 89.25, + "learning_rate": 0.00014294736842105263, + "loss": 0.0274, + "step": 5980 + }, + { + "epoch": 89.27, + "learning_rate": 0.00014291228070175438, + "loss": 0.0013, + "step": 5981 + }, + { + "epoch": 89.28, + "learning_rate": 0.00014287719298245613, + "loss": 0.0067, + "step": 5982 + }, + { + "epoch": 89.3, + "learning_rate": 0.00014284210526315788, + "loss": 0.0061, + "step": 5983 + }, + { + "epoch": 89.31, + "learning_rate": 0.00014280701754385965, + "loss": 0.1048, + "step": 5984 + }, + { + "epoch": 89.33, + "learning_rate": 0.0001427719298245614, + "loss": 0.2524, + "step": 5985 + }, + { + "epoch": 89.34, + "learning_rate": 0.00014273684210526315, + "loss": 0.0997, + "step": 5986 + }, + { + "epoch": 89.36, + "learning_rate": 0.0001427017543859649, + "loss": 0.0029, + "step": 5987 + }, + { + "epoch": 89.37, + "learning_rate": 0.00014266666666666665, + "loss": 0.1637, + "step": 5988 + }, + { + "epoch": 89.39, + "learning_rate": 0.0001426315789473684, + "loss": 0.0022, + "step": 5989 + }, + { + "epoch": 89.4, + "learning_rate": 0.00014259649122807017, + "loss": 0.0015, + "step": 5990 + }, + { + "epoch": 89.42, + "learning_rate": 0.00014256140350877192, + "loss": 0.0012, + "step": 5991 + }, + { + "epoch": 89.43, + "learning_rate": 0.00014252631578947367, + "loss": 0.1172, + "step": 5992 + }, + { + "epoch": 89.45, + "learning_rate": 0.00014249122807017545, + "loss": 0.1637, + "step": 5993 + }, + { + "epoch": 89.46, + "learning_rate": 0.00014245614035087717, + "loss": 0.0271, + "step": 5994 + }, + { + "epoch": 89.48, + "learning_rate": 0.00014242105263157894, + "loss": 0.0028, + "step": 5995 + }, + { + "epoch": 89.49, + "learning_rate": 0.0001423859649122807, + "loss": 0.2502, + "step": 5996 + }, + { + "epoch": 89.51, + "learning_rate": 0.00014235087719298244, + "loss": 0.0021, + "step": 5997 + }, + { + "epoch": 89.52, + "learning_rate": 0.0001423157894736842, + "loss": 0.1202, + "step": 5998 + }, + { + "epoch": 89.54, + "learning_rate": 0.00014228070175438596, + "loss": 0.1288, + "step": 5999 + }, + { + "epoch": 89.55, + "learning_rate": 0.00014224561403508771, + "loss": 0.1929, + "step": 6000 + }, + { + "epoch": 89.55, + "eval_accuracy": 0.860009789525208, + "eval_f1": 0.8615169079479937, + "eval_loss": 0.6371402144432068, + "eval_runtime": 345.6404, + "eval_samples_per_second": 11.822, + "eval_steps_per_second": 0.741, + "step": 6000 + }, + { + "epoch": 89.57, + "learning_rate": 0.00014221052631578946, + "loss": 0.0023, + "step": 6001 + }, + { + "epoch": 89.58, + "learning_rate": 0.0001421754385964912, + "loss": 0.1027, + "step": 6002 + }, + { + "epoch": 89.59, + "learning_rate": 0.00014214035087719296, + "loss": 0.0391, + "step": 6003 + }, + { + "epoch": 89.61, + "learning_rate": 0.0001421052631578947, + "loss": 0.0332, + "step": 6004 + }, + { + "epoch": 89.62, + "learning_rate": 0.00014207017543859648, + "loss": 0.0038, + "step": 6005 + }, + { + "epoch": 89.64, + "learning_rate": 0.00014203508771929823, + "loss": 0.0185, + "step": 6006 + }, + { + "epoch": 89.65, + "learning_rate": 0.00014199999999999998, + "loss": 0.0101, + "step": 6007 + }, + { + "epoch": 89.67, + "learning_rate": 0.00014196491228070176, + "loss": 0.0023, + "step": 6008 + }, + { + "epoch": 89.68, + "learning_rate": 0.0001419298245614035, + "loss": 0.0232, + "step": 6009 + }, + { + "epoch": 89.7, + "learning_rate": 0.00014189473684210526, + "loss": 0.1696, + "step": 6010 + }, + { + "epoch": 89.71, + "learning_rate": 0.000141859649122807, + "loss": 0.0579, + "step": 6011 + }, + { + "epoch": 89.73, + "learning_rate": 0.00014182456140350875, + "loss": 0.0025, + "step": 6012 + }, + { + "epoch": 89.74, + "learning_rate": 0.0001417894736842105, + "loss": 0.0084, + "step": 6013 + }, + { + "epoch": 89.76, + "learning_rate": 0.00014175438596491228, + "loss": 0.0014, + "step": 6014 + }, + { + "epoch": 89.77, + "learning_rate": 0.00014171929824561403, + "loss": 0.0051, + "step": 6015 + }, + { + "epoch": 89.79, + "learning_rate": 0.00014168421052631578, + "loss": 0.0147, + "step": 6016 + }, + { + "epoch": 89.8, + "learning_rate": 0.00014164912280701752, + "loss": 0.0327, + "step": 6017 + }, + { + "epoch": 89.82, + "learning_rate": 0.00014161403508771927, + "loss": 0.0069, + "step": 6018 + }, + { + "epoch": 89.83, + "learning_rate": 0.00014157894736842105, + "loss": 0.011, + "step": 6019 + }, + { + "epoch": 89.85, + "learning_rate": 0.0001415438596491228, + "loss": 0.016, + "step": 6020 + }, + { + "epoch": 89.86, + "learning_rate": 0.00014150877192982455, + "loss": 0.0099, + "step": 6021 + }, + { + "epoch": 89.88, + "learning_rate": 0.0001414736842105263, + "loss": 0.0036, + "step": 6022 + }, + { + "epoch": 89.89, + "learning_rate": 0.00014143859649122807, + "loss": 0.0856, + "step": 6023 + }, + { + "epoch": 89.91, + "learning_rate": 0.00014140350877192982, + "loss": 0.0012, + "step": 6024 + }, + { + "epoch": 89.92, + "learning_rate": 0.00014136842105263157, + "loss": 0.0026, + "step": 6025 + }, + { + "epoch": 89.94, + "learning_rate": 0.00014133333333333332, + "loss": 0.225, + "step": 6026 + }, + { + "epoch": 89.95, + "learning_rate": 0.00014129824561403507, + "loss": 0.2369, + "step": 6027 + }, + { + "epoch": 89.97, + "learning_rate": 0.00014126315789473681, + "loss": 0.0025, + "step": 6028 + }, + { + "epoch": 89.98, + "learning_rate": 0.0001412280701754386, + "loss": 0.0009, + "step": 6029 + }, + { + "epoch": 90.0, + "learning_rate": 0.00014119298245614034, + "loss": 0.0011, + "step": 6030 + }, + { + "epoch": 90.01, + "learning_rate": 0.0001411578947368421, + "loss": 0.0779, + "step": 6031 + }, + { + "epoch": 90.03, + "learning_rate": 0.00014112280701754386, + "loss": 0.127, + "step": 6032 + }, + { + "epoch": 90.04, + "learning_rate": 0.0001410877192982456, + "loss": 0.1497, + "step": 6033 + }, + { + "epoch": 90.06, + "learning_rate": 0.00014105263157894736, + "loss": 0.0013, + "step": 6034 + }, + { + "epoch": 90.07, + "learning_rate": 0.0001410175438596491, + "loss": 0.0189, + "step": 6035 + }, + { + "epoch": 90.09, + "learning_rate": 0.00014098245614035086, + "loss": 0.2014, + "step": 6036 + }, + { + "epoch": 90.1, + "learning_rate": 0.0001409473684210526, + "loss": 0.0027, + "step": 6037 + }, + { + "epoch": 90.12, + "learning_rate": 0.00014091228070175438, + "loss": 0.0085, + "step": 6038 + }, + { + "epoch": 90.13, + "learning_rate": 0.00014087719298245613, + "loss": 0.0017, + "step": 6039 + }, + { + "epoch": 90.15, + "learning_rate": 0.00014084210526315788, + "loss": 0.0009, + "step": 6040 + }, + { + "epoch": 90.16, + "learning_rate": 0.00014080701754385963, + "loss": 0.0031, + "step": 6041 + }, + { + "epoch": 90.18, + "learning_rate": 0.00014077192982456138, + "loss": 0.0009, + "step": 6042 + }, + { + "epoch": 90.19, + "learning_rate": 0.00014073684210526315, + "loss": 0.0007, + "step": 6043 + }, + { + "epoch": 90.21, + "learning_rate": 0.0001407017543859649, + "loss": 0.0053, + "step": 6044 + }, + { + "epoch": 90.22, + "learning_rate": 0.00014066666666666665, + "loss": 0.0176, + "step": 6045 + }, + { + "epoch": 90.24, + "learning_rate": 0.0001406315789473684, + "loss": 0.0015, + "step": 6046 + }, + { + "epoch": 90.25, + "learning_rate": 0.00014059649122807018, + "loss": 0.0021, + "step": 6047 + }, + { + "epoch": 90.27, + "learning_rate": 0.00014056140350877193, + "loss": 0.0071, + "step": 6048 + }, + { + "epoch": 90.28, + "learning_rate": 0.00014052631578947367, + "loss": 0.0008, + "step": 6049 + }, + { + "epoch": 90.3, + "learning_rate": 0.00014049122807017542, + "loss": 0.0057, + "step": 6050 + }, + { + "epoch": 90.31, + "learning_rate": 0.00014045614035087717, + "loss": 0.0154, + "step": 6051 + }, + { + "epoch": 90.33, + "learning_rate": 0.00014042105263157892, + "loss": 0.001, + "step": 6052 + }, + { + "epoch": 90.34, + "learning_rate": 0.0001403859649122807, + "loss": 0.001, + "step": 6053 + }, + { + "epoch": 90.36, + "learning_rate": 0.00014035087719298245, + "loss": 0.0235, + "step": 6054 + }, + { + "epoch": 90.37, + "learning_rate": 0.0001403157894736842, + "loss": 0.0128, + "step": 6055 + }, + { + "epoch": 90.39, + "learning_rate": 0.00014028070175438597, + "loss": 0.0008, + "step": 6056 + }, + { + "epoch": 90.4, + "learning_rate": 0.0001402456140350877, + "loss": 0.0027, + "step": 6057 + }, + { + "epoch": 90.42, + "learning_rate": 0.00014021052631578947, + "loss": 0.0024, + "step": 6058 + }, + { + "epoch": 90.43, + "learning_rate": 0.00014017543859649122, + "loss": 0.0008, + "step": 6059 + }, + { + "epoch": 90.45, + "learning_rate": 0.00014014035087719297, + "loss": 0.0984, + "step": 6060 + }, + { + "epoch": 90.46, + "learning_rate": 0.00014010526315789471, + "loss": 0.0008, + "step": 6061 + }, + { + "epoch": 90.48, + "learning_rate": 0.0001400701754385965, + "loss": 0.0008, + "step": 6062 + }, + { + "epoch": 90.49, + "learning_rate": 0.00014003508771929824, + "loss": 0.0664, + "step": 6063 + }, + { + "epoch": 90.51, + "learning_rate": 0.00014, + "loss": 0.0387, + "step": 6064 + }, + { + "epoch": 90.52, + "learning_rate": 0.00013996491228070174, + "loss": 0.0026, + "step": 6065 + }, + { + "epoch": 90.54, + "learning_rate": 0.00013992982456140349, + "loss": 0.003, + "step": 6066 + }, + { + "epoch": 90.55, + "learning_rate": 0.00013989473684210523, + "loss": 0.0011, + "step": 6067 + }, + { + "epoch": 90.57, + "learning_rate": 0.000139859649122807, + "loss": 0.0022, + "step": 6068 + }, + { + "epoch": 90.58, + "learning_rate": 0.00013982456140350876, + "loss": 0.001, + "step": 6069 + }, + { + "epoch": 90.59, + "learning_rate": 0.0001397894736842105, + "loss": 0.0246, + "step": 6070 + }, + { + "epoch": 90.61, + "learning_rate": 0.00013975438596491228, + "loss": 0.0157, + "step": 6071 + }, + { + "epoch": 90.62, + "learning_rate": 0.00013971929824561403, + "loss": 0.001, + "step": 6072 + }, + { + "epoch": 90.64, + "learning_rate": 0.00013968421052631578, + "loss": 0.0108, + "step": 6073 + }, + { + "epoch": 90.65, + "learning_rate": 0.00013964912280701753, + "loss": 0.0009, + "step": 6074 + }, + { + "epoch": 90.67, + "learning_rate": 0.00013961403508771928, + "loss": 0.0008, + "step": 6075 + }, + { + "epoch": 90.68, + "learning_rate": 0.00013957894736842103, + "loss": 0.0034, + "step": 6076 + }, + { + "epoch": 90.7, + "learning_rate": 0.0001395438596491228, + "loss": 0.0009, + "step": 6077 + }, + { + "epoch": 90.71, + "learning_rate": 0.00013950877192982455, + "loss": 0.0557, + "step": 6078 + }, + { + "epoch": 90.73, + "learning_rate": 0.0001394736842105263, + "loss": 0.0007, + "step": 6079 + }, + { + "epoch": 90.74, + "learning_rate": 0.00013943859649122808, + "loss": 0.0019, + "step": 6080 + }, + { + "epoch": 90.76, + "learning_rate": 0.0001394035087719298, + "loss": 0.0069, + "step": 6081 + }, + { + "epoch": 90.77, + "learning_rate": 0.00013936842105263157, + "loss": 0.0013, + "step": 6082 + }, + { + "epoch": 90.79, + "learning_rate": 0.00013933333333333332, + "loss": 0.0486, + "step": 6083 + }, + { + "epoch": 90.8, + "learning_rate": 0.00013929824561403507, + "loss": 0.0448, + "step": 6084 + }, + { + "epoch": 90.82, + "learning_rate": 0.00013926315789473682, + "loss": 0.0008, + "step": 6085 + }, + { + "epoch": 90.83, + "learning_rate": 0.0001392280701754386, + "loss": 0.0208, + "step": 6086 + }, + { + "epoch": 90.85, + "learning_rate": 0.00013919298245614035, + "loss": 0.0014, + "step": 6087 + }, + { + "epoch": 90.86, + "learning_rate": 0.0001391578947368421, + "loss": 0.258, + "step": 6088 + }, + { + "epoch": 90.88, + "learning_rate": 0.00013912280701754384, + "loss": 0.0007, + "step": 6089 + }, + { + "epoch": 90.89, + "learning_rate": 0.0001390877192982456, + "loss": 0.0034, + "step": 6090 + }, + { + "epoch": 90.91, + "learning_rate": 0.00013905263157894734, + "loss": 0.001, + "step": 6091 + }, + { + "epoch": 90.92, + "learning_rate": 0.00013901754385964912, + "loss": 0.0009, + "step": 6092 + }, + { + "epoch": 90.94, + "learning_rate": 0.00013898245614035086, + "loss": 0.0006, + "step": 6093 + }, + { + "epoch": 90.95, + "learning_rate": 0.00013894736842105261, + "loss": 0.0012, + "step": 6094 + }, + { + "epoch": 90.97, + "learning_rate": 0.0001389122807017544, + "loss": 0.001, + "step": 6095 + }, + { + "epoch": 90.98, + "learning_rate": 0.00013887719298245614, + "loss": 0.001, + "step": 6096 + }, + { + "epoch": 91.0, + "learning_rate": 0.0001388421052631579, + "loss": 0.0011, + "step": 6097 + }, + { + "epoch": 91.01, + "learning_rate": 0.00013880701754385964, + "loss": 0.0022, + "step": 6098 + }, + { + "epoch": 91.03, + "learning_rate": 0.00013877192982456138, + "loss": 0.0029, + "step": 6099 + }, + { + "epoch": 91.04, + "learning_rate": 0.00013873684210526313, + "loss": 0.0463, + "step": 6100 + }, + { + "epoch": 91.06, + "learning_rate": 0.0001387017543859649, + "loss": 0.0864, + "step": 6101 + }, + { + "epoch": 91.07, + "learning_rate": 0.00013866666666666666, + "loss": 0.001, + "step": 6102 + }, + { + "epoch": 91.09, + "learning_rate": 0.0001386315789473684, + "loss": 0.0009, + "step": 6103 + }, + { + "epoch": 91.1, + "learning_rate": 0.00013859649122807016, + "loss": 0.0079, + "step": 6104 + }, + { + "epoch": 91.12, + "learning_rate": 0.0001385614035087719, + "loss": 0.0109, + "step": 6105 + }, + { + "epoch": 91.13, + "learning_rate": 0.00013852631578947368, + "loss": 0.0017, + "step": 6106 + }, + { + "epoch": 91.15, + "learning_rate": 0.00013849122807017543, + "loss": 0.0009, + "step": 6107 + }, + { + "epoch": 91.16, + "learning_rate": 0.00013845614035087718, + "loss": 0.0028, + "step": 6108 + }, + { + "epoch": 91.18, + "learning_rate": 0.00013842105263157893, + "loss": 0.0007, + "step": 6109 + }, + { + "epoch": 91.19, + "learning_rate": 0.0001383859649122807, + "loss": 0.0013, + "step": 6110 + }, + { + "epoch": 91.21, + "learning_rate": 0.00013835087719298245, + "loss": 0.0051, + "step": 6111 + }, + { + "epoch": 91.22, + "learning_rate": 0.0001383157894736842, + "loss": 0.0016, + "step": 6112 + }, + { + "epoch": 91.24, + "learning_rate": 0.00013828070175438595, + "loss": 0.0112, + "step": 6113 + }, + { + "epoch": 91.25, + "learning_rate": 0.0001382456140350877, + "loss": 0.0008, + "step": 6114 + }, + { + "epoch": 91.27, + "learning_rate": 0.00013821052631578945, + "loss": 0.1127, + "step": 6115 + }, + { + "epoch": 91.28, + "learning_rate": 0.00013817543859649122, + "loss": 0.0011, + "step": 6116 + }, + { + "epoch": 91.3, + "learning_rate": 0.00013814035087719297, + "loss": 0.0027, + "step": 6117 + }, + { + "epoch": 91.31, + "learning_rate": 0.00013810526315789472, + "loss": 0.0007, + "step": 6118 + }, + { + "epoch": 91.33, + "learning_rate": 0.0001380701754385965, + "loss": 0.0555, + "step": 6119 + }, + { + "epoch": 91.34, + "learning_rate": 0.00013803508771929824, + "loss": 0.0025, + "step": 6120 + }, + { + "epoch": 91.36, + "learning_rate": 0.000138, + "loss": 0.0009, + "step": 6121 + }, + { + "epoch": 91.37, + "learning_rate": 0.00013796491228070174, + "loss": 0.0059, + "step": 6122 + }, + { + "epoch": 91.39, + "learning_rate": 0.0001379298245614035, + "loss": 0.0016, + "step": 6123 + }, + { + "epoch": 91.4, + "learning_rate": 0.00013789473684210524, + "loss": 0.0484, + "step": 6124 + }, + { + "epoch": 91.42, + "learning_rate": 0.00013785964912280702, + "loss": 0.0589, + "step": 6125 + }, + { + "epoch": 91.43, + "learning_rate": 0.00013782456140350876, + "loss": 0.0498, + "step": 6126 + }, + { + "epoch": 91.45, + "learning_rate": 0.0001377894736842105, + "loss": 0.0007, + "step": 6127 + }, + { + "epoch": 91.46, + "learning_rate": 0.00013775438596491226, + "loss": 0.0008, + "step": 6128 + }, + { + "epoch": 91.48, + "learning_rate": 0.000137719298245614, + "loss": 0.0011, + "step": 6129 + }, + { + "epoch": 91.49, + "learning_rate": 0.00013768421052631576, + "loss": 0.0008, + "step": 6130 + }, + { + "epoch": 91.51, + "learning_rate": 0.00013764912280701754, + "loss": 0.001, + "step": 6131 + }, + { + "epoch": 91.52, + "learning_rate": 0.00013761403508771928, + "loss": 0.0006, + "step": 6132 + }, + { + "epoch": 91.54, + "learning_rate": 0.00013757894736842103, + "loss": 0.0101, + "step": 6133 + }, + { + "epoch": 91.55, + "learning_rate": 0.0001375438596491228, + "loss": 0.0091, + "step": 6134 + }, + { + "epoch": 91.57, + "learning_rate": 0.00013750877192982456, + "loss": 0.0023, + "step": 6135 + }, + { + "epoch": 91.58, + "learning_rate": 0.0001374736842105263, + "loss": 0.001, + "step": 6136 + }, + { + "epoch": 91.59, + "learning_rate": 0.00013743859649122806, + "loss": 0.0009, + "step": 6137 + }, + { + "epoch": 91.61, + "learning_rate": 0.0001374035087719298, + "loss": 0.0023, + "step": 6138 + }, + { + "epoch": 91.62, + "learning_rate": 0.00013736842105263155, + "loss": 0.0014, + "step": 6139 + }, + { + "epoch": 91.64, + "learning_rate": 0.00013733333333333333, + "loss": 0.0016, + "step": 6140 + }, + { + "epoch": 91.65, + "learning_rate": 0.00013729824561403508, + "loss": 0.0045, + "step": 6141 + }, + { + "epoch": 91.67, + "learning_rate": 0.00013726315789473683, + "loss": 0.2162, + "step": 6142 + }, + { + "epoch": 91.68, + "learning_rate": 0.0001372280701754386, + "loss": 0.0006, + "step": 6143 + }, + { + "epoch": 91.7, + "learning_rate": 0.00013719298245614035, + "loss": 0.0014, + "step": 6144 + }, + { + "epoch": 91.71, + "learning_rate": 0.0001371578947368421, + "loss": 0.0006, + "step": 6145 + }, + { + "epoch": 91.73, + "learning_rate": 0.00013712280701754385, + "loss": 0.0007, + "step": 6146 + }, + { + "epoch": 91.74, + "learning_rate": 0.0001370877192982456, + "loss": 0.0077, + "step": 6147 + }, + { + "epoch": 91.76, + "learning_rate": 0.00013705263157894735, + "loss": 0.0008, + "step": 6148 + }, + { + "epoch": 91.77, + "learning_rate": 0.00013701754385964912, + "loss": 0.004, + "step": 6149 + }, + { + "epoch": 91.79, + "learning_rate": 0.00013698245614035087, + "loss": 0.0006, + "step": 6150 + }, + { + "epoch": 91.8, + "learning_rate": 0.00013694736842105262, + "loss": 0.1144, + "step": 6151 + }, + { + "epoch": 91.82, + "learning_rate": 0.00013691228070175437, + "loss": 0.0006, + "step": 6152 + }, + { + "epoch": 91.83, + "learning_rate": 0.00013687719298245612, + "loss": 0.0028, + "step": 6153 + }, + { + "epoch": 91.85, + "learning_rate": 0.00013684210526315787, + "loss": 0.0038, + "step": 6154 + }, + { + "epoch": 91.86, + "learning_rate": 0.00013680701754385964, + "loss": 0.0016, + "step": 6155 + }, + { + "epoch": 91.88, + "learning_rate": 0.0001367719298245614, + "loss": 0.0007, + "step": 6156 + }, + { + "epoch": 91.89, + "learning_rate": 0.00013673684210526314, + "loss": 0.0006, + "step": 6157 + }, + { + "epoch": 91.91, + "learning_rate": 0.00013670175438596491, + "loss": 0.001, + "step": 6158 + }, + { + "epoch": 91.92, + "learning_rate": 0.00013666666666666666, + "loss": 0.0008, + "step": 6159 + }, + { + "epoch": 91.94, + "learning_rate": 0.0001366315789473684, + "loss": 0.0173, + "step": 6160 + }, + { + "epoch": 91.95, + "learning_rate": 0.00013659649122807016, + "loss": 0.0082, + "step": 6161 + }, + { + "epoch": 91.97, + "learning_rate": 0.0001365614035087719, + "loss": 0.0005, + "step": 6162 + }, + { + "epoch": 91.98, + "learning_rate": 0.00013652631578947366, + "loss": 0.0015, + "step": 6163 + }, + { + "epoch": 92.0, + "learning_rate": 0.00013649122807017543, + "loss": 0.117, + "step": 6164 + }, + { + "epoch": 92.01, + "learning_rate": 0.00013645614035087718, + "loss": 0.0019, + "step": 6165 + }, + { + "epoch": 92.03, + "learning_rate": 0.00013642105263157893, + "loss": 0.0015, + "step": 6166 + }, + { + "epoch": 92.04, + "learning_rate": 0.00013638596491228068, + "loss": 0.0028, + "step": 6167 + }, + { + "epoch": 92.06, + "learning_rate": 0.00013635087719298246, + "loss": 0.0007, + "step": 6168 + }, + { + "epoch": 92.07, + "learning_rate": 0.0001363157894736842, + "loss": 0.0005, + "step": 6169 + }, + { + "epoch": 92.09, + "learning_rate": 0.00013628070175438595, + "loss": 0.0019, + "step": 6170 + }, + { + "epoch": 92.1, + "learning_rate": 0.0001362456140350877, + "loss": 0.0083, + "step": 6171 + }, + { + "epoch": 92.12, + "learning_rate": 0.00013621052631578945, + "loss": 0.0007, + "step": 6172 + }, + { + "epoch": 92.13, + "learning_rate": 0.00013617543859649123, + "loss": 0.0006, + "step": 6173 + }, + { + "epoch": 92.15, + "learning_rate": 0.00013614035087719298, + "loss": 0.0821, + "step": 6174 + }, + { + "epoch": 92.16, + "learning_rate": 0.00013610526315789473, + "loss": 0.0006, + "step": 6175 + }, + { + "epoch": 92.18, + "learning_rate": 0.00013607017543859647, + "loss": 0.0115, + "step": 6176 + }, + { + "epoch": 92.19, + "learning_rate": 0.00013603508771929822, + "loss": 0.0006, + "step": 6177 + }, + { + "epoch": 92.21, + "learning_rate": 0.00013599999999999997, + "loss": 0.0011, + "step": 6178 + }, + { + "epoch": 92.22, + "learning_rate": 0.00013596491228070175, + "loss": 0.0006, + "step": 6179 + }, + { + "epoch": 92.24, + "learning_rate": 0.0001359298245614035, + "loss": 0.0005, + "step": 6180 + }, + { + "epoch": 92.25, + "learning_rate": 0.00013589473684210525, + "loss": 0.0007, + "step": 6181 + }, + { + "epoch": 92.27, + "learning_rate": 0.00013585964912280702, + "loss": 0.0055, + "step": 6182 + }, + { + "epoch": 92.28, + "learning_rate": 0.00013582456140350877, + "loss": 0.0007, + "step": 6183 + }, + { + "epoch": 92.3, + "learning_rate": 0.00013578947368421052, + "loss": 0.0107, + "step": 6184 + }, + { + "epoch": 92.31, + "learning_rate": 0.00013575438596491227, + "loss": 0.0112, + "step": 6185 + }, + { + "epoch": 92.33, + "learning_rate": 0.00013571929824561402, + "loss": 0.0007, + "step": 6186 + }, + { + "epoch": 92.34, + "learning_rate": 0.00013568421052631577, + "loss": 0.0005, + "step": 6187 + }, + { + "epoch": 92.36, + "learning_rate": 0.00013564912280701754, + "loss": 0.0006, + "step": 6188 + }, + { + "epoch": 92.37, + "learning_rate": 0.0001356140350877193, + "loss": 0.0073, + "step": 6189 + }, + { + "epoch": 92.39, + "learning_rate": 0.00013557894736842104, + "loss": 0.0006, + "step": 6190 + }, + { + "epoch": 92.4, + "learning_rate": 0.0001355438596491228, + "loss": 0.0006, + "step": 6191 + }, + { + "epoch": 92.42, + "learning_rate": 0.00013550877192982456, + "loss": 0.0006, + "step": 6192 + }, + { + "epoch": 92.43, + "learning_rate": 0.0001354736842105263, + "loss": 0.0005, + "step": 6193 + }, + { + "epoch": 92.45, + "learning_rate": 0.00013543859649122806, + "loss": 0.0007, + "step": 6194 + }, + { + "epoch": 92.46, + "learning_rate": 0.0001354035087719298, + "loss": 0.0005, + "step": 6195 + }, + { + "epoch": 92.48, + "learning_rate": 0.00013536842105263156, + "loss": 0.1313, + "step": 6196 + }, + { + "epoch": 92.49, + "learning_rate": 0.00013533333333333333, + "loss": 0.0005, + "step": 6197 + }, + { + "epoch": 92.51, + "learning_rate": 0.00013529824561403508, + "loss": 0.0802, + "step": 6198 + }, + { + "epoch": 92.52, + "learning_rate": 0.00013526315789473683, + "loss": 0.0005, + "step": 6199 + }, + { + "epoch": 92.54, + "learning_rate": 0.00013522807017543858, + "loss": 0.0621, + "step": 6200 + }, + { + "epoch": 92.54, + "eval_accuracy": 0.8607440039158101, + "eval_f1": 0.8599750596640915, + "eval_loss": 0.8078528642654419, + "eval_runtime": 344.8499, + "eval_samples_per_second": 11.849, + "eval_steps_per_second": 0.742, + "step": 6200 + }, + { + "epoch": 92.55, + "learning_rate": 0.00013519298245614033, + "loss": 0.0008, + "step": 6201 + }, + { + "epoch": 92.57, + "learning_rate": 0.00013515789473684208, + "loss": 0.0007, + "step": 6202 + }, + { + "epoch": 92.58, + "learning_rate": 0.00013512280701754385, + "loss": 0.001, + "step": 6203 + }, + { + "epoch": 92.59, + "learning_rate": 0.0001350877192982456, + "loss": 0.0007, + "step": 6204 + }, + { + "epoch": 92.61, + "learning_rate": 0.00013505263157894735, + "loss": 0.0006, + "step": 6205 + }, + { + "epoch": 92.62, + "learning_rate": 0.00013501754385964913, + "loss": 0.0015, + "step": 6206 + }, + { + "epoch": 92.64, + "learning_rate": 0.00013498245614035088, + "loss": 0.0006, + "step": 6207 + }, + { + "epoch": 92.65, + "learning_rate": 0.00013494736842105262, + "loss": 0.0005, + "step": 6208 + }, + { + "epoch": 92.67, + "learning_rate": 0.00013491228070175437, + "loss": 0.0017, + "step": 6209 + }, + { + "epoch": 92.68, + "learning_rate": 0.00013487719298245612, + "loss": 0.0016, + "step": 6210 + }, + { + "epoch": 92.7, + "learning_rate": 0.00013484210526315787, + "loss": 0.0011, + "step": 6211 + }, + { + "epoch": 92.71, + "learning_rate": 0.00013480701754385965, + "loss": 0.0957, + "step": 6212 + }, + { + "epoch": 92.73, + "learning_rate": 0.0001347719298245614, + "loss": 0.0011, + "step": 6213 + }, + { + "epoch": 92.74, + "learning_rate": 0.00013473684210526314, + "loss": 0.0005, + "step": 6214 + }, + { + "epoch": 92.76, + "learning_rate": 0.0001347017543859649, + "loss": 0.0006, + "step": 6215 + }, + { + "epoch": 92.77, + "learning_rate": 0.00013466666666666667, + "loss": 0.0032, + "step": 6216 + }, + { + "epoch": 92.79, + "learning_rate": 0.0001346315789473684, + "loss": 0.0005, + "step": 6217 + }, + { + "epoch": 92.8, + "learning_rate": 0.00013459649122807017, + "loss": 0.0136, + "step": 6218 + }, + { + "epoch": 92.82, + "learning_rate": 0.00013456140350877192, + "loss": 0.0005, + "step": 6219 + }, + { + "epoch": 92.83, + "learning_rate": 0.00013452631578947366, + "loss": 0.0005, + "step": 6220 + }, + { + "epoch": 92.85, + "learning_rate": 0.00013449122807017544, + "loss": 0.0007, + "step": 6221 + }, + { + "epoch": 92.86, + "learning_rate": 0.0001344561403508772, + "loss": 0.0007, + "step": 6222 + }, + { + "epoch": 92.88, + "learning_rate": 0.00013442105263157894, + "loss": 0.0016, + "step": 6223 + }, + { + "epoch": 92.89, + "learning_rate": 0.0001343859649122807, + "loss": 0.0035, + "step": 6224 + }, + { + "epoch": 92.91, + "learning_rate": 0.00013435087719298244, + "loss": 0.0029, + "step": 6225 + }, + { + "epoch": 92.92, + "learning_rate": 0.00013431578947368418, + "loss": 0.0985, + "step": 6226 + }, + { + "epoch": 92.94, + "learning_rate": 0.00013428070175438596, + "loss": 0.0301, + "step": 6227 + }, + { + "epoch": 92.95, + "learning_rate": 0.0001342456140350877, + "loss": 0.0009, + "step": 6228 + }, + { + "epoch": 92.97, + "learning_rate": 0.00013421052631578946, + "loss": 0.0006, + "step": 6229 + }, + { + "epoch": 92.98, + "learning_rate": 0.0001341754385964912, + "loss": 0.0004, + "step": 6230 + }, + { + "epoch": 93.0, + "learning_rate": 0.00013414035087719298, + "loss": 0.1966, + "step": 6231 + }, + { + "epoch": 93.01, + "learning_rate": 0.00013410526315789473, + "loss": 0.0009, + "step": 6232 + }, + { + "epoch": 93.03, + "learning_rate": 0.00013407017543859648, + "loss": 0.0005, + "step": 6233 + }, + { + "epoch": 93.04, + "learning_rate": 0.00013403508771929823, + "loss": 0.0006, + "step": 6234 + }, + { + "epoch": 93.06, + "learning_rate": 0.00013399999999999998, + "loss": 0.1749, + "step": 6235 + }, + { + "epoch": 93.07, + "learning_rate": 0.00013396491228070175, + "loss": 0.0007, + "step": 6236 + }, + { + "epoch": 93.09, + "learning_rate": 0.0001339298245614035, + "loss": 0.0009, + "step": 6237 + }, + { + "epoch": 93.1, + "learning_rate": 0.00013389473684210525, + "loss": 0.0663, + "step": 6238 + }, + { + "epoch": 93.12, + "learning_rate": 0.000133859649122807, + "loss": 0.0005, + "step": 6239 + }, + { + "epoch": 93.13, + "learning_rate": 0.00013382456140350878, + "loss": 0.0692, + "step": 6240 + }, + { + "epoch": 93.15, + "learning_rate": 0.0001337894736842105, + "loss": 0.001, + "step": 6241 + }, + { + "epoch": 93.16, + "learning_rate": 0.00013375438596491227, + "loss": 0.0006, + "step": 6242 + }, + { + "epoch": 93.18, + "learning_rate": 0.00013371929824561402, + "loss": 0.0683, + "step": 6243 + }, + { + "epoch": 93.19, + "learning_rate": 0.00013368421052631577, + "loss": 0.0006, + "step": 6244 + }, + { + "epoch": 93.21, + "learning_rate": 0.00013364912280701755, + "loss": 0.002, + "step": 6245 + }, + { + "epoch": 93.22, + "learning_rate": 0.0001336140350877193, + "loss": 0.0018, + "step": 6246 + }, + { + "epoch": 93.24, + "learning_rate": 0.00013357894736842104, + "loss": 0.0647, + "step": 6247 + }, + { + "epoch": 93.25, + "learning_rate": 0.0001335438596491228, + "loss": 0.0004, + "step": 6248 + }, + { + "epoch": 93.27, + "learning_rate": 0.00013350877192982454, + "loss": 0.0005, + "step": 6249 + }, + { + "epoch": 93.28, + "learning_rate": 0.0001334736842105263, + "loss": 0.0004, + "step": 6250 + }, + { + "epoch": 93.3, + "learning_rate": 0.00013343859649122807, + "loss": 0.0005, + "step": 6251 + }, + { + "epoch": 93.31, + "learning_rate": 0.00013340350877192982, + "loss": 0.2038, + "step": 6252 + }, + { + "epoch": 93.33, + "learning_rate": 0.00013336842105263156, + "loss": 0.0026, + "step": 6253 + }, + { + "epoch": 93.34, + "learning_rate": 0.0001333333333333333, + "loss": 0.0317, + "step": 6254 + }, + { + "epoch": 93.36, + "learning_rate": 0.0001332982456140351, + "loss": 0.0086, + "step": 6255 + }, + { + "epoch": 93.37, + "learning_rate": 0.00013326315789473684, + "loss": 0.0007, + "step": 6256 + }, + { + "epoch": 93.39, + "learning_rate": 0.00013322807017543859, + "loss": 0.0007, + "step": 6257 + }, + { + "epoch": 93.4, + "learning_rate": 0.00013319298245614033, + "loss": 0.0006, + "step": 6258 + }, + { + "epoch": 93.42, + "learning_rate": 0.00013315789473684208, + "loss": 0.0013, + "step": 6259 + }, + { + "epoch": 93.43, + "learning_rate": 0.00013312280701754386, + "loss": 0.0005, + "step": 6260 + }, + { + "epoch": 93.45, + "learning_rate": 0.0001330877192982456, + "loss": 0.0021, + "step": 6261 + }, + { + "epoch": 93.46, + "learning_rate": 0.00013305263157894736, + "loss": 0.0004, + "step": 6262 + }, + { + "epoch": 93.48, + "learning_rate": 0.0001330175438596491, + "loss": 0.0004, + "step": 6263 + }, + { + "epoch": 93.49, + "learning_rate": 0.00013298245614035085, + "loss": 0.0005, + "step": 6264 + }, + { + "epoch": 93.51, + "learning_rate": 0.0001329473684210526, + "loss": 0.0008, + "step": 6265 + }, + { + "epoch": 93.52, + "learning_rate": 0.00013291228070175438, + "loss": 0.0006, + "step": 6266 + }, + { + "epoch": 93.54, + "learning_rate": 0.00013287719298245613, + "loss": 0.0008, + "step": 6267 + }, + { + "epoch": 93.55, + "learning_rate": 0.00013284210526315788, + "loss": 0.0007, + "step": 6268 + }, + { + "epoch": 93.57, + "learning_rate": 0.00013280701754385965, + "loss": 0.0006, + "step": 6269 + }, + { + "epoch": 93.58, + "learning_rate": 0.0001327719298245614, + "loss": 0.0005, + "step": 6270 + }, + { + "epoch": 93.59, + "learning_rate": 0.00013273684210526315, + "loss": 0.0024, + "step": 6271 + }, + { + "epoch": 93.61, + "learning_rate": 0.0001327017543859649, + "loss": 0.0011, + "step": 6272 + }, + { + "epoch": 93.62, + "learning_rate": 0.00013266666666666665, + "loss": 0.0007, + "step": 6273 + }, + { + "epoch": 93.64, + "learning_rate": 0.0001326315789473684, + "loss": 0.001, + "step": 6274 + }, + { + "epoch": 93.65, + "learning_rate": 0.00013259649122807017, + "loss": 0.0005, + "step": 6275 + }, + { + "epoch": 93.67, + "learning_rate": 0.00013256140350877192, + "loss": 0.0005, + "step": 6276 + }, + { + "epoch": 93.68, + "learning_rate": 0.00013252631578947367, + "loss": 0.0384, + "step": 6277 + }, + { + "epoch": 93.7, + "learning_rate": 0.00013249122807017542, + "loss": 0.0009, + "step": 6278 + }, + { + "epoch": 93.71, + "learning_rate": 0.0001324561403508772, + "loss": 0.0007, + "step": 6279 + }, + { + "epoch": 93.73, + "learning_rate": 0.00013242105263157892, + "loss": 0.0027, + "step": 6280 + }, + { + "epoch": 93.74, + "learning_rate": 0.0001323859649122807, + "loss": 0.0005, + "step": 6281 + }, + { + "epoch": 93.76, + "learning_rate": 0.00013235087719298244, + "loss": 0.0039, + "step": 6282 + }, + { + "epoch": 93.77, + "learning_rate": 0.0001323157894736842, + "loss": 0.0004, + "step": 6283 + }, + { + "epoch": 93.79, + "learning_rate": 0.00013228070175438597, + "loss": 0.001, + "step": 6284 + }, + { + "epoch": 93.8, + "learning_rate": 0.00013224561403508771, + "loss": 0.001, + "step": 6285 + }, + { + "epoch": 93.82, + "learning_rate": 0.00013221052631578946, + "loss": 0.1333, + "step": 6286 + }, + { + "epoch": 93.83, + "learning_rate": 0.0001321754385964912, + "loss": 0.0008, + "step": 6287 + }, + { + "epoch": 93.85, + "learning_rate": 0.00013214035087719296, + "loss": 0.0007, + "step": 6288 + }, + { + "epoch": 93.86, + "learning_rate": 0.0001321052631578947, + "loss": 0.0018, + "step": 6289 + }, + { + "epoch": 93.88, + "learning_rate": 0.00013207017543859649, + "loss": 0.1526, + "step": 6290 + }, + { + "epoch": 93.89, + "learning_rate": 0.00013203508771929823, + "loss": 0.0005, + "step": 6291 + }, + { + "epoch": 93.91, + "learning_rate": 0.00013199999999999998, + "loss": 0.0031, + "step": 6292 + }, + { + "epoch": 93.92, + "learning_rate": 0.00013196491228070173, + "loss": 0.0005, + "step": 6293 + }, + { + "epoch": 93.94, + "learning_rate": 0.0001319298245614035, + "loss": 0.0926, + "step": 6294 + }, + { + "epoch": 93.95, + "learning_rate": 0.00013189473684210526, + "loss": 0.0006, + "step": 6295 + }, + { + "epoch": 93.97, + "learning_rate": 0.000131859649122807, + "loss": 0.0005, + "step": 6296 + }, + { + "epoch": 93.98, + "learning_rate": 0.00013182456140350875, + "loss": 0.0016, + "step": 6297 + }, + { + "epoch": 94.0, + "learning_rate": 0.0001317894736842105, + "loss": 0.0028, + "step": 6298 + }, + { + "epoch": 94.01, + "learning_rate": 0.00013175438596491228, + "loss": 0.0095, + "step": 6299 + }, + { + "epoch": 94.03, + "learning_rate": 0.00013171929824561403, + "loss": 0.0258, + "step": 6300 + }, + { + "epoch": 94.04, + "learning_rate": 0.00013168421052631578, + "loss": 0.0004, + "step": 6301 + }, + { + "epoch": 94.06, + "learning_rate": 0.00013164912280701753, + "loss": 0.0005, + "step": 6302 + }, + { + "epoch": 94.07, + "learning_rate": 0.0001316140350877193, + "loss": 0.0438, + "step": 6303 + }, + { + "epoch": 94.09, + "learning_rate": 0.00013157894736842102, + "loss": 0.0025, + "step": 6304 + }, + { + "epoch": 94.1, + "learning_rate": 0.0001315438596491228, + "loss": 0.0166, + "step": 6305 + }, + { + "epoch": 94.12, + "learning_rate": 0.00013150877192982455, + "loss": 0.0006, + "step": 6306 + }, + { + "epoch": 94.13, + "learning_rate": 0.0001314736842105263, + "loss": 0.0005, + "step": 6307 + }, + { + "epoch": 94.15, + "learning_rate": 0.00013143859649122807, + "loss": 0.0006, + "step": 6308 + }, + { + "epoch": 94.16, + "learning_rate": 0.00013140350877192982, + "loss": 0.0017, + "step": 6309 + }, + { + "epoch": 94.18, + "learning_rate": 0.00013136842105263157, + "loss": 0.0004, + "step": 6310 + }, + { + "epoch": 94.19, + "learning_rate": 0.00013133333333333332, + "loss": 0.0006, + "step": 6311 + }, + { + "epoch": 94.21, + "learning_rate": 0.00013129824561403507, + "loss": 0.0006, + "step": 6312 + }, + { + "epoch": 94.22, + "learning_rate": 0.00013126315789473682, + "loss": 0.0004, + "step": 6313 + }, + { + "epoch": 94.24, + "learning_rate": 0.0001312280701754386, + "loss": 0.0004, + "step": 6314 + }, + { + "epoch": 94.25, + "learning_rate": 0.00013119298245614034, + "loss": 0.0006, + "step": 6315 + }, + { + "epoch": 94.27, + "learning_rate": 0.0001311578947368421, + "loss": 0.0004, + "step": 6316 + }, + { + "epoch": 94.28, + "learning_rate": 0.00013112280701754384, + "loss": 0.05, + "step": 6317 + }, + { + "epoch": 94.3, + "learning_rate": 0.00013108771929824561, + "loss": 0.0743, + "step": 6318 + }, + { + "epoch": 94.31, + "learning_rate": 0.00013105263157894736, + "loss": 0.0017, + "step": 6319 + }, + { + "epoch": 94.33, + "learning_rate": 0.0001310175438596491, + "loss": 0.1355, + "step": 6320 + }, + { + "epoch": 94.34, + "learning_rate": 0.00013098245614035086, + "loss": 0.0004, + "step": 6321 + }, + { + "epoch": 94.36, + "learning_rate": 0.0001309473684210526, + "loss": 0.0004, + "step": 6322 + }, + { + "epoch": 94.37, + "learning_rate": 0.00013091228070175438, + "loss": 0.0094, + "step": 6323 + }, + { + "epoch": 94.39, + "learning_rate": 0.00013087719298245613, + "loss": 0.0006, + "step": 6324 + }, + { + "epoch": 94.4, + "learning_rate": 0.00013084210526315788, + "loss": 0.0006, + "step": 6325 + }, + { + "epoch": 94.42, + "learning_rate": 0.00013080701754385963, + "loss": 0.0016, + "step": 6326 + }, + { + "epoch": 94.43, + "learning_rate": 0.0001307719298245614, + "loss": 0.0007, + "step": 6327 + }, + { + "epoch": 94.45, + "learning_rate": 0.00013073684210526313, + "loss": 0.0006, + "step": 6328 + }, + { + "epoch": 94.46, + "learning_rate": 0.0001307017543859649, + "loss": 0.0005, + "step": 6329 + }, + { + "epoch": 94.48, + "learning_rate": 0.00013066666666666665, + "loss": 0.0007, + "step": 6330 + }, + { + "epoch": 94.49, + "learning_rate": 0.0001306315789473684, + "loss": 0.0004, + "step": 6331 + }, + { + "epoch": 94.51, + "learning_rate": 0.00013059649122807018, + "loss": 0.0312, + "step": 6332 + }, + { + "epoch": 94.52, + "learning_rate": 0.00013056140350877193, + "loss": 0.0006, + "step": 6333 + }, + { + "epoch": 94.54, + "learning_rate": 0.00013052631578947368, + "loss": 0.0689, + "step": 6334 + }, + { + "epoch": 94.55, + "learning_rate": 0.00013049122807017542, + "loss": 0.0004, + "step": 6335 + }, + { + "epoch": 94.57, + "learning_rate": 0.00013045614035087717, + "loss": 0.002, + "step": 6336 + }, + { + "epoch": 94.58, + "learning_rate": 0.00013042105263157892, + "loss": 0.0004, + "step": 6337 + }, + { + "epoch": 94.59, + "learning_rate": 0.0001303859649122807, + "loss": 0.0005, + "step": 6338 + }, + { + "epoch": 94.61, + "learning_rate": 0.00013035087719298245, + "loss": 0.0005, + "step": 6339 + }, + { + "epoch": 94.62, + "learning_rate": 0.0001303157894736842, + "loss": 0.0027, + "step": 6340 + }, + { + "epoch": 94.64, + "learning_rate": 0.00013028070175438594, + "loss": 0.0174, + "step": 6341 + }, + { + "epoch": 94.65, + "learning_rate": 0.00013024561403508772, + "loss": 0.0006, + "step": 6342 + }, + { + "epoch": 94.67, + "learning_rate": 0.00013021052631578944, + "loss": 0.0006, + "step": 6343 + }, + { + "epoch": 94.68, + "learning_rate": 0.00013017543859649122, + "loss": 0.0004, + "step": 6344 + }, + { + "epoch": 94.7, + "learning_rate": 0.00013014035087719297, + "loss": 0.0829, + "step": 6345 + }, + { + "epoch": 94.71, + "learning_rate": 0.00013010526315789472, + "loss": 0.0004, + "step": 6346 + }, + { + "epoch": 94.73, + "learning_rate": 0.0001300701754385965, + "loss": 0.0004, + "step": 6347 + }, + { + "epoch": 94.74, + "learning_rate": 0.00013003508771929824, + "loss": 0.0714, + "step": 6348 + }, + { + "epoch": 94.76, + "learning_rate": 0.00013, + "loss": 0.0005, + "step": 6349 + }, + { + "epoch": 94.77, + "learning_rate": 0.00012996491228070174, + "loss": 0.0005, + "step": 6350 + }, + { + "epoch": 94.79, + "learning_rate": 0.0001299298245614035, + "loss": 0.048, + "step": 6351 + }, + { + "epoch": 94.8, + "learning_rate": 0.00012989473684210523, + "loss": 0.0013, + "step": 6352 + }, + { + "epoch": 94.82, + "learning_rate": 0.000129859649122807, + "loss": 0.0238, + "step": 6353 + }, + { + "epoch": 94.83, + "learning_rate": 0.00012982456140350876, + "loss": 0.0007, + "step": 6354 + }, + { + "epoch": 94.85, + "learning_rate": 0.0001297894736842105, + "loss": 0.0733, + "step": 6355 + }, + { + "epoch": 94.86, + "learning_rate": 0.00012975438596491228, + "loss": 0.0005, + "step": 6356 + }, + { + "epoch": 94.88, + "learning_rate": 0.00012971929824561403, + "loss": 0.0747, + "step": 6357 + }, + { + "epoch": 94.89, + "learning_rate": 0.00012968421052631578, + "loss": 0.0004, + "step": 6358 + }, + { + "epoch": 94.91, + "learning_rate": 0.00012964912280701753, + "loss": 0.0004, + "step": 6359 + }, + { + "epoch": 94.92, + "learning_rate": 0.00012961403508771928, + "loss": 0.0004, + "step": 6360 + }, + { + "epoch": 94.94, + "learning_rate": 0.00012957894736842103, + "loss": 0.0108, + "step": 6361 + }, + { + "epoch": 94.95, + "learning_rate": 0.0001295438596491228, + "loss": 0.0044, + "step": 6362 + }, + { + "epoch": 94.97, + "learning_rate": 0.00012950877192982455, + "loss": 0.0004, + "step": 6363 + }, + { + "epoch": 94.98, + "learning_rate": 0.0001294736842105263, + "loss": 0.0004, + "step": 6364 + }, + { + "epoch": 95.0, + "learning_rate": 0.00012943859649122805, + "loss": 0.0027, + "step": 6365 + }, + { + "epoch": 95.01, + "learning_rate": 0.00012940350877192983, + "loss": 0.0074, + "step": 6366 + }, + { + "epoch": 95.03, + "learning_rate": 0.00012936842105263155, + "loss": 0.0005, + "step": 6367 + }, + { + "epoch": 95.04, + "learning_rate": 0.00012933333333333332, + "loss": 0.0004, + "step": 6368 + }, + { + "epoch": 95.06, + "learning_rate": 0.00012929824561403507, + "loss": 0.0004, + "step": 6369 + }, + { + "epoch": 95.07, + "learning_rate": 0.00012926315789473682, + "loss": 0.0162, + "step": 6370 + }, + { + "epoch": 95.09, + "learning_rate": 0.0001292280701754386, + "loss": 0.0012, + "step": 6371 + }, + { + "epoch": 95.1, + "learning_rate": 0.00012919298245614035, + "loss": 0.0006, + "step": 6372 + }, + { + "epoch": 95.12, + "learning_rate": 0.0001291578947368421, + "loss": 0.0157, + "step": 6373 + }, + { + "epoch": 95.13, + "learning_rate": 0.00012912280701754384, + "loss": 0.0005, + "step": 6374 + }, + { + "epoch": 95.15, + "learning_rate": 0.00012908771929824562, + "loss": 0.0015, + "step": 6375 + }, + { + "epoch": 95.16, + "learning_rate": 0.00012905263157894734, + "loss": 0.0004, + "step": 6376 + }, + { + "epoch": 95.18, + "learning_rate": 0.00012901754385964912, + "loss": 0.1115, + "step": 6377 + }, + { + "epoch": 95.19, + "learning_rate": 0.00012898245614035087, + "loss": 0.0723, + "step": 6378 + }, + { + "epoch": 95.21, + "learning_rate": 0.00012894736842105261, + "loss": 0.0145, + "step": 6379 + }, + { + "epoch": 95.22, + "learning_rate": 0.00012891228070175436, + "loss": 0.0005, + "step": 6380 + }, + { + "epoch": 95.24, + "learning_rate": 0.00012887719298245614, + "loss": 0.1661, + "step": 6381 + }, + { + "epoch": 95.25, + "learning_rate": 0.0001288421052631579, + "loss": 0.0004, + "step": 6382 + }, + { + "epoch": 95.27, + "learning_rate": 0.00012880701754385964, + "loss": 0.001, + "step": 6383 + }, + { + "epoch": 95.28, + "learning_rate": 0.00012877192982456139, + "loss": 0.0004, + "step": 6384 + }, + { + "epoch": 95.3, + "learning_rate": 0.00012873684210526313, + "loss": 0.2419, + "step": 6385 + }, + { + "epoch": 95.31, + "learning_rate": 0.0001287017543859649, + "loss": 0.0008, + "step": 6386 + }, + { + "epoch": 95.33, + "learning_rate": 0.00012866666666666666, + "loss": 0.2924, + "step": 6387 + }, + { + "epoch": 95.34, + "learning_rate": 0.0001286315789473684, + "loss": 0.0004, + "step": 6388 + }, + { + "epoch": 95.36, + "learning_rate": 0.00012859649122807016, + "loss": 0.1491, + "step": 6389 + }, + { + "epoch": 95.37, + "learning_rate": 0.00012856140350877193, + "loss": 0.0007, + "step": 6390 + }, + { + "epoch": 95.39, + "learning_rate": 0.00012852631578947365, + "loss": 0.0036, + "step": 6391 + }, + { + "epoch": 95.4, + "learning_rate": 0.00012849122807017543, + "loss": 0.0036, + "step": 6392 + }, + { + "epoch": 95.42, + "learning_rate": 0.00012845614035087718, + "loss": 0.0024, + "step": 6393 + }, + { + "epoch": 95.43, + "learning_rate": 0.00012842105263157893, + "loss": 0.0925, + "step": 6394 + }, + { + "epoch": 95.45, + "learning_rate": 0.0001283859649122807, + "loss": 0.0011, + "step": 6395 + }, + { + "epoch": 95.46, + "learning_rate": 0.00012835087719298245, + "loss": 0.0024, + "step": 6396 + }, + { + "epoch": 95.48, + "learning_rate": 0.0001283157894736842, + "loss": 0.0005, + "step": 6397 + }, + { + "epoch": 95.49, + "learning_rate": 0.00012828070175438595, + "loss": 0.0031, + "step": 6398 + }, + { + "epoch": 95.51, + "learning_rate": 0.00012824561403508773, + "loss": 0.0012, + "step": 6399 + }, + { + "epoch": 95.52, + "learning_rate": 0.00012821052631578945, + "loss": 0.0017, + "step": 6400 + }, + { + "epoch": 95.52, + "eval_accuracy": 0.8668624571708272, + "eval_f1": 0.8678242564825082, + "eval_loss": 0.7071595191955566, + "eval_runtime": 345.1767, + "eval_samples_per_second": 11.837, + "eval_steps_per_second": 0.742, + "step": 6400 + }, + { + "epoch": 95.54, + "learning_rate": 0.00012817543859649122, + "loss": 0.0011, + "step": 6401 + }, + { + "epoch": 95.55, + "learning_rate": 0.00012814035087719297, + "loss": 0.1979, + "step": 6402 + }, + { + "epoch": 95.57, + "learning_rate": 0.00012810526315789472, + "loss": 0.205, + "step": 6403 + }, + { + "epoch": 95.58, + "learning_rate": 0.00012807017543859647, + "loss": 0.2207, + "step": 6404 + }, + { + "epoch": 95.59, + "learning_rate": 0.00012803508771929825, + "loss": 0.0012, + "step": 6405 + }, + { + "epoch": 95.61, + "learning_rate": 0.000128, + "loss": 0.0026, + "step": 6406 + }, + { + "epoch": 95.62, + "learning_rate": 0.00012796491228070174, + "loss": 0.0007, + "step": 6407 + }, + { + "epoch": 95.64, + "learning_rate": 0.0001279298245614035, + "loss": 0.0007, + "step": 6408 + }, + { + "epoch": 95.65, + "learning_rate": 0.00012789473684210524, + "loss": 0.0006, + "step": 6409 + }, + { + "epoch": 95.67, + "learning_rate": 0.00012785964912280702, + "loss": 0.0007, + "step": 6410 + }, + { + "epoch": 95.68, + "learning_rate": 0.00012782456140350877, + "loss": 0.0044, + "step": 6411 + }, + { + "epoch": 95.7, + "learning_rate": 0.00012778947368421051, + "loss": 0.0106, + "step": 6412 + }, + { + "epoch": 95.71, + "learning_rate": 0.00012775438596491226, + "loss": 0.0007, + "step": 6413 + }, + { + "epoch": 95.73, + "learning_rate": 0.00012771929824561404, + "loss": 0.0008, + "step": 6414 + }, + { + "epoch": 95.74, + "learning_rate": 0.00012768421052631576, + "loss": 0.0084, + "step": 6415 + }, + { + "epoch": 95.76, + "learning_rate": 0.00012764912280701754, + "loss": 0.0012, + "step": 6416 + }, + { + "epoch": 95.77, + "learning_rate": 0.00012761403508771928, + "loss": 0.0544, + "step": 6417 + }, + { + "epoch": 95.79, + "learning_rate": 0.00012757894736842103, + "loss": 0.01, + "step": 6418 + }, + { + "epoch": 95.8, + "learning_rate": 0.0001275438596491228, + "loss": 0.0022, + "step": 6419 + }, + { + "epoch": 95.82, + "learning_rate": 0.00012750877192982456, + "loss": 0.1372, + "step": 6420 + }, + { + "epoch": 95.83, + "learning_rate": 0.0001274736842105263, + "loss": 0.0036, + "step": 6421 + }, + { + "epoch": 95.85, + "learning_rate": 0.00012743859649122806, + "loss": 0.0005, + "step": 6422 + }, + { + "epoch": 95.86, + "learning_rate": 0.00012740350877192983, + "loss": 0.0006, + "step": 6423 + }, + { + "epoch": 95.88, + "learning_rate": 0.00012736842105263155, + "loss": 0.0088, + "step": 6424 + }, + { + "epoch": 95.89, + "learning_rate": 0.00012733333333333333, + "loss": 0.0007, + "step": 6425 + }, + { + "epoch": 95.91, + "learning_rate": 0.00012729824561403508, + "loss": 0.1873, + "step": 6426 + }, + { + "epoch": 95.92, + "learning_rate": 0.00012726315789473683, + "loss": 0.0013, + "step": 6427 + }, + { + "epoch": 95.94, + "learning_rate": 0.00012722807017543858, + "loss": 0.0006, + "step": 6428 + }, + { + "epoch": 95.95, + "learning_rate": 0.00012719298245614035, + "loss": 0.0006, + "step": 6429 + }, + { + "epoch": 95.97, + "learning_rate": 0.00012715789473684207, + "loss": 0.0028, + "step": 6430 + }, + { + "epoch": 95.98, + "learning_rate": 0.00012712280701754385, + "loss": 0.0006, + "step": 6431 + }, + { + "epoch": 96.0, + "learning_rate": 0.0001270877192982456, + "loss": 0.359, + "step": 6432 + }, + { + "epoch": 96.01, + "learning_rate": 0.00012705263157894735, + "loss": 0.0027, + "step": 6433 + }, + { + "epoch": 96.03, + "learning_rate": 0.00012701754385964912, + "loss": 0.0006, + "step": 6434 + }, + { + "epoch": 96.04, + "learning_rate": 0.00012698245614035087, + "loss": 0.0006, + "step": 6435 + }, + { + "epoch": 96.06, + "learning_rate": 0.00012694736842105262, + "loss": 0.1297, + "step": 6436 + }, + { + "epoch": 96.07, + "learning_rate": 0.00012691228070175437, + "loss": 0.0008, + "step": 6437 + }, + { + "epoch": 96.09, + "learning_rate": 0.00012687719298245614, + "loss": 0.0014, + "step": 6438 + }, + { + "epoch": 96.1, + "learning_rate": 0.00012684210526315787, + "loss": 0.045, + "step": 6439 + }, + { + "epoch": 96.12, + "learning_rate": 0.00012680701754385964, + "loss": 0.0009, + "step": 6440 + }, + { + "epoch": 96.13, + "learning_rate": 0.0001267719298245614, + "loss": 0.0013, + "step": 6441 + }, + { + "epoch": 96.15, + "learning_rate": 0.00012673684210526314, + "loss": 0.002, + "step": 6442 + }, + { + "epoch": 96.16, + "learning_rate": 0.0001267017543859649, + "loss": 0.0005, + "step": 6443 + }, + { + "epoch": 96.18, + "learning_rate": 0.00012666666666666666, + "loss": 0.1275, + "step": 6444 + }, + { + "epoch": 96.19, + "learning_rate": 0.0001266315789473684, + "loss": 0.0005, + "step": 6445 + }, + { + "epoch": 96.21, + "learning_rate": 0.00012659649122807016, + "loss": 0.0006, + "step": 6446 + }, + { + "epoch": 96.22, + "learning_rate": 0.00012656140350877194, + "loss": 0.0136, + "step": 6447 + }, + { + "epoch": 96.24, + "learning_rate": 0.00012652631578947366, + "loss": 0.038, + "step": 6448 + }, + { + "epoch": 96.25, + "learning_rate": 0.00012649122807017544, + "loss": 0.0005, + "step": 6449 + }, + { + "epoch": 96.27, + "learning_rate": 0.00012645614035087718, + "loss": 0.003, + "step": 6450 + }, + { + "epoch": 96.28, + "learning_rate": 0.00012642105263157893, + "loss": 0.0006, + "step": 6451 + }, + { + "epoch": 96.3, + "learning_rate": 0.00012638596491228068, + "loss": 0.0449, + "step": 6452 + }, + { + "epoch": 96.31, + "learning_rate": 0.00012635087719298246, + "loss": 0.1171, + "step": 6453 + }, + { + "epoch": 96.33, + "learning_rate": 0.00012631578947368418, + "loss": 0.0005, + "step": 6454 + }, + { + "epoch": 96.34, + "learning_rate": 0.00012628070175438596, + "loss": 0.0008, + "step": 6455 + }, + { + "epoch": 96.36, + "learning_rate": 0.0001262456140350877, + "loss": 0.0006, + "step": 6456 + }, + { + "epoch": 96.37, + "learning_rate": 0.00012621052631578945, + "loss": 0.0016, + "step": 6457 + }, + { + "epoch": 96.39, + "learning_rate": 0.00012617543859649123, + "loss": 0.0027, + "step": 6458 + }, + { + "epoch": 96.4, + "learning_rate": 0.00012614035087719298, + "loss": 0.1313, + "step": 6459 + }, + { + "epoch": 96.42, + "learning_rate": 0.00012610526315789473, + "loss": 0.0006, + "step": 6460 + }, + { + "epoch": 96.43, + "learning_rate": 0.00012607017543859648, + "loss": 0.0006, + "step": 6461 + }, + { + "epoch": 96.45, + "learning_rate": 0.00012603508771929825, + "loss": 0.0048, + "step": 6462 + }, + { + "epoch": 96.46, + "learning_rate": 0.00012599999999999997, + "loss": 0.0032, + "step": 6463 + }, + { + "epoch": 96.48, + "learning_rate": 0.00012596491228070175, + "loss": 0.0005, + "step": 6464 + }, + { + "epoch": 96.49, + "learning_rate": 0.0001259298245614035, + "loss": 0.0802, + "step": 6465 + }, + { + "epoch": 96.51, + "learning_rate": 0.00012589473684210525, + "loss": 0.0009, + "step": 6466 + }, + { + "epoch": 96.52, + "learning_rate": 0.000125859649122807, + "loss": 0.0037, + "step": 6467 + }, + { + "epoch": 96.54, + "learning_rate": 0.00012582456140350877, + "loss": 0.0009, + "step": 6468 + }, + { + "epoch": 96.55, + "learning_rate": 0.00012578947368421052, + "loss": 0.0008, + "step": 6469 + }, + { + "epoch": 96.57, + "learning_rate": 0.00012575438596491227, + "loss": 0.0007, + "step": 6470 + }, + { + "epoch": 96.58, + "learning_rate": 0.00012571929824561404, + "loss": 0.0011, + "step": 6471 + }, + { + "epoch": 96.59, + "learning_rate": 0.00012568421052631577, + "loss": 0.0257, + "step": 6472 + }, + { + "epoch": 96.61, + "learning_rate": 0.00012564912280701754, + "loss": 0.014, + "step": 6473 + }, + { + "epoch": 96.62, + "learning_rate": 0.0001256140350877193, + "loss": 0.0006, + "step": 6474 + }, + { + "epoch": 96.64, + "learning_rate": 0.00012557894736842104, + "loss": 0.0185, + "step": 6475 + }, + { + "epoch": 96.65, + "learning_rate": 0.0001255438596491228, + "loss": 0.0005, + "step": 6476 + }, + { + "epoch": 96.67, + "learning_rate": 0.00012550877192982456, + "loss": 0.0009, + "step": 6477 + }, + { + "epoch": 96.68, + "learning_rate": 0.00012547368421052629, + "loss": 0.0007, + "step": 6478 + }, + { + "epoch": 96.7, + "learning_rate": 0.00012543859649122806, + "loss": 0.0401, + "step": 6479 + }, + { + "epoch": 96.71, + "learning_rate": 0.0001254035087719298, + "loss": 0.0007, + "step": 6480 + }, + { + "epoch": 96.73, + "learning_rate": 0.00012536842105263156, + "loss": 0.0008, + "step": 6481 + }, + { + "epoch": 96.74, + "learning_rate": 0.00012533333333333334, + "loss": 0.0511, + "step": 6482 + }, + { + "epoch": 96.76, + "learning_rate": 0.00012529824561403508, + "loss": 0.0219, + "step": 6483 + }, + { + "epoch": 96.77, + "learning_rate": 0.00012526315789473683, + "loss": 0.0034, + "step": 6484 + }, + { + "epoch": 96.79, + "learning_rate": 0.00012522807017543858, + "loss": 0.0456, + "step": 6485 + }, + { + "epoch": 96.8, + "learning_rate": 0.00012519298245614036, + "loss": 0.0006, + "step": 6486 + }, + { + "epoch": 96.82, + "learning_rate": 0.00012515789473684208, + "loss": 0.0041, + "step": 6487 + }, + { + "epoch": 96.83, + "learning_rate": 0.00012512280701754385, + "loss": 0.0008, + "step": 6488 + }, + { + "epoch": 96.85, + "learning_rate": 0.0001250877192982456, + "loss": 0.0006, + "step": 6489 + }, + { + "epoch": 96.86, + "learning_rate": 0.00012505263157894735, + "loss": 0.0044, + "step": 6490 + }, + { + "epoch": 96.88, + "learning_rate": 0.0001250175438596491, + "loss": 0.0006, + "step": 6491 + }, + { + "epoch": 96.89, + "learning_rate": 0.00012498245614035088, + "loss": 0.0007, + "step": 6492 + }, + { + "epoch": 96.91, + "learning_rate": 0.00012494736842105263, + "loss": 0.0006, + "step": 6493 + }, + { + "epoch": 96.92, + "learning_rate": 0.00012491228070175437, + "loss": 0.0076, + "step": 6494 + }, + { + "epoch": 96.94, + "learning_rate": 0.00012487719298245612, + "loss": 0.3258, + "step": 6495 + }, + { + "epoch": 96.95, + "learning_rate": 0.00012484210526315787, + "loss": 0.0007, + "step": 6496 + }, + { + "epoch": 96.97, + "learning_rate": 0.00012480701754385965, + "loss": 0.0005, + "step": 6497 + }, + { + "epoch": 96.98, + "learning_rate": 0.0001247719298245614, + "loss": 0.0013, + "step": 6498 + }, + { + "epoch": 97.0, + "learning_rate": 0.00012473684210526315, + "loss": 0.1592, + "step": 6499 + }, + { + "epoch": 97.01, + "learning_rate": 0.0001247017543859649, + "loss": 0.0008, + "step": 6500 + }, + { + "epoch": 97.03, + "learning_rate": 0.00012466666666666667, + "loss": 0.0006, + "step": 6501 + }, + { + "epoch": 97.04, + "learning_rate": 0.0001246315789473684, + "loss": 0.1534, + "step": 6502 + }, + { + "epoch": 97.06, + "learning_rate": 0.00012459649122807017, + "loss": 0.0008, + "step": 6503 + }, + { + "epoch": 97.07, + "learning_rate": 0.00012456140350877192, + "loss": 0.001, + "step": 6504 + }, + { + "epoch": 97.09, + "learning_rate": 0.00012452631578947367, + "loss": 0.0008, + "step": 6505 + }, + { + "epoch": 97.1, + "learning_rate": 0.00012449122807017541, + "loss": 0.0243, + "step": 6506 + }, + { + "epoch": 97.12, + "learning_rate": 0.0001244561403508772, + "loss": 0.0334, + "step": 6507 + }, + { + "epoch": 97.13, + "learning_rate": 0.00012442105263157894, + "loss": 0.0012, + "step": 6508 + }, + { + "epoch": 97.15, + "learning_rate": 0.0001243859649122807, + "loss": 0.0018, + "step": 6509 + }, + { + "epoch": 97.16, + "learning_rate": 0.00012435087719298246, + "loss": 0.001, + "step": 6510 + }, + { + "epoch": 97.18, + "learning_rate": 0.00012431578947368419, + "loss": 0.0021, + "step": 6511 + }, + { + "epoch": 97.19, + "learning_rate": 0.00012428070175438596, + "loss": 0.0018, + "step": 6512 + }, + { + "epoch": 97.21, + "learning_rate": 0.0001242456140350877, + "loss": 0.0435, + "step": 6513 + }, + { + "epoch": 97.22, + "learning_rate": 0.00012421052631578946, + "loss": 0.1776, + "step": 6514 + }, + { + "epoch": 97.24, + "learning_rate": 0.0001241754385964912, + "loss": 0.0017, + "step": 6515 + }, + { + "epoch": 97.25, + "learning_rate": 0.00012414035087719298, + "loss": 0.0035, + "step": 6516 + }, + { + "epoch": 97.27, + "learning_rate": 0.00012410526315789473, + "loss": 0.1954, + "step": 6517 + }, + { + "epoch": 97.28, + "learning_rate": 0.00012407017543859648, + "loss": 0.1197, + "step": 6518 + }, + { + "epoch": 97.3, + "learning_rate": 0.00012403508771929823, + "loss": 0.0009, + "step": 6519 + }, + { + "epoch": 97.31, + "learning_rate": 0.00012399999999999998, + "loss": 0.0014, + "step": 6520 + }, + { + "epoch": 97.33, + "learning_rate": 0.00012396491228070175, + "loss": 0.0012, + "step": 6521 + }, + { + "epoch": 97.34, + "learning_rate": 0.0001239298245614035, + "loss": 0.1418, + "step": 6522 + }, + { + "epoch": 97.36, + "learning_rate": 0.00012389473684210525, + "loss": 0.0008, + "step": 6523 + }, + { + "epoch": 97.37, + "learning_rate": 0.000123859649122807, + "loss": 0.0011, + "step": 6524 + }, + { + "epoch": 97.39, + "learning_rate": 0.00012382456140350878, + "loss": 0.1914, + "step": 6525 + }, + { + "epoch": 97.4, + "learning_rate": 0.0001237894736842105, + "loss": 0.0006, + "step": 6526 + }, + { + "epoch": 97.42, + "learning_rate": 0.00012375438596491227, + "loss": 0.0008, + "step": 6527 + }, + { + "epoch": 97.43, + "learning_rate": 0.00012371929824561402, + "loss": 0.0015, + "step": 6528 + }, + { + "epoch": 97.45, + "learning_rate": 0.00012368421052631577, + "loss": 0.0129, + "step": 6529 + }, + { + "epoch": 97.46, + "learning_rate": 0.00012364912280701752, + "loss": 0.0008, + "step": 6530 + }, + { + "epoch": 97.48, + "learning_rate": 0.0001236140350877193, + "loss": 0.1816, + "step": 6531 + }, + { + "epoch": 97.49, + "learning_rate": 0.00012357894736842104, + "loss": 0.2985, + "step": 6532 + }, + { + "epoch": 97.51, + "learning_rate": 0.0001235438596491228, + "loss": 0.1943, + "step": 6533 + }, + { + "epoch": 97.52, + "learning_rate": 0.00012350877192982457, + "loss": 0.0019, + "step": 6534 + }, + { + "epoch": 97.54, + "learning_rate": 0.0001234736842105263, + "loss": 0.0129, + "step": 6535 + }, + { + "epoch": 97.55, + "learning_rate": 0.00012343859649122807, + "loss": 0.3213, + "step": 6536 + }, + { + "epoch": 97.57, + "learning_rate": 0.00012340350877192982, + "loss": 0.0094, + "step": 6537 + }, + { + "epoch": 97.58, + "learning_rate": 0.00012336842105263156, + "loss": 0.0682, + "step": 6538 + }, + { + "epoch": 97.59, + "learning_rate": 0.0001233333333333333, + "loss": 0.0008, + "step": 6539 + }, + { + "epoch": 97.61, + "learning_rate": 0.0001232982456140351, + "loss": 0.0007, + "step": 6540 + }, + { + "epoch": 97.62, + "learning_rate": 0.00012326315789473684, + "loss": 0.0007, + "step": 6541 + }, + { + "epoch": 97.64, + "learning_rate": 0.0001232280701754386, + "loss": 0.0043, + "step": 6542 + }, + { + "epoch": 97.65, + "learning_rate": 0.00012319298245614034, + "loss": 0.0012, + "step": 6543 + }, + { + "epoch": 97.67, + "learning_rate": 0.00012315789473684208, + "loss": 0.0501, + "step": 6544 + }, + { + "epoch": 97.68, + "learning_rate": 0.00012312280701754386, + "loss": 0.0713, + "step": 6545 + }, + { + "epoch": 97.7, + "learning_rate": 0.0001230877192982456, + "loss": 0.0011, + "step": 6546 + }, + { + "epoch": 97.71, + "learning_rate": 0.00012305263157894736, + "loss": 0.0039, + "step": 6547 + }, + { + "epoch": 97.73, + "learning_rate": 0.0001230175438596491, + "loss": 0.0008, + "step": 6548 + }, + { + "epoch": 97.74, + "learning_rate": 0.00012298245614035088, + "loss": 0.0011, + "step": 6549 + }, + { + "epoch": 97.76, + "learning_rate": 0.0001229473684210526, + "loss": 0.0009, + "step": 6550 + }, + { + "epoch": 97.77, + "learning_rate": 0.00012291228070175438, + "loss": 0.001, + "step": 6551 + }, + { + "epoch": 97.79, + "learning_rate": 0.00012287719298245613, + "loss": 0.0026, + "step": 6552 + }, + { + "epoch": 97.8, + "learning_rate": 0.00012284210526315788, + "loss": 0.2741, + "step": 6553 + }, + { + "epoch": 97.82, + "learning_rate": 0.00012280701754385963, + "loss": 0.0012, + "step": 6554 + }, + { + "epoch": 97.83, + "learning_rate": 0.0001227719298245614, + "loss": 0.0008, + "step": 6555 + }, + { + "epoch": 97.85, + "learning_rate": 0.00012273684210526315, + "loss": 0.0007, + "step": 6556 + }, + { + "epoch": 97.86, + "learning_rate": 0.0001227017543859649, + "loss": 0.0024, + "step": 6557 + }, + { + "epoch": 97.88, + "learning_rate": 0.00012266666666666668, + "loss": 0.0008, + "step": 6558 + }, + { + "epoch": 97.89, + "learning_rate": 0.0001226315789473684, + "loss": 0.0038, + "step": 6559 + }, + { + "epoch": 97.91, + "learning_rate": 0.00012259649122807017, + "loss": 0.0148, + "step": 6560 + }, + { + "epoch": 97.92, + "learning_rate": 0.00012256140350877192, + "loss": 0.0027, + "step": 6561 + }, + { + "epoch": 97.94, + "learning_rate": 0.00012252631578947367, + "loss": 0.2488, + "step": 6562 + }, + { + "epoch": 97.95, + "learning_rate": 0.00012249122807017542, + "loss": 0.002, + "step": 6563 + }, + { + "epoch": 97.97, + "learning_rate": 0.0001224561403508772, + "loss": 0.001, + "step": 6564 + }, + { + "epoch": 97.98, + "learning_rate": 0.00012242105263157894, + "loss": 0.0031, + "step": 6565 + }, + { + "epoch": 98.0, + "learning_rate": 0.0001223859649122807, + "loss": 0.0342, + "step": 6566 + }, + { + "epoch": 98.01, + "learning_rate": 0.00012235087719298244, + "loss": 0.0302, + "step": 6567 + }, + { + "epoch": 98.03, + "learning_rate": 0.0001223157894736842, + "loss": 0.0006, + "step": 6568 + }, + { + "epoch": 98.04, + "learning_rate": 0.00012228070175438597, + "loss": 0.0008, + "step": 6569 + }, + { + "epoch": 98.06, + "learning_rate": 0.00012224561403508772, + "loss": 0.0014, + "step": 6570 + }, + { + "epoch": 98.07, + "learning_rate": 0.00012221052631578946, + "loss": 0.0008, + "step": 6571 + }, + { + "epoch": 98.09, + "learning_rate": 0.0001221754385964912, + "loss": 0.0008, + "step": 6572 + }, + { + "epoch": 98.1, + "learning_rate": 0.000122140350877193, + "loss": 0.0027, + "step": 6573 + }, + { + "epoch": 98.12, + "learning_rate": 0.0001221052631578947, + "loss": 0.0007, + "step": 6574 + }, + { + "epoch": 98.13, + "learning_rate": 0.00012207017543859649, + "loss": 0.001, + "step": 6575 + }, + { + "epoch": 98.15, + "learning_rate": 0.00012203508771929824, + "loss": 0.001, + "step": 6576 + }, + { + "epoch": 98.16, + "learning_rate": 0.000122, + "loss": 0.0014, + "step": 6577 + }, + { + "epoch": 98.18, + "learning_rate": 0.00012196491228070173, + "loss": 0.1129, + "step": 6578 + }, + { + "epoch": 98.19, + "learning_rate": 0.0001219298245614035, + "loss": 0.0015, + "step": 6579 + }, + { + "epoch": 98.21, + "learning_rate": 0.00012189473684210524, + "loss": 0.0031, + "step": 6580 + }, + { + "epoch": 98.22, + "learning_rate": 0.000121859649122807, + "loss": 0.0012, + "step": 6581 + }, + { + "epoch": 98.24, + "learning_rate": 0.00012182456140350877, + "loss": 0.0009, + "step": 6582 + }, + { + "epoch": 98.25, + "learning_rate": 0.00012178947368421052, + "loss": 0.0543, + "step": 6583 + }, + { + "epoch": 98.27, + "learning_rate": 0.00012175438596491228, + "loss": 0.0008, + "step": 6584 + }, + { + "epoch": 98.28, + "learning_rate": 0.00012171929824561403, + "loss": 0.0016, + "step": 6585 + }, + { + "epoch": 98.3, + "learning_rate": 0.00012168421052631579, + "loss": 0.0021, + "step": 6586 + }, + { + "epoch": 98.31, + "learning_rate": 0.00012164912280701753, + "loss": 0.2339, + "step": 6587 + }, + { + "epoch": 98.33, + "learning_rate": 0.00012161403508771929, + "loss": 0.0009, + "step": 6588 + }, + { + "epoch": 98.34, + "learning_rate": 0.00012157894736842104, + "loss": 0.0053, + "step": 6589 + }, + { + "epoch": 98.36, + "learning_rate": 0.0001215438596491228, + "loss": 0.001, + "step": 6590 + }, + { + "epoch": 98.37, + "learning_rate": 0.00012150877192982455, + "loss": 0.1344, + "step": 6591 + }, + { + "epoch": 98.39, + "learning_rate": 0.00012147368421052631, + "loss": 0.2065, + "step": 6592 + }, + { + "epoch": 98.4, + "learning_rate": 0.00012143859649122805, + "loss": 0.0011, + "step": 6593 + }, + { + "epoch": 98.42, + "learning_rate": 0.00012140350877192981, + "loss": 0.0007, + "step": 6594 + }, + { + "epoch": 98.43, + "learning_rate": 0.00012136842105263157, + "loss": 0.1541, + "step": 6595 + }, + { + "epoch": 98.45, + "learning_rate": 0.00012133333333333332, + "loss": 0.0015, + "step": 6596 + }, + { + "epoch": 98.46, + "learning_rate": 0.00012129824561403508, + "loss": 0.0012, + "step": 6597 + }, + { + "epoch": 98.48, + "learning_rate": 0.00012126315789473683, + "loss": 0.0543, + "step": 6598 + }, + { + "epoch": 98.49, + "learning_rate": 0.00012122807017543859, + "loss": 0.001, + "step": 6599 + }, + { + "epoch": 98.51, + "learning_rate": 0.00012119298245614034, + "loss": 0.0008, + "step": 6600 + }, + { + "epoch": 98.51, + "eval_accuracy": 0.8541360744003916, + "eval_f1": 0.8572461579693889, + "eval_loss": 0.7322733402252197, + "eval_runtime": 344.5259, + "eval_samples_per_second": 11.86, + "eval_steps_per_second": 0.743, + "step": 6600 + }, + { + "epoch": 98.52, + "learning_rate": 0.0001211578947368421, + "loss": 0.0381, + "step": 6601 + }, + { + "epoch": 98.54, + "learning_rate": 0.00012112280701754384, + "loss": 0.0028, + "step": 6602 + }, + { + "epoch": 98.55, + "learning_rate": 0.0001210877192982456, + "loss": 0.2096, + "step": 6603 + }, + { + "epoch": 98.57, + "learning_rate": 0.00012105263157894735, + "loss": 0.1052, + "step": 6604 + }, + { + "epoch": 98.58, + "learning_rate": 0.00012101754385964911, + "loss": 0.277, + "step": 6605 + }, + { + "epoch": 98.59, + "learning_rate": 0.00012098245614035086, + "loss": 0.0104, + "step": 6606 + }, + { + "epoch": 98.61, + "learning_rate": 0.00012094736842105262, + "loss": 0.0007, + "step": 6607 + }, + { + "epoch": 98.62, + "learning_rate": 0.00012091228070175439, + "loss": 0.004, + "step": 6608 + }, + { + "epoch": 98.64, + "learning_rate": 0.00012087719298245613, + "loss": 0.2461, + "step": 6609 + }, + { + "epoch": 98.65, + "learning_rate": 0.0001208421052631579, + "loss": 0.0009, + "step": 6610 + }, + { + "epoch": 98.67, + "learning_rate": 0.00012080701754385963, + "loss": 0.0634, + "step": 6611 + }, + { + "epoch": 98.68, + "learning_rate": 0.0001207719298245614, + "loss": 0.0097, + "step": 6612 + }, + { + "epoch": 98.7, + "learning_rate": 0.00012073684210526314, + "loss": 0.0934, + "step": 6613 + }, + { + "epoch": 98.71, + "learning_rate": 0.0001207017543859649, + "loss": 0.001, + "step": 6614 + }, + { + "epoch": 98.73, + "learning_rate": 0.00012066666666666665, + "loss": 0.0013, + "step": 6615 + }, + { + "epoch": 98.74, + "learning_rate": 0.00012063157894736842, + "loss": 0.0033, + "step": 6616 + }, + { + "epoch": 98.76, + "learning_rate": 0.00012059649122807015, + "loss": 0.0016, + "step": 6617 + }, + { + "epoch": 98.77, + "learning_rate": 0.00012056140350877191, + "loss": 0.023, + "step": 6618 + }, + { + "epoch": 98.79, + "learning_rate": 0.00012052631578947368, + "loss": 0.0018, + "step": 6619 + }, + { + "epoch": 98.8, + "learning_rate": 0.00012049122807017543, + "loss": 0.1118, + "step": 6620 + }, + { + "epoch": 98.82, + "learning_rate": 0.00012045614035087719, + "loss": 0.0055, + "step": 6621 + }, + { + "epoch": 98.83, + "learning_rate": 0.00012042105263157894, + "loss": 0.011, + "step": 6622 + }, + { + "epoch": 98.85, + "learning_rate": 0.0001203859649122807, + "loss": 0.0015, + "step": 6623 + }, + { + "epoch": 98.86, + "learning_rate": 0.00012035087719298245, + "loss": 0.2383, + "step": 6624 + }, + { + "epoch": 98.88, + "learning_rate": 0.00012031578947368421, + "loss": 0.0013, + "step": 6625 + }, + { + "epoch": 98.89, + "learning_rate": 0.00012028070175438595, + "loss": 0.0012, + "step": 6626 + }, + { + "epoch": 98.91, + "learning_rate": 0.00012024561403508771, + "loss": 0.0179, + "step": 6627 + }, + { + "epoch": 98.92, + "learning_rate": 0.00012021052631578946, + "loss": 0.0349, + "step": 6628 + }, + { + "epoch": 98.94, + "learning_rate": 0.00012017543859649122, + "loss": 0.0045, + "step": 6629 + }, + { + "epoch": 98.95, + "learning_rate": 0.00012014035087719297, + "loss": 0.0787, + "step": 6630 + }, + { + "epoch": 98.97, + "learning_rate": 0.00012010526315789473, + "loss": 0.0008, + "step": 6631 + }, + { + "epoch": 98.98, + "learning_rate": 0.00012007017543859649, + "loss": 0.1505, + "step": 6632 + }, + { + "epoch": 99.0, + "learning_rate": 0.00012003508771929824, + "loss": 0.0029, + "step": 6633 + }, + { + "epoch": 99.01, + "learning_rate": 0.00011999999999999999, + "loss": 0.0435, + "step": 6634 + }, + { + "epoch": 99.03, + "learning_rate": 0.00011996491228070174, + "loss": 0.0363, + "step": 6635 + }, + { + "epoch": 99.04, + "learning_rate": 0.0001199298245614035, + "loss": 0.0013, + "step": 6636 + }, + { + "epoch": 99.06, + "learning_rate": 0.00011989473684210525, + "loss": 0.177, + "step": 6637 + }, + { + "epoch": 99.07, + "learning_rate": 0.00011985964912280701, + "loss": 0.001, + "step": 6638 + }, + { + "epoch": 99.09, + "learning_rate": 0.00011982456140350876, + "loss": 0.0011, + "step": 6639 + }, + { + "epoch": 99.1, + "learning_rate": 0.00011978947368421052, + "loss": 0.0017, + "step": 6640 + }, + { + "epoch": 99.12, + "learning_rate": 0.00011975438596491226, + "loss": 0.0028, + "step": 6641 + }, + { + "epoch": 99.13, + "learning_rate": 0.00011971929824561402, + "loss": 0.0211, + "step": 6642 + }, + { + "epoch": 99.15, + "learning_rate": 0.00011968421052631577, + "loss": 0.0025, + "step": 6643 + }, + { + "epoch": 99.16, + "learning_rate": 0.00011964912280701753, + "loss": 0.001, + "step": 6644 + }, + { + "epoch": 99.18, + "learning_rate": 0.0001196140350877193, + "loss": 0.0008, + "step": 6645 + }, + { + "epoch": 99.19, + "learning_rate": 0.00011957894736842104, + "loss": 0.0013, + "step": 6646 + }, + { + "epoch": 99.21, + "learning_rate": 0.0001195438596491228, + "loss": 0.0011, + "step": 6647 + }, + { + "epoch": 99.22, + "learning_rate": 0.00011950877192982455, + "loss": 0.3039, + "step": 6648 + }, + { + "epoch": 99.24, + "learning_rate": 0.00011947368421052632, + "loss": 0.0019, + "step": 6649 + }, + { + "epoch": 99.25, + "learning_rate": 0.00011943859649122805, + "loss": 0.1022, + "step": 6650 + }, + { + "epoch": 99.27, + "learning_rate": 0.00011940350877192981, + "loss": 0.0957, + "step": 6651 + }, + { + "epoch": 99.28, + "learning_rate": 0.00011936842105263156, + "loss": 0.1441, + "step": 6652 + }, + { + "epoch": 99.3, + "learning_rate": 0.00011933333333333332, + "loss": 0.2018, + "step": 6653 + }, + { + "epoch": 99.31, + "learning_rate": 0.00011929824561403507, + "loss": 0.0094, + "step": 6654 + }, + { + "epoch": 99.33, + "learning_rate": 0.00011926315789473684, + "loss": 0.0008, + "step": 6655 + }, + { + "epoch": 99.34, + "learning_rate": 0.00011922807017543858, + "loss": 0.0024, + "step": 6656 + }, + { + "epoch": 99.36, + "learning_rate": 0.00011919298245614035, + "loss": 0.0005, + "step": 6657 + }, + { + "epoch": 99.37, + "learning_rate": 0.0001191578947368421, + "loss": 0.0171, + "step": 6658 + }, + { + "epoch": 99.39, + "learning_rate": 0.00011912280701754384, + "loss": 0.0042, + "step": 6659 + }, + { + "epoch": 99.4, + "learning_rate": 0.00011908771929824561, + "loss": 0.1014, + "step": 6660 + }, + { + "epoch": 99.42, + "learning_rate": 0.00011905263157894736, + "loss": 0.0596, + "step": 6661 + }, + { + "epoch": 99.43, + "learning_rate": 0.00011901754385964912, + "loss": 0.0012, + "step": 6662 + }, + { + "epoch": 99.45, + "learning_rate": 0.00011898245614035087, + "loss": 0.001, + "step": 6663 + }, + { + "epoch": 99.46, + "learning_rate": 0.00011894736842105263, + "loss": 0.0005, + "step": 6664 + }, + { + "epoch": 99.48, + "learning_rate": 0.00011891228070175436, + "loss": 0.0008, + "step": 6665 + }, + { + "epoch": 99.49, + "learning_rate": 0.00011887719298245613, + "loss": 0.0008, + "step": 6666 + }, + { + "epoch": 99.51, + "learning_rate": 0.00011884210526315788, + "loss": 0.0011, + "step": 6667 + }, + { + "epoch": 99.52, + "learning_rate": 0.00011880701754385964, + "loss": 0.0267, + "step": 6668 + }, + { + "epoch": 99.54, + "learning_rate": 0.00011877192982456139, + "loss": 0.2269, + "step": 6669 + }, + { + "epoch": 99.55, + "learning_rate": 0.00011873684210526315, + "loss": 0.0203, + "step": 6670 + }, + { + "epoch": 99.57, + "learning_rate": 0.00011870175438596491, + "loss": 0.0008, + "step": 6671 + }, + { + "epoch": 99.58, + "learning_rate": 0.00011866666666666666, + "loss": 0.0025, + "step": 6672 + }, + { + "epoch": 99.59, + "learning_rate": 0.00011863157894736842, + "loss": 0.001, + "step": 6673 + }, + { + "epoch": 99.61, + "learning_rate": 0.00011859649122807016, + "loss": 0.0006, + "step": 6674 + }, + { + "epoch": 99.62, + "learning_rate": 0.00011856140350877192, + "loss": 0.0052, + "step": 6675 + }, + { + "epoch": 99.64, + "learning_rate": 0.00011852631578947367, + "loss": 0.0008, + "step": 6676 + }, + { + "epoch": 99.65, + "learning_rate": 0.00011849122807017543, + "loss": 0.0006, + "step": 6677 + }, + { + "epoch": 99.67, + "learning_rate": 0.00011845614035087718, + "loss": 0.1664, + "step": 6678 + }, + { + "epoch": 99.68, + "learning_rate": 0.00011842105263157894, + "loss": 0.0015, + "step": 6679 + }, + { + "epoch": 99.7, + "learning_rate": 0.00011838596491228069, + "loss": 0.0011, + "step": 6680 + }, + { + "epoch": 99.71, + "learning_rate": 0.00011835087719298244, + "loss": 0.0046, + "step": 6681 + }, + { + "epoch": 99.73, + "learning_rate": 0.0001183157894736842, + "loss": 0.0009, + "step": 6682 + }, + { + "epoch": 99.74, + "learning_rate": 0.00011828070175438595, + "loss": 0.0322, + "step": 6683 + }, + { + "epoch": 99.76, + "learning_rate": 0.00011824561403508771, + "loss": 0.0009, + "step": 6684 + }, + { + "epoch": 99.77, + "learning_rate": 0.00011821052631578946, + "loss": 0.0012, + "step": 6685 + }, + { + "epoch": 99.79, + "learning_rate": 0.00011817543859649122, + "loss": 0.0545, + "step": 6686 + }, + { + "epoch": 99.8, + "learning_rate": 0.00011814035087719297, + "loss": 0.0012, + "step": 6687 + }, + { + "epoch": 99.82, + "learning_rate": 0.00011810526315789474, + "loss": 0.0016, + "step": 6688 + }, + { + "epoch": 99.83, + "learning_rate": 0.00011807017543859647, + "loss": 0.007, + "step": 6689 + }, + { + "epoch": 99.85, + "learning_rate": 0.00011803508771929823, + "loss": 0.0365, + "step": 6690 + }, + { + "epoch": 99.86, + "learning_rate": 0.00011799999999999998, + "loss": 0.1005, + "step": 6691 + }, + { + "epoch": 99.88, + "learning_rate": 0.00011796491228070174, + "loss": 0.001, + "step": 6692 + }, + { + "epoch": 99.89, + "learning_rate": 0.00011792982456140349, + "loss": 0.0126, + "step": 6693 + }, + { + "epoch": 99.91, + "learning_rate": 0.00011789473684210525, + "loss": 0.0256, + "step": 6694 + }, + { + "epoch": 99.92, + "learning_rate": 0.00011785964912280702, + "loss": 0.1166, + "step": 6695 + }, + { + "epoch": 99.94, + "learning_rate": 0.00011782456140350877, + "loss": 0.0006, + "step": 6696 + }, + { + "epoch": 99.95, + "learning_rate": 0.00011778947368421053, + "loss": 0.0009, + "step": 6697 + }, + { + "epoch": 99.97, + "learning_rate": 0.00011775438596491226, + "loss": 0.0018, + "step": 6698 + }, + { + "epoch": 99.98, + "learning_rate": 0.00011771929824561403, + "loss": 0.001, + "step": 6699 + }, + { + "epoch": 100.0, + "learning_rate": 0.00011768421052631577, + "loss": 0.0008, + "step": 6700 + }, + { + "epoch": 100.01, + "learning_rate": 0.00011764912280701754, + "loss": 0.0034, + "step": 6701 + }, + { + "epoch": 100.03, + "learning_rate": 0.00011761403508771929, + "loss": 0.0061, + "step": 6702 + }, + { + "epoch": 100.04, + "learning_rate": 0.00011757894736842105, + "loss": 0.0006, + "step": 6703 + }, + { + "epoch": 100.06, + "learning_rate": 0.0001175438596491228, + "loss": 0.0007, + "step": 6704 + }, + { + "epoch": 100.07, + "learning_rate": 0.00011750877192982455, + "loss": 0.0068, + "step": 6705 + }, + { + "epoch": 100.09, + "learning_rate": 0.0001174736842105263, + "loss": 0.1123, + "step": 6706 + }, + { + "epoch": 100.1, + "learning_rate": 0.00011743859649122806, + "loss": 0.0011, + "step": 6707 + }, + { + "epoch": 100.12, + "learning_rate": 0.00011740350877192982, + "loss": 0.0603, + "step": 6708 + }, + { + "epoch": 100.13, + "learning_rate": 0.00011736842105263157, + "loss": 0.0008, + "step": 6709 + }, + { + "epoch": 100.15, + "learning_rate": 0.00011733333333333333, + "loss": 0.0006, + "step": 6710 + }, + { + "epoch": 100.16, + "learning_rate": 0.00011729824561403508, + "loss": 0.0008, + "step": 6711 + }, + { + "epoch": 100.18, + "learning_rate": 0.00011726315789473684, + "loss": 0.2151, + "step": 6712 + }, + { + "epoch": 100.19, + "learning_rate": 0.00011722807017543858, + "loss": 0.0026, + "step": 6713 + }, + { + "epoch": 100.21, + "learning_rate": 0.00011719298245614034, + "loss": 0.0011, + "step": 6714 + }, + { + "epoch": 100.22, + "learning_rate": 0.00011715789473684209, + "loss": 0.0007, + "step": 6715 + }, + { + "epoch": 100.24, + "learning_rate": 0.00011712280701754385, + "loss": 0.0009, + "step": 6716 + }, + { + "epoch": 100.25, + "learning_rate": 0.0001170877192982456, + "loss": 0.0048, + "step": 6717 + }, + { + "epoch": 100.27, + "learning_rate": 0.00011705263157894736, + "loss": 0.1761, + "step": 6718 + }, + { + "epoch": 100.28, + "learning_rate": 0.00011701754385964911, + "loss": 0.0009, + "step": 6719 + }, + { + "epoch": 100.3, + "learning_rate": 0.00011698245614035087, + "loss": 0.0514, + "step": 6720 + }, + { + "epoch": 100.31, + "learning_rate": 0.00011694736842105263, + "loss": 0.063, + "step": 6721 + }, + { + "epoch": 100.33, + "learning_rate": 0.00011691228070175437, + "loss": 0.0164, + "step": 6722 + }, + { + "epoch": 100.34, + "learning_rate": 0.00011687719298245613, + "loss": 0.0022, + "step": 6723 + }, + { + "epoch": 100.36, + "learning_rate": 0.00011684210526315788, + "loss": 0.0007, + "step": 6724 + }, + { + "epoch": 100.37, + "learning_rate": 0.00011680701754385964, + "loss": 0.0019, + "step": 6725 + }, + { + "epoch": 100.39, + "learning_rate": 0.00011677192982456139, + "loss": 0.0429, + "step": 6726 + }, + { + "epoch": 100.4, + "learning_rate": 0.00011673684210526315, + "loss": 0.0109, + "step": 6727 + }, + { + "epoch": 100.42, + "learning_rate": 0.00011670175438596489, + "loss": 0.0123, + "step": 6728 + }, + { + "epoch": 100.43, + "learning_rate": 0.00011666666666666665, + "loss": 0.0008, + "step": 6729 + }, + { + "epoch": 100.45, + "learning_rate": 0.0001166315789473684, + "loss": 0.0429, + "step": 6730 + }, + { + "epoch": 100.46, + "learning_rate": 0.00011659649122807016, + "loss": 0.3201, + "step": 6731 + }, + { + "epoch": 100.48, + "learning_rate": 0.00011656140350877193, + "loss": 0.0026, + "step": 6732 + }, + { + "epoch": 100.49, + "learning_rate": 0.00011652631578947367, + "loss": 0.002, + "step": 6733 + }, + { + "epoch": 100.51, + "learning_rate": 0.00011649122807017544, + "loss": 0.017, + "step": 6734 + }, + { + "epoch": 100.52, + "learning_rate": 0.00011645614035087719, + "loss": 0.0023, + "step": 6735 + }, + { + "epoch": 100.54, + "learning_rate": 0.00011642105263157895, + "loss": 0.0273, + "step": 6736 + }, + { + "epoch": 100.55, + "learning_rate": 0.00011638596491228068, + "loss": 0.0007, + "step": 6737 + }, + { + "epoch": 100.57, + "learning_rate": 0.00011635087719298245, + "loss": 0.0015, + "step": 6738 + }, + { + "epoch": 100.58, + "learning_rate": 0.0001163157894736842, + "loss": 0.0036, + "step": 6739 + }, + { + "epoch": 100.59, + "learning_rate": 0.00011628070175438596, + "loss": 0.0009, + "step": 6740 + }, + { + "epoch": 100.61, + "learning_rate": 0.0001162456140350877, + "loss": 0.0008, + "step": 6741 + }, + { + "epoch": 100.62, + "learning_rate": 0.00011621052631578947, + "loss": 0.0743, + "step": 6742 + }, + { + "epoch": 100.64, + "learning_rate": 0.00011617543859649122, + "loss": 0.0029, + "step": 6743 + }, + { + "epoch": 100.65, + "learning_rate": 0.00011614035087719298, + "loss": 0.0018, + "step": 6744 + }, + { + "epoch": 100.67, + "learning_rate": 0.00011610526315789474, + "loss": 0.0005, + "step": 6745 + }, + { + "epoch": 100.68, + "learning_rate": 0.00011607017543859648, + "loss": 0.0006, + "step": 6746 + }, + { + "epoch": 100.7, + "learning_rate": 0.00011603508771929824, + "loss": 0.0709, + "step": 6747 + }, + { + "epoch": 100.71, + "learning_rate": 0.00011599999999999999, + "loss": 0.0006, + "step": 6748 + }, + { + "epoch": 100.73, + "learning_rate": 0.00011596491228070175, + "loss": 0.135, + "step": 6749 + }, + { + "epoch": 100.74, + "learning_rate": 0.0001159298245614035, + "loss": 0.0049, + "step": 6750 + }, + { + "epoch": 100.76, + "learning_rate": 0.00011589473684210526, + "loss": 0.0007, + "step": 6751 + }, + { + "epoch": 100.77, + "learning_rate": 0.000115859649122807, + "loss": 0.0038, + "step": 6752 + }, + { + "epoch": 100.79, + "learning_rate": 0.00011582456140350876, + "loss": 0.0005, + "step": 6753 + }, + { + "epoch": 100.8, + "learning_rate": 0.00011578947368421051, + "loss": 0.0006, + "step": 6754 + }, + { + "epoch": 100.82, + "learning_rate": 0.00011575438596491227, + "loss": 0.0157, + "step": 6755 + }, + { + "epoch": 100.83, + "learning_rate": 0.00011571929824561402, + "loss": 0.0022, + "step": 6756 + }, + { + "epoch": 100.85, + "learning_rate": 0.00011568421052631578, + "loss": 0.0006, + "step": 6757 + }, + { + "epoch": 100.86, + "learning_rate": 0.00011564912280701754, + "loss": 0.2283, + "step": 6758 + }, + { + "epoch": 100.88, + "learning_rate": 0.00011561403508771929, + "loss": 0.0035, + "step": 6759 + }, + { + "epoch": 100.89, + "learning_rate": 0.00011557894736842105, + "loss": 0.001, + "step": 6760 + }, + { + "epoch": 100.91, + "learning_rate": 0.00011554385964912279, + "loss": 0.0009, + "step": 6761 + }, + { + "epoch": 100.92, + "learning_rate": 0.00011550877192982455, + "loss": 0.0014, + "step": 6762 + }, + { + "epoch": 100.94, + "learning_rate": 0.0001154736842105263, + "loss": 0.0028, + "step": 6763 + }, + { + "epoch": 100.95, + "learning_rate": 0.00011543859649122806, + "loss": 0.0009, + "step": 6764 + }, + { + "epoch": 100.97, + "learning_rate": 0.00011540350877192981, + "loss": 0.0024, + "step": 6765 + }, + { + "epoch": 100.98, + "learning_rate": 0.00011536842105263157, + "loss": 0.0009, + "step": 6766 + }, + { + "epoch": 101.0, + "learning_rate": 0.00011533333333333332, + "loss": 0.0269, + "step": 6767 + }, + { + "epoch": 101.01, + "learning_rate": 0.00011529824561403508, + "loss": 0.0018, + "step": 6768 + }, + { + "epoch": 101.03, + "learning_rate": 0.00011526315789473682, + "loss": 0.002, + "step": 6769 + }, + { + "epoch": 101.04, + "learning_rate": 0.00011522807017543858, + "loss": 0.1631, + "step": 6770 + }, + { + "epoch": 101.06, + "learning_rate": 0.00011519298245614034, + "loss": 0.0057, + "step": 6771 + }, + { + "epoch": 101.07, + "learning_rate": 0.0001151578947368421, + "loss": 0.0009, + "step": 6772 + }, + { + "epoch": 101.09, + "learning_rate": 0.00011512280701754386, + "loss": 0.1869, + "step": 6773 + }, + { + "epoch": 101.1, + "learning_rate": 0.0001150877192982456, + "loss": 0.0018, + "step": 6774 + }, + { + "epoch": 101.12, + "learning_rate": 0.00011505263157894737, + "loss": 0.0007, + "step": 6775 + }, + { + "epoch": 101.13, + "learning_rate": 0.0001150175438596491, + "loss": 0.0008, + "step": 6776 + }, + { + "epoch": 101.15, + "learning_rate": 0.00011498245614035086, + "loss": 0.0088, + "step": 6777 + }, + { + "epoch": 101.16, + "learning_rate": 0.00011494736842105261, + "loss": 0.0016, + "step": 6778 + }, + { + "epoch": 101.18, + "learning_rate": 0.00011491228070175438, + "loss": 0.0006, + "step": 6779 + }, + { + "epoch": 101.19, + "learning_rate": 0.00011487719298245612, + "loss": 0.0011, + "step": 6780 + }, + { + "epoch": 101.21, + "learning_rate": 0.00011484210526315789, + "loss": 0.0407, + "step": 6781 + }, + { + "epoch": 101.22, + "learning_rate": 0.00011480701754385965, + "loss": 0.0213, + "step": 6782 + }, + { + "epoch": 101.24, + "learning_rate": 0.0001147719298245614, + "loss": 0.0024, + "step": 6783 + }, + { + "epoch": 101.25, + "learning_rate": 0.00011473684210526316, + "loss": 0.0057, + "step": 6784 + }, + { + "epoch": 101.27, + "learning_rate": 0.0001147017543859649, + "loss": 0.0007, + "step": 6785 + }, + { + "epoch": 101.28, + "learning_rate": 0.00011466666666666666, + "loss": 0.0972, + "step": 6786 + }, + { + "epoch": 101.3, + "learning_rate": 0.0001146315789473684, + "loss": 0.1075, + "step": 6787 + }, + { + "epoch": 101.31, + "learning_rate": 0.00011459649122807017, + "loss": 0.0015, + "step": 6788 + }, + { + "epoch": 101.33, + "learning_rate": 0.00011456140350877192, + "loss": 0.0007, + "step": 6789 + }, + { + "epoch": 101.34, + "learning_rate": 0.00011452631578947368, + "loss": 0.0006, + "step": 6790 + }, + { + "epoch": 101.36, + "learning_rate": 0.00011449122807017543, + "loss": 0.0019, + "step": 6791 + }, + { + "epoch": 101.37, + "learning_rate": 0.00011445614035087719, + "loss": 0.0008, + "step": 6792 + }, + { + "epoch": 101.39, + "learning_rate": 0.00011442105263157893, + "loss": 0.2029, + "step": 6793 + }, + { + "epoch": 101.4, + "learning_rate": 0.00011438596491228069, + "loss": 0.0061, + "step": 6794 + }, + { + "epoch": 101.42, + "learning_rate": 0.00011435087719298245, + "loss": 0.0006, + "step": 6795 + }, + { + "epoch": 101.43, + "learning_rate": 0.0001143157894736842, + "loss": 0.0015, + "step": 6796 + }, + { + "epoch": 101.45, + "learning_rate": 0.00011428070175438596, + "loss": 0.0066, + "step": 6797 + }, + { + "epoch": 101.46, + "learning_rate": 0.00011424561403508771, + "loss": 0.0121, + "step": 6798 + }, + { + "epoch": 101.48, + "learning_rate": 0.00011421052631578947, + "loss": 0.006, + "step": 6799 + }, + { + "epoch": 101.49, + "learning_rate": 0.00011417543859649121, + "loss": 0.1655, + "step": 6800 + }, + { + "epoch": 101.49, + "eval_accuracy": 0.8504650024473813, + "eval_f1": 0.8521261710057196, + "eval_loss": 0.6953144073486328, + "eval_runtime": 343.8957, + "eval_samples_per_second": 11.882, + "eval_steps_per_second": 0.744, + "step": 6800 + }, + { + "epoch": 101.51, + "learning_rate": 0.00011414035087719297, + "loss": 0.004, + "step": 6801 + }, + { + "epoch": 101.52, + "learning_rate": 0.00011410526315789472, + "loss": 0.1086, + "step": 6802 + }, + { + "epoch": 101.54, + "learning_rate": 0.00011407017543859648, + "loss": 0.0871, + "step": 6803 + }, + { + "epoch": 101.55, + "learning_rate": 0.00011403508771929823, + "loss": 0.071, + "step": 6804 + }, + { + "epoch": 101.57, + "learning_rate": 0.00011399999999999999, + "loss": 0.075, + "step": 6805 + }, + { + "epoch": 101.58, + "learning_rate": 0.00011396491228070174, + "loss": 0.0009, + "step": 6806 + }, + { + "epoch": 101.59, + "learning_rate": 0.0001139298245614035, + "loss": 0.0031, + "step": 6807 + }, + { + "epoch": 101.61, + "learning_rate": 0.00011389473684210527, + "loss": 0.0009, + "step": 6808 + }, + { + "epoch": 101.62, + "learning_rate": 0.000113859649122807, + "loss": 0.0013, + "step": 6809 + }, + { + "epoch": 101.64, + "learning_rate": 0.00011382456140350876, + "loss": 0.0874, + "step": 6810 + }, + { + "epoch": 101.65, + "learning_rate": 0.00011378947368421051, + "loss": 0.0005, + "step": 6811 + }, + { + "epoch": 101.67, + "learning_rate": 0.00011375438596491227, + "loss": 0.02, + "step": 6812 + }, + { + "epoch": 101.68, + "learning_rate": 0.00011371929824561402, + "loss": 0.0116, + "step": 6813 + }, + { + "epoch": 101.7, + "learning_rate": 0.00011368421052631579, + "loss": 0.001, + "step": 6814 + }, + { + "epoch": 101.71, + "learning_rate": 0.00011364912280701753, + "loss": 0.0017, + "step": 6815 + }, + { + "epoch": 101.73, + "learning_rate": 0.0001136140350877193, + "loss": 0.0016, + "step": 6816 + }, + { + "epoch": 101.74, + "learning_rate": 0.00011357894736842103, + "loss": 0.0042, + "step": 6817 + }, + { + "epoch": 101.76, + "learning_rate": 0.0001135438596491228, + "loss": 0.0054, + "step": 6818 + }, + { + "epoch": 101.77, + "learning_rate": 0.00011350877192982454, + "loss": 0.0103, + "step": 6819 + }, + { + "epoch": 101.79, + "learning_rate": 0.0001134736842105263, + "loss": 0.0009, + "step": 6820 + }, + { + "epoch": 101.8, + "learning_rate": 0.00011343859649122807, + "loss": 0.0088, + "step": 6821 + }, + { + "epoch": 101.82, + "learning_rate": 0.00011340350877192982, + "loss": 0.0007, + "step": 6822 + }, + { + "epoch": 101.83, + "learning_rate": 0.00011336842105263158, + "loss": 0.0122, + "step": 6823 + }, + { + "epoch": 101.85, + "learning_rate": 0.00011333333333333331, + "loss": 0.0046, + "step": 6824 + }, + { + "epoch": 101.86, + "learning_rate": 0.00011329824561403508, + "loss": 0.0005, + "step": 6825 + }, + { + "epoch": 101.88, + "learning_rate": 0.00011326315789473683, + "loss": 0.001, + "step": 6826 + }, + { + "epoch": 101.89, + "learning_rate": 0.00011322807017543859, + "loss": 0.0014, + "step": 6827 + }, + { + "epoch": 101.91, + "learning_rate": 0.00011319298245614034, + "loss": 0.0048, + "step": 6828 + }, + { + "epoch": 101.92, + "learning_rate": 0.0001131578947368421, + "loss": 0.0039, + "step": 6829 + }, + { + "epoch": 101.94, + "learning_rate": 0.00011312280701754385, + "loss": 0.0007, + "step": 6830 + }, + { + "epoch": 101.95, + "learning_rate": 0.00011308771929824561, + "loss": 0.1849, + "step": 6831 + }, + { + "epoch": 101.97, + "learning_rate": 0.00011305263157894735, + "loss": 0.0843, + "step": 6832 + }, + { + "epoch": 101.98, + "learning_rate": 0.00011301754385964911, + "loss": 0.0008, + "step": 6833 + }, + { + "epoch": 102.0, + "learning_rate": 0.00011298245614035087, + "loss": 0.0574, + "step": 6834 + }, + { + "epoch": 102.01, + "learning_rate": 0.00011294736842105262, + "loss": 0.0702, + "step": 6835 + }, + { + "epoch": 102.03, + "learning_rate": 0.00011291228070175438, + "loss": 0.0056, + "step": 6836 + }, + { + "epoch": 102.04, + "learning_rate": 0.00011287719298245613, + "loss": 0.0005, + "step": 6837 + }, + { + "epoch": 102.06, + "learning_rate": 0.00011284210526315789, + "loss": 0.0851, + "step": 6838 + }, + { + "epoch": 102.07, + "learning_rate": 0.00011280701754385964, + "loss": 0.0006, + "step": 6839 + }, + { + "epoch": 102.09, + "learning_rate": 0.0001127719298245614, + "loss": 0.0006, + "step": 6840 + }, + { + "epoch": 102.1, + "learning_rate": 0.00011273684210526314, + "loss": 0.0008, + "step": 6841 + }, + { + "epoch": 102.12, + "learning_rate": 0.0001127017543859649, + "loss": 0.0299, + "step": 6842 + }, + { + "epoch": 102.13, + "learning_rate": 0.00011266666666666665, + "loss": 0.0005, + "step": 6843 + }, + { + "epoch": 102.15, + "learning_rate": 0.00011263157894736841, + "loss": 0.0631, + "step": 6844 + }, + { + "epoch": 102.16, + "learning_rate": 0.00011259649122807017, + "loss": 0.0007, + "step": 6845 + }, + { + "epoch": 102.18, + "learning_rate": 0.00011256140350877192, + "loss": 0.0005, + "step": 6846 + }, + { + "epoch": 102.19, + "learning_rate": 0.00011252631578947369, + "loss": 0.0008, + "step": 6847 + }, + { + "epoch": 102.21, + "learning_rate": 0.00011249122807017542, + "loss": 0.0005, + "step": 6848 + }, + { + "epoch": 102.22, + "learning_rate": 0.00011245614035087718, + "loss": 0.0006, + "step": 6849 + }, + { + "epoch": 102.24, + "learning_rate": 0.00011242105263157893, + "loss": 0.0006, + "step": 6850 + }, + { + "epoch": 102.25, + "learning_rate": 0.0001123859649122807, + "loss": 0.0006, + "step": 6851 + }, + { + "epoch": 102.27, + "learning_rate": 0.00011235087719298244, + "loss": 0.0018, + "step": 6852 + }, + { + "epoch": 102.28, + "learning_rate": 0.0001123157894736842, + "loss": 0.0006, + "step": 6853 + }, + { + "epoch": 102.3, + "learning_rate": 0.00011228070175438595, + "loss": 0.0616, + "step": 6854 + }, + { + "epoch": 102.31, + "learning_rate": 0.00011224561403508772, + "loss": 0.0104, + "step": 6855 + }, + { + "epoch": 102.33, + "learning_rate": 0.00011221052631578945, + "loss": 0.0321, + "step": 6856 + }, + { + "epoch": 102.34, + "learning_rate": 0.00011217543859649121, + "loss": 0.0013, + "step": 6857 + }, + { + "epoch": 102.36, + "learning_rate": 0.00011214035087719298, + "loss": 0.0983, + "step": 6858 + }, + { + "epoch": 102.37, + "learning_rate": 0.00011210526315789472, + "loss": 0.0005, + "step": 6859 + }, + { + "epoch": 102.39, + "learning_rate": 0.00011207017543859649, + "loss": 0.0901, + "step": 6860 + }, + { + "epoch": 102.4, + "learning_rate": 0.00011203508771929824, + "loss": 0.078, + "step": 6861 + }, + { + "epoch": 102.42, + "learning_rate": 0.000112, + "loss": 0.0034, + "step": 6862 + }, + { + "epoch": 102.43, + "learning_rate": 0.00011196491228070175, + "loss": 0.0006, + "step": 6863 + }, + { + "epoch": 102.45, + "learning_rate": 0.00011192982456140351, + "loss": 0.0079, + "step": 6864 + }, + { + "epoch": 102.46, + "learning_rate": 0.00011189473684210524, + "loss": 0.0381, + "step": 6865 + }, + { + "epoch": 102.48, + "learning_rate": 0.00011185964912280701, + "loss": 0.0005, + "step": 6866 + }, + { + "epoch": 102.49, + "learning_rate": 0.00011182456140350876, + "loss": 0.0005, + "step": 6867 + }, + { + "epoch": 102.51, + "learning_rate": 0.00011178947368421052, + "loss": 0.0006, + "step": 6868 + }, + { + "epoch": 102.52, + "learning_rate": 0.00011175438596491227, + "loss": 0.0259, + "step": 6869 + }, + { + "epoch": 102.54, + "learning_rate": 0.00011171929824561403, + "loss": 0.0227, + "step": 6870 + }, + { + "epoch": 102.55, + "learning_rate": 0.00011168421052631579, + "loss": 0.0005, + "step": 6871 + }, + { + "epoch": 102.57, + "learning_rate": 0.00011164912280701753, + "loss": 0.0165, + "step": 6872 + }, + { + "epoch": 102.58, + "learning_rate": 0.00011161403508771929, + "loss": 0.0009, + "step": 6873 + }, + { + "epoch": 102.59, + "learning_rate": 0.00011157894736842104, + "loss": 0.0006, + "step": 6874 + }, + { + "epoch": 102.61, + "learning_rate": 0.0001115438596491228, + "loss": 0.0014, + "step": 6875 + }, + { + "epoch": 102.62, + "learning_rate": 0.00011150877192982455, + "loss": 0.2293, + "step": 6876 + }, + { + "epoch": 102.64, + "learning_rate": 0.00011147368421052631, + "loss": 0.0006, + "step": 6877 + }, + { + "epoch": 102.65, + "learning_rate": 0.00011143859649122806, + "loss": 0.0031, + "step": 6878 + }, + { + "epoch": 102.67, + "learning_rate": 0.00011140350877192982, + "loss": 0.0006, + "step": 6879 + }, + { + "epoch": 102.68, + "learning_rate": 0.00011136842105263156, + "loss": 0.0005, + "step": 6880 + }, + { + "epoch": 102.7, + "learning_rate": 0.00011133333333333332, + "loss": 0.0006, + "step": 6881 + }, + { + "epoch": 102.71, + "learning_rate": 0.00011129824561403507, + "loss": 0.0009, + "step": 6882 + }, + { + "epoch": 102.73, + "learning_rate": 0.00011126315789473683, + "loss": 0.0055, + "step": 6883 + }, + { + "epoch": 102.74, + "learning_rate": 0.0001112280701754386, + "loss": 0.029, + "step": 6884 + }, + { + "epoch": 102.76, + "learning_rate": 0.00011119298245614034, + "loss": 0.0482, + "step": 6885 + }, + { + "epoch": 102.77, + "learning_rate": 0.0001111578947368421, + "loss": 0.0178, + "step": 6886 + }, + { + "epoch": 102.79, + "learning_rate": 0.00011112280701754385, + "loss": 0.0007, + "step": 6887 + }, + { + "epoch": 102.8, + "learning_rate": 0.0001110877192982456, + "loss": 0.0005, + "step": 6888 + }, + { + "epoch": 102.82, + "learning_rate": 0.00011105263157894735, + "loss": 0.0006, + "step": 6889 + }, + { + "epoch": 102.83, + "learning_rate": 0.00011101754385964911, + "loss": 0.0014, + "step": 6890 + }, + { + "epoch": 102.85, + "learning_rate": 0.00011098245614035086, + "loss": 0.0006, + "step": 6891 + }, + { + "epoch": 102.86, + "learning_rate": 0.00011094736842105262, + "loss": 0.0005, + "step": 6892 + }, + { + "epoch": 102.88, + "learning_rate": 0.00011091228070175437, + "loss": 0.0545, + "step": 6893 + }, + { + "epoch": 102.89, + "learning_rate": 0.00011087719298245614, + "loss": 0.018, + "step": 6894 + }, + { + "epoch": 102.91, + "learning_rate": 0.0001108421052631579, + "loss": 0.0625, + "step": 6895 + }, + { + "epoch": 102.92, + "learning_rate": 0.00011080701754385963, + "loss": 0.0008, + "step": 6896 + }, + { + "epoch": 102.94, + "learning_rate": 0.0001107719298245614, + "loss": 0.0009, + "step": 6897 + }, + { + "epoch": 102.95, + "learning_rate": 0.00011073684210526314, + "loss": 0.0486, + "step": 6898 + }, + { + "epoch": 102.97, + "learning_rate": 0.0001107017543859649, + "loss": 0.0005, + "step": 6899 + }, + { + "epoch": 102.98, + "learning_rate": 0.00011066666666666666, + "loss": 0.0007, + "step": 6900 + }, + { + "epoch": 103.0, + "learning_rate": 0.00011063157894736842, + "loss": 0.0005, + "step": 6901 + }, + { + "epoch": 103.01, + "learning_rate": 0.00011059649122807017, + "loss": 0.0022, + "step": 6902 + }, + { + "epoch": 103.03, + "learning_rate": 0.00011056140350877193, + "loss": 0.0014, + "step": 6903 + }, + { + "epoch": 103.04, + "learning_rate": 0.00011052631578947366, + "loss": 0.001, + "step": 6904 + }, + { + "epoch": 103.06, + "learning_rate": 0.00011049122807017543, + "loss": 0.0007, + "step": 6905 + }, + { + "epoch": 103.07, + "learning_rate": 0.00011045614035087717, + "loss": 0.0019, + "step": 6906 + }, + { + "epoch": 103.09, + "learning_rate": 0.00011042105263157894, + "loss": 0.0005, + "step": 6907 + }, + { + "epoch": 103.1, + "learning_rate": 0.0001103859649122807, + "loss": 0.0007, + "step": 6908 + }, + { + "epoch": 103.12, + "learning_rate": 0.00011035087719298245, + "loss": 0.0418, + "step": 6909 + }, + { + "epoch": 103.13, + "learning_rate": 0.00011031578947368421, + "loss": 0.0005, + "step": 6910 + }, + { + "epoch": 103.15, + "learning_rate": 0.00011028070175438596, + "loss": 0.0019, + "step": 6911 + }, + { + "epoch": 103.16, + "learning_rate": 0.00011024561403508771, + "loss": 0.0004, + "step": 6912 + }, + { + "epoch": 103.18, + "learning_rate": 0.00011021052631578946, + "loss": 0.0004, + "step": 6913 + }, + { + "epoch": 103.19, + "learning_rate": 0.00011017543859649122, + "loss": 0.0004, + "step": 6914 + }, + { + "epoch": 103.21, + "learning_rate": 0.00011014035087719297, + "loss": 0.0005, + "step": 6915 + }, + { + "epoch": 103.22, + "learning_rate": 0.00011010526315789473, + "loss": 0.0005, + "step": 6916 + }, + { + "epoch": 103.24, + "learning_rate": 0.00011007017543859648, + "loss": 0.0011, + "step": 6917 + }, + { + "epoch": 103.25, + "learning_rate": 0.00011003508771929824, + "loss": 0.0006, + "step": 6918 + }, + { + "epoch": 103.27, + "learning_rate": 0.00010999999999999998, + "loss": 0.0006, + "step": 6919 + }, + { + "epoch": 103.28, + "learning_rate": 0.00010996491228070174, + "loss": 0.0004, + "step": 6920 + }, + { + "epoch": 103.3, + "learning_rate": 0.0001099298245614035, + "loss": 0.0006, + "step": 6921 + }, + { + "epoch": 103.31, + "learning_rate": 0.00010989473684210525, + "loss": 0.0006, + "step": 6922 + }, + { + "epoch": 103.33, + "learning_rate": 0.00010985964912280701, + "loss": 0.0007, + "step": 6923 + }, + { + "epoch": 103.34, + "learning_rate": 0.00010982456140350876, + "loss": 0.008, + "step": 6924 + }, + { + "epoch": 103.36, + "learning_rate": 0.00010978947368421052, + "loss": 0.0014, + "step": 6925 + }, + { + "epoch": 103.37, + "learning_rate": 0.00010975438596491227, + "loss": 0.0016, + "step": 6926 + }, + { + "epoch": 103.39, + "learning_rate": 0.00010971929824561403, + "loss": 0.0005, + "step": 6927 + }, + { + "epoch": 103.4, + "learning_rate": 0.00010968421052631577, + "loss": 0.0005, + "step": 6928 + }, + { + "epoch": 103.42, + "learning_rate": 0.00010964912280701753, + "loss": 0.0724, + "step": 6929 + }, + { + "epoch": 103.43, + "learning_rate": 0.00010961403508771928, + "loss": 0.0007, + "step": 6930 + }, + { + "epoch": 103.45, + "learning_rate": 0.00010957894736842104, + "loss": 0.0006, + "step": 6931 + }, + { + "epoch": 103.46, + "learning_rate": 0.00010954385964912279, + "loss": 0.0149, + "step": 6932 + }, + { + "epoch": 103.48, + "learning_rate": 0.00010950877192982455, + "loss": 0.0024, + "step": 6933 + }, + { + "epoch": 103.49, + "learning_rate": 0.00010947368421052632, + "loss": 0.1264, + "step": 6934 + }, + { + "epoch": 103.51, + "learning_rate": 0.00010943859649122805, + "loss": 0.0011, + "step": 6935 + }, + { + "epoch": 103.52, + "learning_rate": 0.00010940350877192981, + "loss": 0.0013, + "step": 6936 + }, + { + "epoch": 103.54, + "learning_rate": 0.00010936842105263156, + "loss": 0.0004, + "step": 6937 + }, + { + "epoch": 103.55, + "learning_rate": 0.00010933333333333333, + "loss": 0.1629, + "step": 6938 + }, + { + "epoch": 103.57, + "learning_rate": 0.00010929824561403507, + "loss": 0.0004, + "step": 6939 + }, + { + "epoch": 103.58, + "learning_rate": 0.00010926315789473684, + "loss": 0.0007, + "step": 6940 + }, + { + "epoch": 103.59, + "learning_rate": 0.00010922807017543859, + "loss": 0.0009, + "step": 6941 + }, + { + "epoch": 103.61, + "learning_rate": 0.00010919298245614035, + "loss": 0.0004, + "step": 6942 + }, + { + "epoch": 103.62, + "learning_rate": 0.00010915789473684208, + "loss": 0.0004, + "step": 6943 + }, + { + "epoch": 103.64, + "learning_rate": 0.00010912280701754385, + "loss": 0.0004, + "step": 6944 + }, + { + "epoch": 103.65, + "learning_rate": 0.00010908771929824561, + "loss": 0.0004, + "step": 6945 + }, + { + "epoch": 103.67, + "learning_rate": 0.00010905263157894736, + "loss": 0.0215, + "step": 6946 + }, + { + "epoch": 103.68, + "learning_rate": 0.00010901754385964912, + "loss": 0.0005, + "step": 6947 + }, + { + "epoch": 103.7, + "learning_rate": 0.00010898245614035087, + "loss": 0.0006, + "step": 6948 + }, + { + "epoch": 103.71, + "learning_rate": 0.00010894736842105263, + "loss": 0.0005, + "step": 6949 + }, + { + "epoch": 103.73, + "learning_rate": 0.00010891228070175438, + "loss": 0.0006, + "step": 6950 + }, + { + "epoch": 103.74, + "learning_rate": 0.00010887719298245614, + "loss": 0.0006, + "step": 6951 + }, + { + "epoch": 103.76, + "learning_rate": 0.00010884210526315788, + "loss": 0.0238, + "step": 6952 + }, + { + "epoch": 103.77, + "learning_rate": 0.00010880701754385964, + "loss": 0.0102, + "step": 6953 + }, + { + "epoch": 103.79, + "learning_rate": 0.00010877192982456139, + "loss": 0.0005, + "step": 6954 + }, + { + "epoch": 103.8, + "learning_rate": 0.00010873684210526315, + "loss": 0.0006, + "step": 6955 + }, + { + "epoch": 103.82, + "learning_rate": 0.0001087017543859649, + "loss": 0.0011, + "step": 6956 + }, + { + "epoch": 103.83, + "learning_rate": 0.00010866666666666666, + "loss": 0.0007, + "step": 6957 + }, + { + "epoch": 103.85, + "learning_rate": 0.00010863157894736842, + "loss": 0.0612, + "step": 6958 + }, + { + "epoch": 103.86, + "learning_rate": 0.00010859649122807016, + "loss": 0.0005, + "step": 6959 + }, + { + "epoch": 103.88, + "learning_rate": 0.00010856140350877192, + "loss": 0.0006, + "step": 6960 + }, + { + "epoch": 103.89, + "learning_rate": 0.00010852631578947367, + "loss": 0.0005, + "step": 6961 + }, + { + "epoch": 103.91, + "learning_rate": 0.00010849122807017543, + "loss": 0.0165, + "step": 6962 + }, + { + "epoch": 103.92, + "learning_rate": 0.00010845614035087718, + "loss": 0.0005, + "step": 6963 + }, + { + "epoch": 103.94, + "learning_rate": 0.00010842105263157894, + "loss": 0.0006, + "step": 6964 + }, + { + "epoch": 103.95, + "learning_rate": 0.00010838596491228069, + "loss": 0.0004, + "step": 6965 + }, + { + "epoch": 103.97, + "learning_rate": 0.00010835087719298245, + "loss": 0.0009, + "step": 6966 + }, + { + "epoch": 103.98, + "learning_rate": 0.00010831578947368419, + "loss": 0.0008, + "step": 6967 + }, + { + "epoch": 104.0, + "learning_rate": 0.00010828070175438595, + "loss": 0.0005, + "step": 6968 + }, + { + "epoch": 104.01, + "learning_rate": 0.0001082456140350877, + "loss": 0.0032, + "step": 6969 + }, + { + "epoch": 104.03, + "learning_rate": 0.00010821052631578946, + "loss": 0.0005, + "step": 6970 + }, + { + "epoch": 104.04, + "learning_rate": 0.00010817543859649122, + "loss": 0.0006, + "step": 6971 + }, + { + "epoch": 104.06, + "learning_rate": 0.00010814035087719297, + "loss": 0.0052, + "step": 6972 + }, + { + "epoch": 104.07, + "learning_rate": 0.00010810526315789474, + "loss": 0.0004, + "step": 6973 + }, + { + "epoch": 104.09, + "learning_rate": 0.00010807017543859648, + "loss": 0.0004, + "step": 6974 + }, + { + "epoch": 104.1, + "learning_rate": 0.00010803508771929825, + "loss": 0.0005, + "step": 6975 + }, + { + "epoch": 104.12, + "learning_rate": 0.00010799999999999998, + "loss": 0.0102, + "step": 6976 + }, + { + "epoch": 104.13, + "learning_rate": 0.00010796491228070174, + "loss": 0.0004, + "step": 6977 + }, + { + "epoch": 104.15, + "learning_rate": 0.0001079298245614035, + "loss": 0.0003, + "step": 6978 + }, + { + "epoch": 104.16, + "learning_rate": 0.00010789473684210526, + "loss": 0.0009, + "step": 6979 + }, + { + "epoch": 104.18, + "learning_rate": 0.000107859649122807, + "loss": 0.0116, + "step": 6980 + }, + { + "epoch": 104.19, + "learning_rate": 0.00010782456140350877, + "loss": 0.0003, + "step": 6981 + }, + { + "epoch": 104.21, + "learning_rate": 0.0001077894736842105, + "loss": 0.0005, + "step": 6982 + }, + { + "epoch": 104.22, + "learning_rate": 0.00010775438596491226, + "loss": 0.0074, + "step": 6983 + }, + { + "epoch": 104.24, + "learning_rate": 0.00010771929824561403, + "loss": 0.0005, + "step": 6984 + }, + { + "epoch": 104.25, + "learning_rate": 0.00010768421052631578, + "loss": 0.0004, + "step": 6985 + }, + { + "epoch": 104.27, + "learning_rate": 0.00010764912280701754, + "loss": 0.0471, + "step": 6986 + }, + { + "epoch": 104.28, + "learning_rate": 0.00010761403508771929, + "loss": 0.0005, + "step": 6987 + }, + { + "epoch": 104.3, + "learning_rate": 0.00010757894736842105, + "loss": 0.0229, + "step": 6988 + }, + { + "epoch": 104.31, + "learning_rate": 0.0001075438596491228, + "loss": 0.0003, + "step": 6989 + }, + { + "epoch": 104.33, + "learning_rate": 0.00010750877192982456, + "loss": 0.0005, + "step": 6990 + }, + { + "epoch": 104.34, + "learning_rate": 0.0001074736842105263, + "loss": 0.0004, + "step": 6991 + }, + { + "epoch": 104.36, + "learning_rate": 0.00010743859649122806, + "loss": 0.0005, + "step": 6992 + }, + { + "epoch": 104.37, + "learning_rate": 0.0001074035087719298, + "loss": 0.1992, + "step": 6993 + }, + { + "epoch": 104.39, + "learning_rate": 0.00010736842105263157, + "loss": 0.0003, + "step": 6994 + }, + { + "epoch": 104.4, + "learning_rate": 0.00010733333333333332, + "loss": 0.0005, + "step": 6995 + }, + { + "epoch": 104.42, + "learning_rate": 0.00010729824561403508, + "loss": 0.0018, + "step": 6996 + }, + { + "epoch": 104.43, + "learning_rate": 0.00010726315789473684, + "loss": 0.0399, + "step": 6997 + }, + { + "epoch": 104.45, + "learning_rate": 0.00010722807017543859, + "loss": 0.0004, + "step": 6998 + }, + { + "epoch": 104.46, + "learning_rate": 0.00010719298245614035, + "loss": 0.1261, + "step": 6999 + }, + { + "epoch": 104.48, + "learning_rate": 0.00010715789473684209, + "loss": 0.01, + "step": 7000 + }, + { + "epoch": 104.48, + "eval_accuracy": 0.8673519334312286, + "eval_f1": 0.866532551783554, + "eval_loss": 0.7149230241775513, + "eval_runtime": 346.4155, + "eval_samples_per_second": 11.795, + "eval_steps_per_second": 0.739, + "step": 7000 + }, + { + "epoch": 104.49, + "learning_rate": 0.00010712280701754385, + "loss": 0.0006, + "step": 7001 + }, + { + "epoch": 104.51, + "learning_rate": 0.0001070877192982456, + "loss": 0.0004, + "step": 7002 + }, + { + "epoch": 104.52, + "learning_rate": 0.00010705263157894736, + "loss": 0.0005, + "step": 7003 + }, + { + "epoch": 104.54, + "learning_rate": 0.00010701754385964911, + "loss": 0.0046, + "step": 7004 + }, + { + "epoch": 104.55, + "learning_rate": 0.00010698245614035087, + "loss": 0.0004, + "step": 7005 + }, + { + "epoch": 104.57, + "learning_rate": 0.00010694736842105261, + "loss": 0.0004, + "step": 7006 + }, + { + "epoch": 104.58, + "learning_rate": 0.00010691228070175437, + "loss": 0.0004, + "step": 7007 + }, + { + "epoch": 104.59, + "learning_rate": 0.00010687719298245613, + "loss": 0.0015, + "step": 7008 + }, + { + "epoch": 104.61, + "learning_rate": 0.00010684210526315788, + "loss": 0.0003, + "step": 7009 + }, + { + "epoch": 104.62, + "learning_rate": 0.00010680701754385964, + "loss": 0.0004, + "step": 7010 + }, + { + "epoch": 104.64, + "learning_rate": 0.00010677192982456139, + "loss": 0.0011, + "step": 7011 + }, + { + "epoch": 104.65, + "learning_rate": 0.00010673684210526316, + "loss": 0.0004, + "step": 7012 + }, + { + "epoch": 104.67, + "learning_rate": 0.0001067017543859649, + "loss": 0.0004, + "step": 7013 + }, + { + "epoch": 104.68, + "learning_rate": 0.00010666666666666667, + "loss": 0.0218, + "step": 7014 + }, + { + "epoch": 104.7, + "learning_rate": 0.0001066315789473684, + "loss": 0.0021, + "step": 7015 + }, + { + "epoch": 104.71, + "learning_rate": 0.00010659649122807016, + "loss": 0.0008, + "step": 7016 + }, + { + "epoch": 104.73, + "learning_rate": 0.00010656140350877191, + "loss": 0.0005, + "step": 7017 + }, + { + "epoch": 104.74, + "learning_rate": 0.00010652631578947368, + "loss": 0.0005, + "step": 7018 + }, + { + "epoch": 104.76, + "learning_rate": 0.00010649122807017542, + "loss": 0.0005, + "step": 7019 + }, + { + "epoch": 104.77, + "learning_rate": 0.00010645614035087719, + "loss": 0.0004, + "step": 7020 + }, + { + "epoch": 104.79, + "learning_rate": 0.00010642105263157895, + "loss": 0.0005, + "step": 7021 + }, + { + "epoch": 104.8, + "learning_rate": 0.0001063859649122807, + "loss": 0.0157, + "step": 7022 + }, + { + "epoch": 104.82, + "learning_rate": 0.00010635087719298246, + "loss": 0.0004, + "step": 7023 + }, + { + "epoch": 104.83, + "learning_rate": 0.0001063157894736842, + "loss": 0.0014, + "step": 7024 + }, + { + "epoch": 104.85, + "learning_rate": 0.00010628070175438596, + "loss": 0.0004, + "step": 7025 + }, + { + "epoch": 104.86, + "learning_rate": 0.0001062456140350877, + "loss": 0.0005, + "step": 7026 + }, + { + "epoch": 104.88, + "learning_rate": 0.00010621052631578947, + "loss": 0.0149, + "step": 7027 + }, + { + "epoch": 104.89, + "learning_rate": 0.00010617543859649122, + "loss": 0.062, + "step": 7028 + }, + { + "epoch": 104.91, + "learning_rate": 0.00010614035087719298, + "loss": 0.0003, + "step": 7029 + }, + { + "epoch": 104.92, + "learning_rate": 0.00010610526315789471, + "loss": 0.0006, + "step": 7030 + }, + { + "epoch": 104.94, + "learning_rate": 0.00010607017543859648, + "loss": 0.0006, + "step": 7031 + }, + { + "epoch": 104.95, + "learning_rate": 0.00010603508771929823, + "loss": 0.0004, + "step": 7032 + }, + { + "epoch": 104.97, + "learning_rate": 0.00010599999999999999, + "loss": 0.3033, + "step": 7033 + }, + { + "epoch": 104.98, + "learning_rate": 0.00010596491228070175, + "loss": 0.0003, + "step": 7034 + }, + { + "epoch": 105.0, + "learning_rate": 0.0001059298245614035, + "loss": 0.1008, + "step": 7035 + }, + { + "epoch": 105.01, + "learning_rate": 0.00010589473684210526, + "loss": 0.0013, + "step": 7036 + }, + { + "epoch": 105.03, + "learning_rate": 0.00010585964912280701, + "loss": 0.0004, + "step": 7037 + }, + { + "epoch": 105.04, + "learning_rate": 0.00010582456140350877, + "loss": 0.0014, + "step": 7038 + }, + { + "epoch": 105.06, + "learning_rate": 0.00010578947368421051, + "loss": 0.0006, + "step": 7039 + }, + { + "epoch": 105.07, + "learning_rate": 0.00010575438596491227, + "loss": 0.0003, + "step": 7040 + }, + { + "epoch": 105.09, + "learning_rate": 0.00010571929824561402, + "loss": 0.0016, + "step": 7041 + }, + { + "epoch": 105.1, + "learning_rate": 0.00010568421052631578, + "loss": 0.0015, + "step": 7042 + }, + { + "epoch": 105.12, + "learning_rate": 0.00010564912280701753, + "loss": 0.0004, + "step": 7043 + }, + { + "epoch": 105.13, + "learning_rate": 0.00010561403508771929, + "loss": 0.0005, + "step": 7044 + }, + { + "epoch": 105.15, + "learning_rate": 0.00010557894736842104, + "loss": 0.0004, + "step": 7045 + }, + { + "epoch": 105.16, + "learning_rate": 0.0001055438596491228, + "loss": 0.0015, + "step": 7046 + }, + { + "epoch": 105.18, + "learning_rate": 0.00010550877192982457, + "loss": 0.0004, + "step": 7047 + }, + { + "epoch": 105.19, + "learning_rate": 0.0001054736842105263, + "loss": 0.0322, + "step": 7048 + }, + { + "epoch": 105.21, + "learning_rate": 0.00010543859649122806, + "loss": 0.0088, + "step": 7049 + }, + { + "epoch": 105.22, + "learning_rate": 0.00010540350877192981, + "loss": 0.0003, + "step": 7050 + }, + { + "epoch": 105.24, + "learning_rate": 0.00010536842105263157, + "loss": 0.0005, + "step": 7051 + }, + { + "epoch": 105.25, + "learning_rate": 0.00010533333333333332, + "loss": 0.0082, + "step": 7052 + }, + { + "epoch": 105.27, + "learning_rate": 0.00010529824561403509, + "loss": 0.0004, + "step": 7053 + }, + { + "epoch": 105.28, + "learning_rate": 0.00010526315789473682, + "loss": 0.0005, + "step": 7054 + }, + { + "epoch": 105.3, + "learning_rate": 0.00010522807017543858, + "loss": 0.0004, + "step": 7055 + }, + { + "epoch": 105.31, + "learning_rate": 0.00010519298245614033, + "loss": 0.2876, + "step": 7056 + }, + { + "epoch": 105.33, + "learning_rate": 0.0001051578947368421, + "loss": 0.0004, + "step": 7057 + }, + { + "epoch": 105.34, + "learning_rate": 0.00010512280701754386, + "loss": 0.0005, + "step": 7058 + }, + { + "epoch": 105.36, + "learning_rate": 0.0001050877192982456, + "loss": 0.0005, + "step": 7059 + }, + { + "epoch": 105.37, + "learning_rate": 0.00010505263157894737, + "loss": 0.0408, + "step": 7060 + }, + { + "epoch": 105.39, + "learning_rate": 0.00010501754385964912, + "loss": 0.0003, + "step": 7061 + }, + { + "epoch": 105.4, + "learning_rate": 0.00010498245614035088, + "loss": 0.0004, + "step": 7062 + }, + { + "epoch": 105.42, + "learning_rate": 0.00010494736842105261, + "loss": 0.0004, + "step": 7063 + }, + { + "epoch": 105.43, + "learning_rate": 0.00010491228070175438, + "loss": 0.0342, + "step": 7064 + }, + { + "epoch": 105.45, + "learning_rate": 0.00010487719298245613, + "loss": 0.0008, + "step": 7065 + }, + { + "epoch": 105.46, + "learning_rate": 0.00010484210526315789, + "loss": 0.0004, + "step": 7066 + }, + { + "epoch": 105.48, + "learning_rate": 0.00010480701754385964, + "loss": 0.0004, + "step": 7067 + }, + { + "epoch": 105.49, + "learning_rate": 0.0001047719298245614, + "loss": 0.0004, + "step": 7068 + }, + { + "epoch": 105.51, + "learning_rate": 0.00010473684210526315, + "loss": 0.0005, + "step": 7069 + }, + { + "epoch": 105.52, + "learning_rate": 0.00010470175438596491, + "loss": 0.044, + "step": 7070 + }, + { + "epoch": 105.54, + "learning_rate": 0.00010466666666666667, + "loss": 0.0004, + "step": 7071 + }, + { + "epoch": 105.55, + "learning_rate": 0.00010463157894736841, + "loss": 0.0051, + "step": 7072 + }, + { + "epoch": 105.57, + "learning_rate": 0.00010459649122807017, + "loss": 0.0007, + "step": 7073 + }, + { + "epoch": 105.58, + "learning_rate": 0.00010456140350877192, + "loss": 0.0009, + "step": 7074 + }, + { + "epoch": 105.59, + "learning_rate": 0.00010452631578947368, + "loss": 0.0004, + "step": 7075 + }, + { + "epoch": 105.61, + "learning_rate": 0.00010449122807017543, + "loss": 0.0009, + "step": 7076 + }, + { + "epoch": 105.62, + "learning_rate": 0.00010445614035087719, + "loss": 0.0007, + "step": 7077 + }, + { + "epoch": 105.64, + "learning_rate": 0.00010442105263157893, + "loss": 0.0501, + "step": 7078 + }, + { + "epoch": 105.65, + "learning_rate": 0.00010438596491228069, + "loss": 0.1193, + "step": 7079 + }, + { + "epoch": 105.67, + "learning_rate": 0.00010435087719298244, + "loss": 0.0006, + "step": 7080 + }, + { + "epoch": 105.68, + "learning_rate": 0.0001043157894736842, + "loss": 0.0005, + "step": 7081 + }, + { + "epoch": 105.7, + "learning_rate": 0.00010428070175438595, + "loss": 0.0393, + "step": 7082 + }, + { + "epoch": 105.71, + "learning_rate": 0.00010424561403508771, + "loss": 0.0015, + "step": 7083 + }, + { + "epoch": 105.73, + "learning_rate": 0.00010421052631578947, + "loss": 0.0004, + "step": 7084 + }, + { + "epoch": 105.74, + "learning_rate": 0.00010417543859649122, + "loss": 0.0006, + "step": 7085 + }, + { + "epoch": 105.76, + "learning_rate": 0.00010414035087719298, + "loss": 0.0011, + "step": 7086 + }, + { + "epoch": 105.77, + "learning_rate": 0.00010410526315789472, + "loss": 0.0026, + "step": 7087 + }, + { + "epoch": 105.79, + "learning_rate": 0.00010407017543859648, + "loss": 0.0008, + "step": 7088 + }, + { + "epoch": 105.8, + "learning_rate": 0.00010403508771929823, + "loss": 0.0011, + "step": 7089 + }, + { + "epoch": 105.82, + "learning_rate": 0.000104, + "loss": 0.0729, + "step": 7090 + }, + { + "epoch": 105.83, + "learning_rate": 0.00010396491228070174, + "loss": 0.0147, + "step": 7091 + }, + { + "epoch": 105.85, + "learning_rate": 0.0001039298245614035, + "loss": 0.0004, + "step": 7092 + }, + { + "epoch": 105.86, + "learning_rate": 0.00010389473684210525, + "loss": 0.0007, + "step": 7093 + }, + { + "epoch": 105.88, + "learning_rate": 0.00010385964912280702, + "loss": 0.0004, + "step": 7094 + }, + { + "epoch": 105.89, + "learning_rate": 0.00010382456140350875, + "loss": 0.0019, + "step": 7095 + }, + { + "epoch": 105.91, + "learning_rate": 0.00010378947368421051, + "loss": 0.0011, + "step": 7096 + }, + { + "epoch": 105.92, + "learning_rate": 0.00010375438596491228, + "loss": 0.0003, + "step": 7097 + }, + { + "epoch": 105.94, + "learning_rate": 0.00010371929824561402, + "loss": 0.0016, + "step": 7098 + }, + { + "epoch": 105.95, + "learning_rate": 0.00010368421052631579, + "loss": 0.313, + "step": 7099 + }, + { + "epoch": 105.97, + "learning_rate": 0.00010364912280701754, + "loss": 0.0122, + "step": 7100 + }, + { + "epoch": 105.98, + "learning_rate": 0.0001036140350877193, + "loss": 0.0004, + "step": 7101 + }, + { + "epoch": 106.0, + "learning_rate": 0.00010357894736842103, + "loss": 0.0011, + "step": 7102 + }, + { + "epoch": 106.01, + "learning_rate": 0.0001035438596491228, + "loss": 0.0025, + "step": 7103 + }, + { + "epoch": 106.03, + "learning_rate": 0.00010350877192982454, + "loss": 0.0007, + "step": 7104 + }, + { + "epoch": 106.04, + "learning_rate": 0.0001034736842105263, + "loss": 0.0091, + "step": 7105 + }, + { + "epoch": 106.06, + "learning_rate": 0.00010343859649122806, + "loss": 0.0014, + "step": 7106 + }, + { + "epoch": 106.07, + "learning_rate": 0.00010340350877192982, + "loss": 0.0006, + "step": 7107 + }, + { + "epoch": 106.09, + "learning_rate": 0.00010336842105263158, + "loss": 0.0014, + "step": 7108 + }, + { + "epoch": 106.1, + "learning_rate": 0.00010333333333333333, + "loss": 0.0007, + "step": 7109 + }, + { + "epoch": 106.12, + "learning_rate": 0.00010329824561403509, + "loss": 0.0005, + "step": 7110 + }, + { + "epoch": 106.13, + "learning_rate": 0.00010326315789473683, + "loss": 0.0054, + "step": 7111 + }, + { + "epoch": 106.15, + "learning_rate": 0.00010322807017543859, + "loss": 0.0003, + "step": 7112 + }, + { + "epoch": 106.16, + "learning_rate": 0.00010319298245614034, + "loss": 0.0121, + "step": 7113 + }, + { + "epoch": 106.18, + "learning_rate": 0.0001031578947368421, + "loss": 0.0257, + "step": 7114 + }, + { + "epoch": 106.19, + "learning_rate": 0.00010312280701754385, + "loss": 0.0006, + "step": 7115 + }, + { + "epoch": 106.21, + "learning_rate": 0.00010308771929824561, + "loss": 0.0004, + "step": 7116 + }, + { + "epoch": 106.22, + "learning_rate": 0.00010305263157894736, + "loss": 0.0145, + "step": 7117 + }, + { + "epoch": 106.24, + "learning_rate": 0.00010301754385964912, + "loss": 0.0004, + "step": 7118 + }, + { + "epoch": 106.25, + "learning_rate": 0.00010298245614035086, + "loss": 0.0669, + "step": 7119 + }, + { + "epoch": 106.27, + "learning_rate": 0.00010294736842105262, + "loss": 0.0009, + "step": 7120 + }, + { + "epoch": 106.28, + "learning_rate": 0.00010291228070175438, + "loss": 0.0004, + "step": 7121 + }, + { + "epoch": 106.3, + "learning_rate": 0.00010287719298245613, + "loss": 0.0005, + "step": 7122 + }, + { + "epoch": 106.31, + "learning_rate": 0.00010284210526315789, + "loss": 0.0005, + "step": 7123 + }, + { + "epoch": 106.33, + "learning_rate": 0.00010280701754385964, + "loss": 0.0003, + "step": 7124 + }, + { + "epoch": 106.34, + "learning_rate": 0.0001027719298245614, + "loss": 0.0004, + "step": 7125 + }, + { + "epoch": 106.36, + "learning_rate": 0.00010273684210526314, + "loss": 0.0439, + "step": 7126 + }, + { + "epoch": 106.37, + "learning_rate": 0.0001027017543859649, + "loss": 0.0006, + "step": 7127 + }, + { + "epoch": 106.39, + "learning_rate": 0.00010266666666666665, + "loss": 0.1396, + "step": 7128 + }, + { + "epoch": 106.4, + "learning_rate": 0.00010263157894736841, + "loss": 0.0007, + "step": 7129 + }, + { + "epoch": 106.42, + "learning_rate": 0.00010259649122807016, + "loss": 0.0177, + "step": 7130 + }, + { + "epoch": 106.43, + "learning_rate": 0.00010256140350877192, + "loss": 0.0004, + "step": 7131 + }, + { + "epoch": 106.45, + "learning_rate": 0.00010252631578947367, + "loss": 0.0007, + "step": 7132 + }, + { + "epoch": 106.46, + "learning_rate": 0.00010249122807017543, + "loss": 0.0005, + "step": 7133 + }, + { + "epoch": 106.48, + "learning_rate": 0.0001024561403508772, + "loss": 0.0004, + "step": 7134 + }, + { + "epoch": 106.49, + "learning_rate": 0.00010242105263157893, + "loss": 0.1185, + "step": 7135 + }, + { + "epoch": 106.51, + "learning_rate": 0.0001023859649122807, + "loss": 0.0004, + "step": 7136 + }, + { + "epoch": 106.52, + "learning_rate": 0.00010235087719298244, + "loss": 0.0011, + "step": 7137 + }, + { + "epoch": 106.54, + "learning_rate": 0.0001023157894736842, + "loss": 0.0003, + "step": 7138 + }, + { + "epoch": 106.55, + "learning_rate": 0.00010228070175438595, + "loss": 0.0006, + "step": 7139 + }, + { + "epoch": 106.57, + "learning_rate": 0.00010224561403508772, + "loss": 0.0004, + "step": 7140 + }, + { + "epoch": 106.58, + "learning_rate": 0.00010221052631578947, + "loss": 0.0005, + "step": 7141 + }, + { + "epoch": 106.59, + "learning_rate": 0.00010217543859649123, + "loss": 0.0237, + "step": 7142 + }, + { + "epoch": 106.61, + "learning_rate": 0.00010214035087719296, + "loss": 0.0056, + "step": 7143 + }, + { + "epoch": 106.62, + "learning_rate": 0.00010210526315789473, + "loss": 0.0004, + "step": 7144 + }, + { + "epoch": 106.64, + "learning_rate": 0.00010207017543859647, + "loss": 0.0005, + "step": 7145 + }, + { + "epoch": 106.65, + "learning_rate": 0.00010203508771929824, + "loss": 0.0004, + "step": 7146 + }, + { + "epoch": 106.67, + "learning_rate": 0.000102, + "loss": 0.0004, + "step": 7147 + }, + { + "epoch": 106.68, + "learning_rate": 0.00010196491228070175, + "loss": 0.0003, + "step": 7148 + }, + { + "epoch": 106.7, + "learning_rate": 0.00010192982456140351, + "loss": 0.1856, + "step": 7149 + }, + { + "epoch": 106.71, + "learning_rate": 0.00010189473684210525, + "loss": 0.0004, + "step": 7150 + }, + { + "epoch": 106.73, + "learning_rate": 0.00010185964912280701, + "loss": 0.0004, + "step": 7151 + }, + { + "epoch": 106.74, + "learning_rate": 0.00010182456140350876, + "loss": 0.0004, + "step": 7152 + }, + { + "epoch": 106.76, + "learning_rate": 0.00010178947368421052, + "loss": 0.0005, + "step": 7153 + }, + { + "epoch": 106.77, + "learning_rate": 0.00010175438596491227, + "loss": 0.0337, + "step": 7154 + }, + { + "epoch": 106.79, + "learning_rate": 0.00010171929824561403, + "loss": 0.0004, + "step": 7155 + }, + { + "epoch": 106.8, + "learning_rate": 0.00010168421052631578, + "loss": 0.0031, + "step": 7156 + }, + { + "epoch": 106.82, + "learning_rate": 0.00010164912280701754, + "loss": 0.0008, + "step": 7157 + }, + { + "epoch": 106.83, + "learning_rate": 0.00010161403508771928, + "loss": 0.0005, + "step": 7158 + }, + { + "epoch": 106.85, + "learning_rate": 0.00010157894736842104, + "loss": 0.0005, + "step": 7159 + }, + { + "epoch": 106.86, + "learning_rate": 0.0001015438596491228, + "loss": 0.0004, + "step": 7160 + }, + { + "epoch": 106.88, + "learning_rate": 0.00010150877192982455, + "loss": 0.0003, + "step": 7161 + }, + { + "epoch": 106.89, + "learning_rate": 0.00010147368421052631, + "loss": 0.0009, + "step": 7162 + }, + { + "epoch": 106.91, + "learning_rate": 0.00010143859649122806, + "loss": 0.0054, + "step": 7163 + }, + { + "epoch": 106.92, + "learning_rate": 0.00010140350877192982, + "loss": 0.0003, + "step": 7164 + }, + { + "epoch": 106.94, + "learning_rate": 0.00010136842105263157, + "loss": 0.0005, + "step": 7165 + }, + { + "epoch": 106.95, + "learning_rate": 0.00010133333333333332, + "loss": 0.0004, + "step": 7166 + }, + { + "epoch": 106.97, + "learning_rate": 0.00010129824561403507, + "loss": 0.0015, + "step": 7167 + }, + { + "epoch": 106.98, + "learning_rate": 0.00010126315789473683, + "loss": 0.0294, + "step": 7168 + }, + { + "epoch": 107.0, + "learning_rate": 0.00010122807017543858, + "loss": 0.0004, + "step": 7169 + }, + { + "epoch": 107.01, + "learning_rate": 0.00010119298245614034, + "loss": 0.0877, + "step": 7170 + }, + { + "epoch": 107.03, + "learning_rate": 0.0001011578947368421, + "loss": 0.0004, + "step": 7171 + }, + { + "epoch": 107.04, + "learning_rate": 0.00010112280701754385, + "loss": 0.0005, + "step": 7172 + }, + { + "epoch": 107.06, + "learning_rate": 0.00010108771929824562, + "loss": 0.0004, + "step": 7173 + }, + { + "epoch": 107.07, + "learning_rate": 0.00010105263157894735, + "loss": 0.0003, + "step": 7174 + }, + { + "epoch": 107.09, + "learning_rate": 0.00010101754385964911, + "loss": 0.0008, + "step": 7175 + }, + { + "epoch": 107.1, + "learning_rate": 0.00010098245614035086, + "loss": 0.0104, + "step": 7176 + }, + { + "epoch": 107.12, + "learning_rate": 0.00010094736842105263, + "loss": 0.0695, + "step": 7177 + }, + { + "epoch": 107.13, + "learning_rate": 0.00010091228070175437, + "loss": 0.0003, + "step": 7178 + }, + { + "epoch": 107.15, + "learning_rate": 0.00010087719298245614, + "loss": 0.1677, + "step": 7179 + }, + { + "epoch": 107.16, + "learning_rate": 0.00010084210526315789, + "loss": 0.1828, + "step": 7180 + }, + { + "epoch": 107.18, + "learning_rate": 0.00010080701754385965, + "loss": 0.0051, + "step": 7181 + }, + { + "epoch": 107.19, + "learning_rate": 0.00010077192982456138, + "loss": 0.0006, + "step": 7182 + }, + { + "epoch": 107.21, + "learning_rate": 0.00010073684210526314, + "loss": 0.0007, + "step": 7183 + }, + { + "epoch": 107.22, + "learning_rate": 0.00010070175438596491, + "loss": 0.0192, + "step": 7184 + }, + { + "epoch": 107.24, + "learning_rate": 0.00010066666666666666, + "loss": 0.0003, + "step": 7185 + }, + { + "epoch": 107.25, + "learning_rate": 0.00010063157894736842, + "loss": 0.0004, + "step": 7186 + }, + { + "epoch": 107.27, + "learning_rate": 0.00010059649122807017, + "loss": 0.1569, + "step": 7187 + }, + { + "epoch": 107.28, + "learning_rate": 0.00010056140350877193, + "loss": 0.0006, + "step": 7188 + }, + { + "epoch": 107.3, + "learning_rate": 0.00010052631578947368, + "loss": 0.193, + "step": 7189 + }, + { + "epoch": 107.31, + "learning_rate": 0.00010049122807017543, + "loss": 0.1177, + "step": 7190 + }, + { + "epoch": 107.33, + "learning_rate": 0.00010045614035087718, + "loss": 0.0047, + "step": 7191 + }, + { + "epoch": 107.34, + "learning_rate": 0.00010042105263157894, + "loss": 0.0005, + "step": 7192 + }, + { + "epoch": 107.36, + "learning_rate": 0.00010038596491228069, + "loss": 0.0005, + "step": 7193 + }, + { + "epoch": 107.37, + "learning_rate": 0.00010035087719298245, + "loss": 0.0005, + "step": 7194 + }, + { + "epoch": 107.39, + "learning_rate": 0.0001003157894736842, + "loss": 0.0082, + "step": 7195 + }, + { + "epoch": 107.4, + "learning_rate": 0.00010028070175438596, + "loss": 0.084, + "step": 7196 + }, + { + "epoch": 107.42, + "learning_rate": 0.00010024561403508772, + "loss": 0.0384, + "step": 7197 + }, + { + "epoch": 107.43, + "learning_rate": 0.00010021052631578946, + "loss": 0.0004, + "step": 7198 + }, + { + "epoch": 107.45, + "learning_rate": 0.00010017543859649122, + "loss": 0.2362, + "step": 7199 + }, + { + "epoch": 107.46, + "learning_rate": 0.00010014035087719297, + "loss": 0.0135, + "step": 7200 + }, + { + "epoch": 107.46, + "eval_accuracy": 0.8487518355359766, + "eval_f1": 0.8522686889090442, + "eval_loss": 0.8989996314048767, + "eval_runtime": 344.41, + "eval_samples_per_second": 11.864, + "eval_steps_per_second": 0.743, + "step": 7200 + }, + { + "epoch": 107.48, + "learning_rate": 0.00010010526315789473, + "loss": 0.0007, + "step": 7201 + }, + { + "epoch": 107.49, + "learning_rate": 0.00010007017543859648, + "loss": 0.0007, + "step": 7202 + }, + { + "epoch": 107.51, + "learning_rate": 0.00010003508771929824, + "loss": 0.0039, + "step": 7203 + }, + { + "epoch": 107.52, + "learning_rate": 9.999999999999999e-05, + "loss": 0.0029, + "step": 7204 + }, + { + "epoch": 107.54, + "learning_rate": 9.996491228070175e-05, + "loss": 0.0005, + "step": 7205 + }, + { + "epoch": 107.55, + "learning_rate": 9.992982456140349e-05, + "loss": 0.0929, + "step": 7206 + }, + { + "epoch": 107.57, + "learning_rate": 9.989473684210525e-05, + "loss": 0.0011, + "step": 7207 + }, + { + "epoch": 107.58, + "learning_rate": 9.9859649122807e-05, + "loss": 0.0015, + "step": 7208 + }, + { + "epoch": 107.59, + "learning_rate": 9.982456140350876e-05, + "loss": 0.1055, + "step": 7209 + }, + { + "epoch": 107.61, + "learning_rate": 9.978947368421052e-05, + "loss": 0.0126, + "step": 7210 + }, + { + "epoch": 107.62, + "learning_rate": 9.975438596491227e-05, + "loss": 0.0004, + "step": 7211 + }, + { + "epoch": 107.64, + "learning_rate": 9.971929824561404e-05, + "loss": 0.0492, + "step": 7212 + }, + { + "epoch": 107.65, + "learning_rate": 9.968421052631577e-05, + "loss": 0.0005, + "step": 7213 + }, + { + "epoch": 107.67, + "learning_rate": 9.964912280701753e-05, + "loss": 0.0005, + "step": 7214 + }, + { + "epoch": 107.68, + "learning_rate": 9.961403508771928e-05, + "loss": 0.0004, + "step": 7215 + }, + { + "epoch": 107.7, + "learning_rate": 9.957894736842104e-05, + "loss": 0.0005, + "step": 7216 + }, + { + "epoch": 107.71, + "learning_rate": 9.954385964912279e-05, + "loss": 0.1363, + "step": 7217 + }, + { + "epoch": 107.73, + "learning_rate": 9.950877192982456e-05, + "loss": 0.0007, + "step": 7218 + }, + { + "epoch": 107.74, + "learning_rate": 9.94736842105263e-05, + "loss": 0.0005, + "step": 7219 + }, + { + "epoch": 107.76, + "learning_rate": 9.943859649122807e-05, + "loss": 0.0035, + "step": 7220 + }, + { + "epoch": 107.77, + "learning_rate": 9.940350877192983e-05, + "loss": 0.0008, + "step": 7221 + }, + { + "epoch": 107.79, + "learning_rate": 9.936842105263156e-05, + "loss": 0.0004, + "step": 7222 + }, + { + "epoch": 107.8, + "learning_rate": 9.933333333333333e-05, + "loss": 0.0004, + "step": 7223 + }, + { + "epoch": 107.82, + "learning_rate": 9.929824561403508e-05, + "loss": 0.2155, + "step": 7224 + }, + { + "epoch": 107.83, + "learning_rate": 9.926315789473684e-05, + "loss": 0.0007, + "step": 7225 + }, + { + "epoch": 107.85, + "learning_rate": 9.922807017543859e-05, + "loss": 0.2681, + "step": 7226 + }, + { + "epoch": 107.86, + "learning_rate": 9.919298245614035e-05, + "loss": 0.0009, + "step": 7227 + }, + { + "epoch": 107.88, + "learning_rate": 9.91578947368421e-05, + "loss": 0.2484, + "step": 7228 + }, + { + "epoch": 107.89, + "learning_rate": 9.912280701754386e-05, + "loss": 0.0004, + "step": 7229 + }, + { + "epoch": 107.91, + "learning_rate": 9.90877192982456e-05, + "loss": 0.0012, + "step": 7230 + }, + { + "epoch": 107.92, + "learning_rate": 9.905263157894736e-05, + "loss": 0.0004, + "step": 7231 + }, + { + "epoch": 107.94, + "learning_rate": 9.90175438596491e-05, + "loss": 0.0005, + "step": 7232 + }, + { + "epoch": 107.95, + "learning_rate": 9.898245614035087e-05, + "loss": 0.0005, + "step": 7233 + }, + { + "epoch": 107.97, + "learning_rate": 9.894736842105263e-05, + "loss": 0.0008, + "step": 7234 + }, + { + "epoch": 107.98, + "learning_rate": 9.891228070175438e-05, + "loss": 0.2469, + "step": 7235 + }, + { + "epoch": 108.0, + "learning_rate": 9.887719298245614e-05, + "loss": 0.0005, + "step": 7236 + }, + { + "epoch": 108.01, + "learning_rate": 9.884210526315788e-05, + "loss": 0.0026, + "step": 7237 + }, + { + "epoch": 108.03, + "learning_rate": 9.880701754385964e-05, + "loss": 0.0469, + "step": 7238 + }, + { + "epoch": 108.04, + "learning_rate": 9.877192982456139e-05, + "loss": 0.0003, + "step": 7239 + }, + { + "epoch": 108.06, + "learning_rate": 9.873684210526315e-05, + "loss": 0.0006, + "step": 7240 + }, + { + "epoch": 108.07, + "learning_rate": 9.87017543859649e-05, + "loss": 0.0007, + "step": 7241 + }, + { + "epoch": 108.09, + "learning_rate": 9.866666666666666e-05, + "loss": 0.0048, + "step": 7242 + }, + { + "epoch": 108.1, + "learning_rate": 9.863157894736841e-05, + "loss": 0.0006, + "step": 7243 + }, + { + "epoch": 108.12, + "learning_rate": 9.859649122807017e-05, + "loss": 0.0006, + "step": 7244 + }, + { + "epoch": 108.13, + "learning_rate": 9.856140350877191e-05, + "loss": 0.0004, + "step": 7245 + }, + { + "epoch": 108.15, + "learning_rate": 9.852631578947367e-05, + "loss": 0.0757, + "step": 7246 + }, + { + "epoch": 108.16, + "learning_rate": 9.849122807017543e-05, + "loss": 0.0004, + "step": 7247 + }, + { + "epoch": 108.18, + "learning_rate": 9.845614035087718e-05, + "loss": 0.0097, + "step": 7248 + }, + { + "epoch": 108.19, + "learning_rate": 9.842105263157894e-05, + "loss": 0.0004, + "step": 7249 + }, + { + "epoch": 108.21, + "learning_rate": 9.838596491228069e-05, + "loss": 0.0033, + "step": 7250 + }, + { + "epoch": 108.22, + "learning_rate": 9.835087719298245e-05, + "loss": 0.0004, + "step": 7251 + }, + { + "epoch": 108.24, + "learning_rate": 9.83157894736842e-05, + "loss": 0.0004, + "step": 7252 + }, + { + "epoch": 108.25, + "learning_rate": 9.828070175438597e-05, + "loss": 0.1304, + "step": 7253 + }, + { + "epoch": 108.27, + "learning_rate": 9.82456140350877e-05, + "loss": 0.0005, + "step": 7254 + }, + { + "epoch": 108.28, + "learning_rate": 9.821052631578946e-05, + "loss": 0.0037, + "step": 7255 + }, + { + "epoch": 108.3, + "learning_rate": 9.817543859649121e-05, + "loss": 0.0031, + "step": 7256 + }, + { + "epoch": 108.31, + "learning_rate": 9.814035087719297e-05, + "loss": 0.1553, + "step": 7257 + }, + { + "epoch": 108.33, + "learning_rate": 9.810526315789472e-05, + "loss": 0.0009, + "step": 7258 + }, + { + "epoch": 108.34, + "learning_rate": 9.807017543859649e-05, + "loss": 0.0005, + "step": 7259 + }, + { + "epoch": 108.36, + "learning_rate": 9.803508771929825e-05, + "loss": 0.0206, + "step": 7260 + }, + { + "epoch": 108.37, + "learning_rate": 9.799999999999998e-05, + "loss": 0.0004, + "step": 7261 + }, + { + "epoch": 108.39, + "learning_rate": 9.796491228070175e-05, + "loss": 0.0011, + "step": 7262 + }, + { + "epoch": 108.4, + "learning_rate": 9.79298245614035e-05, + "loss": 0.0066, + "step": 7263 + }, + { + "epoch": 108.42, + "learning_rate": 9.789473684210526e-05, + "loss": 0.0004, + "step": 7264 + }, + { + "epoch": 108.43, + "learning_rate": 9.7859649122807e-05, + "loss": 0.0007, + "step": 7265 + }, + { + "epoch": 108.45, + "learning_rate": 9.782456140350877e-05, + "loss": 0.0005, + "step": 7266 + }, + { + "epoch": 108.46, + "learning_rate": 9.778947368421052e-05, + "loss": 0.0004, + "step": 7267 + }, + { + "epoch": 108.48, + "learning_rate": 9.775438596491228e-05, + "loss": 0.0006, + "step": 7268 + }, + { + "epoch": 108.49, + "learning_rate": 9.771929824561401e-05, + "loss": 0.0005, + "step": 7269 + }, + { + "epoch": 108.51, + "learning_rate": 9.768421052631578e-05, + "loss": 0.0004, + "step": 7270 + }, + { + "epoch": 108.52, + "learning_rate": 9.764912280701754e-05, + "loss": 0.0018, + "step": 7271 + }, + { + "epoch": 108.54, + "learning_rate": 9.761403508771929e-05, + "loss": 0.2924, + "step": 7272 + }, + { + "epoch": 108.55, + "learning_rate": 9.757894736842105e-05, + "loss": 0.0004, + "step": 7273 + }, + { + "epoch": 108.57, + "learning_rate": 9.75438596491228e-05, + "loss": 0.0018, + "step": 7274 + }, + { + "epoch": 108.58, + "learning_rate": 9.750877192982456e-05, + "loss": 0.0012, + "step": 7275 + }, + { + "epoch": 108.59, + "learning_rate": 9.747368421052631e-05, + "loss": 0.0125, + "step": 7276 + }, + { + "epoch": 108.61, + "learning_rate": 9.743859649122807e-05, + "loss": 0.0004, + "step": 7277 + }, + { + "epoch": 108.62, + "learning_rate": 9.740350877192981e-05, + "loss": 0.0005, + "step": 7278 + }, + { + "epoch": 108.64, + "learning_rate": 9.736842105263157e-05, + "loss": 0.0007, + "step": 7279 + }, + { + "epoch": 108.65, + "learning_rate": 9.733333333333332e-05, + "loss": 0.0005, + "step": 7280 + }, + { + "epoch": 108.67, + "learning_rate": 9.729824561403508e-05, + "loss": 0.0003, + "step": 7281 + }, + { + "epoch": 108.68, + "learning_rate": 9.726315789473683e-05, + "loss": 0.0005, + "step": 7282 + }, + { + "epoch": 108.7, + "learning_rate": 9.722807017543859e-05, + "loss": 0.0014, + "step": 7283 + }, + { + "epoch": 108.71, + "learning_rate": 9.719298245614035e-05, + "loss": 0.0005, + "step": 7284 + }, + { + "epoch": 108.73, + "learning_rate": 9.715789473684209e-05, + "loss": 0.0026, + "step": 7285 + }, + { + "epoch": 108.74, + "learning_rate": 9.712280701754385e-05, + "loss": 0.0128, + "step": 7286 + }, + { + "epoch": 108.76, + "learning_rate": 9.70877192982456e-05, + "loss": 0.0005, + "step": 7287 + }, + { + "epoch": 108.77, + "learning_rate": 9.705263157894736e-05, + "loss": 0.0045, + "step": 7288 + }, + { + "epoch": 108.79, + "learning_rate": 9.701754385964911e-05, + "loss": 0.0012, + "step": 7289 + }, + { + "epoch": 108.8, + "learning_rate": 9.698245614035087e-05, + "loss": 0.0269, + "step": 7290 + }, + { + "epoch": 108.82, + "learning_rate": 9.694736842105262e-05, + "loss": 0.0005, + "step": 7291 + }, + { + "epoch": 108.83, + "learning_rate": 9.691228070175439e-05, + "loss": 0.0006, + "step": 7292 + }, + { + "epoch": 108.85, + "learning_rate": 9.687719298245612e-05, + "loss": 0.0005, + "step": 7293 + }, + { + "epoch": 108.86, + "learning_rate": 9.684210526315788e-05, + "loss": 0.0004, + "step": 7294 + }, + { + "epoch": 108.88, + "learning_rate": 9.680701754385963e-05, + "loss": 0.0004, + "step": 7295 + }, + { + "epoch": 108.89, + "learning_rate": 9.67719298245614e-05, + "loss": 0.0004, + "step": 7296 + }, + { + "epoch": 108.91, + "learning_rate": 9.673684210526316e-05, + "loss": 0.0004, + "step": 7297 + }, + { + "epoch": 108.92, + "learning_rate": 9.67017543859649e-05, + "loss": 0.0051, + "step": 7298 + }, + { + "epoch": 108.94, + "learning_rate": 9.666666666666667e-05, + "loss": 0.0005, + "step": 7299 + }, + { + "epoch": 108.95, + "learning_rate": 9.663157894736842e-05, + "loss": 0.0012, + "step": 7300 + }, + { + "epoch": 108.97, + "learning_rate": 9.659649122807018e-05, + "loss": 0.0003, + "step": 7301 + }, + { + "epoch": 108.98, + "learning_rate": 9.656140350877191e-05, + "loss": 0.0017, + "step": 7302 + }, + { + "epoch": 109.0, + "learning_rate": 9.652631578947368e-05, + "loss": 0.0251, + "step": 7303 + }, + { + "epoch": 109.01, + "learning_rate": 9.649122807017542e-05, + "loss": 0.0009, + "step": 7304 + }, + { + "epoch": 109.03, + "learning_rate": 9.645614035087719e-05, + "loss": 0.0003, + "step": 7305 + }, + { + "epoch": 109.04, + "learning_rate": 9.642105263157894e-05, + "loss": 0.1369, + "step": 7306 + }, + { + "epoch": 109.06, + "learning_rate": 9.63859649122807e-05, + "loss": 0.0092, + "step": 7307 + }, + { + "epoch": 109.07, + "learning_rate": 9.635087719298243e-05, + "loss": 0.0006, + "step": 7308 + }, + { + "epoch": 109.09, + "learning_rate": 9.63157894736842e-05, + "loss": 0.1566, + "step": 7309 + }, + { + "epoch": 109.1, + "learning_rate": 9.628070175438596e-05, + "loss": 0.0004, + "step": 7310 + }, + { + "epoch": 109.12, + "learning_rate": 9.624561403508771e-05, + "loss": 0.0004, + "step": 7311 + }, + { + "epoch": 109.13, + "learning_rate": 9.621052631578947e-05, + "loss": 0.0006, + "step": 7312 + }, + { + "epoch": 109.15, + "learning_rate": 9.617543859649122e-05, + "loss": 0.0004, + "step": 7313 + }, + { + "epoch": 109.16, + "learning_rate": 9.614035087719298e-05, + "loss": 0.0004, + "step": 7314 + }, + { + "epoch": 109.18, + "learning_rate": 9.610526315789473e-05, + "loss": 0.0004, + "step": 7315 + }, + { + "epoch": 109.19, + "learning_rate": 9.607017543859649e-05, + "loss": 0.0004, + "step": 7316 + }, + { + "epoch": 109.21, + "learning_rate": 9.603508771929823e-05, + "loss": 0.0009, + "step": 7317 + }, + { + "epoch": 109.22, + "learning_rate": 9.599999999999999e-05, + "loss": 0.0004, + "step": 7318 + }, + { + "epoch": 109.24, + "learning_rate": 9.596491228070174e-05, + "loss": 0.0004, + "step": 7319 + }, + { + "epoch": 109.25, + "learning_rate": 9.59298245614035e-05, + "loss": 0.0003, + "step": 7320 + }, + { + "epoch": 109.27, + "learning_rate": 9.589473684210525e-05, + "loss": 0.0003, + "step": 7321 + }, + { + "epoch": 109.28, + "learning_rate": 9.585964912280701e-05, + "loss": 0.0005, + "step": 7322 + }, + { + "epoch": 109.3, + "learning_rate": 9.582456140350877e-05, + "loss": 0.0004, + "step": 7323 + }, + { + "epoch": 109.31, + "learning_rate": 9.578947368421052e-05, + "loss": 0.0005, + "step": 7324 + }, + { + "epoch": 109.33, + "learning_rate": 9.575438596491228e-05, + "loss": 0.0004, + "step": 7325 + }, + { + "epoch": 109.34, + "learning_rate": 9.571929824561402e-05, + "loss": 0.0006, + "step": 7326 + }, + { + "epoch": 109.36, + "learning_rate": 9.568421052631578e-05, + "loss": 0.0004, + "step": 7327 + }, + { + "epoch": 109.37, + "learning_rate": 9.564912280701753e-05, + "loss": 0.0004, + "step": 7328 + }, + { + "epoch": 109.39, + "learning_rate": 9.561403508771929e-05, + "loss": 0.286, + "step": 7329 + }, + { + "epoch": 109.4, + "learning_rate": 9.557894736842104e-05, + "loss": 0.0004, + "step": 7330 + }, + { + "epoch": 109.42, + "learning_rate": 9.55438596491228e-05, + "loss": 0.0005, + "step": 7331 + }, + { + "epoch": 109.43, + "learning_rate": 9.550877192982454e-05, + "loss": 0.0003, + "step": 7332 + }, + { + "epoch": 109.45, + "learning_rate": 9.54736842105263e-05, + "loss": 0.0005, + "step": 7333 + }, + { + "epoch": 109.46, + "learning_rate": 9.543859649122806e-05, + "loss": 0.0005, + "step": 7334 + }, + { + "epoch": 109.48, + "learning_rate": 9.540350877192981e-05, + "loss": 0.0004, + "step": 7335 + }, + { + "epoch": 109.49, + "learning_rate": 9.536842105263158e-05, + "loss": 0.1862, + "step": 7336 + }, + { + "epoch": 109.51, + "learning_rate": 9.533333333333332e-05, + "loss": 0.0016, + "step": 7337 + }, + { + "epoch": 109.52, + "learning_rate": 9.529824561403509e-05, + "loss": 0.0007, + "step": 7338 + }, + { + "epoch": 109.54, + "learning_rate": 9.526315789473684e-05, + "loss": 0.0004, + "step": 7339 + }, + { + "epoch": 109.55, + "learning_rate": 9.52280701754386e-05, + "loss": 0.0475, + "step": 7340 + }, + { + "epoch": 109.57, + "learning_rate": 9.519298245614033e-05, + "loss": 0.0006, + "step": 7341 + }, + { + "epoch": 109.58, + "learning_rate": 9.51578947368421e-05, + "loss": 0.0012, + "step": 7342 + }, + { + "epoch": 109.59, + "learning_rate": 9.512280701754384e-05, + "loss": 0.0007, + "step": 7343 + }, + { + "epoch": 109.61, + "learning_rate": 9.50877192982456e-05, + "loss": 0.0071, + "step": 7344 + }, + { + "epoch": 109.62, + "learning_rate": 9.505263157894735e-05, + "loss": 0.0008, + "step": 7345 + }, + { + "epoch": 109.64, + "learning_rate": 9.501754385964912e-05, + "loss": 0.0005, + "step": 7346 + }, + { + "epoch": 109.65, + "learning_rate": 9.498245614035088e-05, + "loss": 0.0004, + "step": 7347 + }, + { + "epoch": 109.67, + "learning_rate": 9.494736842105263e-05, + "loss": 0.0324, + "step": 7348 + }, + { + "epoch": 109.68, + "learning_rate": 9.491228070175439e-05, + "loss": 0.0005, + "step": 7349 + }, + { + "epoch": 109.7, + "learning_rate": 9.487719298245613e-05, + "loss": 0.0459, + "step": 7350 + }, + { + "epoch": 109.71, + "learning_rate": 9.484210526315789e-05, + "loss": 0.0004, + "step": 7351 + }, + { + "epoch": 109.73, + "learning_rate": 9.480701754385964e-05, + "loss": 0.0005, + "step": 7352 + }, + { + "epoch": 109.74, + "learning_rate": 9.47719298245614e-05, + "loss": 0.0036, + "step": 7353 + }, + { + "epoch": 109.76, + "learning_rate": 9.473684210526315e-05, + "loss": 0.0005, + "step": 7354 + }, + { + "epoch": 109.77, + "learning_rate": 9.470175438596491e-05, + "loss": 0.0005, + "step": 7355 + }, + { + "epoch": 109.79, + "learning_rate": 9.466666666666665e-05, + "loss": 0.0013, + "step": 7356 + }, + { + "epoch": 109.8, + "learning_rate": 9.463157894736841e-05, + "loss": 0.0005, + "step": 7357 + }, + { + "epoch": 109.82, + "learning_rate": 9.459649122807016e-05, + "loss": 0.0004, + "step": 7358 + }, + { + "epoch": 109.83, + "learning_rate": 9.456140350877192e-05, + "loss": 0.0004, + "step": 7359 + }, + { + "epoch": 109.85, + "learning_rate": 9.452631578947368e-05, + "loss": 0.0005, + "step": 7360 + }, + { + "epoch": 109.86, + "learning_rate": 9.449122807017543e-05, + "loss": 0.0005, + "step": 7361 + }, + { + "epoch": 109.88, + "learning_rate": 9.445614035087719e-05, + "loss": 0.0005, + "step": 7362 + }, + { + "epoch": 109.89, + "learning_rate": 9.442105263157894e-05, + "loss": 0.0006, + "step": 7363 + }, + { + "epoch": 109.91, + "learning_rate": 9.43859649122807e-05, + "loss": 0.0004, + "step": 7364 + }, + { + "epoch": 109.92, + "learning_rate": 9.435087719298244e-05, + "loss": 0.0005, + "step": 7365 + }, + { + "epoch": 109.94, + "learning_rate": 9.43157894736842e-05, + "loss": 0.3628, + "step": 7366 + }, + { + "epoch": 109.95, + "learning_rate": 9.428070175438595e-05, + "loss": 0.0009, + "step": 7367 + }, + { + "epoch": 109.97, + "learning_rate": 9.424561403508771e-05, + "loss": 0.0006, + "step": 7368 + }, + { + "epoch": 109.98, + "learning_rate": 9.421052631578946e-05, + "loss": 0.0004, + "step": 7369 + }, + { + "epoch": 110.0, + "learning_rate": 9.417543859649122e-05, + "loss": 0.0581, + "step": 7370 + }, + { + "epoch": 110.01, + "learning_rate": 9.414035087719297e-05, + "loss": 0.0037, + "step": 7371 + }, + { + "epoch": 110.03, + "learning_rate": 9.410526315789473e-05, + "loss": 0.0004, + "step": 7372 + }, + { + "epoch": 110.04, + "learning_rate": 9.407017543859648e-05, + "loss": 0.0005, + "step": 7373 + }, + { + "epoch": 110.06, + "learning_rate": 9.403508771929823e-05, + "loss": 0.0006, + "step": 7374 + }, + { + "epoch": 110.07, + "learning_rate": 9.4e-05, + "loss": 0.0005, + "step": 7375 + }, + { + "epoch": 110.09, + "learning_rate": 9.396491228070174e-05, + "loss": 0.0007, + "step": 7376 + }, + { + "epoch": 110.1, + "learning_rate": 9.39298245614035e-05, + "loss": 0.0004, + "step": 7377 + }, + { + "epoch": 110.12, + "learning_rate": 9.389473684210525e-05, + "loss": 0.1453, + "step": 7378 + }, + { + "epoch": 110.13, + "learning_rate": 9.385964912280702e-05, + "loss": 0.0018, + "step": 7379 + }, + { + "epoch": 110.15, + "learning_rate": 9.382456140350875e-05, + "loss": 0.0004, + "step": 7380 + }, + { + "epoch": 110.16, + "learning_rate": 9.378947368421051e-05, + "loss": 0.0005, + "step": 7381 + }, + { + "epoch": 110.18, + "learning_rate": 9.375438596491226e-05, + "loss": 0.0008, + "step": 7382 + }, + { + "epoch": 110.19, + "learning_rate": 9.371929824561403e-05, + "loss": 0.0005, + "step": 7383 + }, + { + "epoch": 110.21, + "learning_rate": 9.368421052631579e-05, + "loss": 0.0007, + "step": 7384 + }, + { + "epoch": 110.22, + "learning_rate": 9.364912280701754e-05, + "loss": 0.0005, + "step": 7385 + }, + { + "epoch": 110.24, + "learning_rate": 9.36140350877193e-05, + "loss": 0.0006, + "step": 7386 + }, + { + "epoch": 110.25, + "learning_rate": 9.357894736842105e-05, + "loss": 0.0199, + "step": 7387 + }, + { + "epoch": 110.27, + "learning_rate": 9.354385964912281e-05, + "loss": 0.0981, + "step": 7388 + }, + { + "epoch": 110.28, + "learning_rate": 9.350877192982455e-05, + "loss": 0.0014, + "step": 7389 + }, + { + "epoch": 110.3, + "learning_rate": 9.347368421052631e-05, + "loss": 0.0004, + "step": 7390 + }, + { + "epoch": 110.31, + "learning_rate": 9.343859649122806e-05, + "loss": 0.0005, + "step": 7391 + }, + { + "epoch": 110.33, + "learning_rate": 9.340350877192982e-05, + "loss": 0.001, + "step": 7392 + }, + { + "epoch": 110.34, + "learning_rate": 9.336842105263157e-05, + "loss": 0.0007, + "step": 7393 + }, + { + "epoch": 110.36, + "learning_rate": 9.333333333333333e-05, + "loss": 0.0005, + "step": 7394 + }, + { + "epoch": 110.37, + "learning_rate": 9.329824561403508e-05, + "loss": 0.003, + "step": 7395 + }, + { + "epoch": 110.39, + "learning_rate": 9.326315789473684e-05, + "loss": 0.0005, + "step": 7396 + }, + { + "epoch": 110.4, + "learning_rate": 9.322807017543859e-05, + "loss": 0.0031, + "step": 7397 + }, + { + "epoch": 110.42, + "learning_rate": 9.319298245614034e-05, + "loss": 0.1359, + "step": 7398 + }, + { + "epoch": 110.43, + "learning_rate": 9.31578947368421e-05, + "loss": 0.0018, + "step": 7399 + }, + { + "epoch": 110.45, + "learning_rate": 9.312280701754385e-05, + "loss": 0.0056, + "step": 7400 + }, + { + "epoch": 110.45, + "eval_accuracy": 0.8663729809104258, + "eval_f1": 0.8672745803593204, + "eval_loss": 0.7320371270179749, + "eval_runtime": 345.8375, + "eval_samples_per_second": 11.815, + "eval_steps_per_second": 0.74, + "step": 7400 + }, + { + "epoch": 110.46, + "learning_rate": 9.308771929824561e-05, + "loss": 0.0007, + "step": 7401 + }, + { + "epoch": 110.48, + "learning_rate": 9.305263157894736e-05, + "loss": 0.0004, + "step": 7402 + }, + { + "epoch": 110.49, + "learning_rate": 9.301754385964912e-05, + "loss": 0.0004, + "step": 7403 + }, + { + "epoch": 110.51, + "learning_rate": 9.298245614035086e-05, + "loss": 0.0007, + "step": 7404 + }, + { + "epoch": 110.52, + "learning_rate": 9.294736842105262e-05, + "loss": 0.0004, + "step": 7405 + }, + { + "epoch": 110.54, + "learning_rate": 9.291228070175437e-05, + "loss": 0.0021, + "step": 7406 + }, + { + "epoch": 110.55, + "learning_rate": 9.287719298245613e-05, + "loss": 0.0136, + "step": 7407 + }, + { + "epoch": 110.57, + "learning_rate": 9.284210526315788e-05, + "loss": 0.072, + "step": 7408 + }, + { + "epoch": 110.58, + "learning_rate": 9.280701754385964e-05, + "loss": 0.0006, + "step": 7409 + }, + { + "epoch": 110.59, + "learning_rate": 9.27719298245614e-05, + "loss": 0.0004, + "step": 7410 + }, + { + "epoch": 110.61, + "learning_rate": 9.273684210526315e-05, + "loss": 0.2028, + "step": 7411 + }, + { + "epoch": 110.62, + "learning_rate": 9.270175438596492e-05, + "loss": 0.0009, + "step": 7412 + }, + { + "epoch": 110.64, + "learning_rate": 9.266666666666665e-05, + "loss": 0.0004, + "step": 7413 + }, + { + "epoch": 110.65, + "learning_rate": 9.263157894736841e-05, + "loss": 0.0006, + "step": 7414 + }, + { + "epoch": 110.67, + "learning_rate": 9.259649122807016e-05, + "loss": 0.0006, + "step": 7415 + }, + { + "epoch": 110.68, + "learning_rate": 9.256140350877192e-05, + "loss": 0.0008, + "step": 7416 + }, + { + "epoch": 110.7, + "learning_rate": 9.252631578947367e-05, + "loss": 0.0008, + "step": 7417 + }, + { + "epoch": 110.71, + "learning_rate": 9.249122807017544e-05, + "loss": 0.0013, + "step": 7418 + }, + { + "epoch": 110.73, + "learning_rate": 9.245614035087718e-05, + "loss": 0.0007, + "step": 7419 + }, + { + "epoch": 110.74, + "learning_rate": 9.242105263157893e-05, + "loss": 0.0007, + "step": 7420 + }, + { + "epoch": 110.76, + "learning_rate": 9.238596491228068e-05, + "loss": 0.0004, + "step": 7421 + }, + { + "epoch": 110.77, + "learning_rate": 9.235087719298244e-05, + "loss": 0.1436, + "step": 7422 + }, + { + "epoch": 110.79, + "learning_rate": 9.231578947368421e-05, + "loss": 0.0007, + "step": 7423 + }, + { + "epoch": 110.8, + "learning_rate": 9.228070175438596e-05, + "loss": 0.0032, + "step": 7424 + }, + { + "epoch": 110.82, + "learning_rate": 9.224561403508772e-05, + "loss": 0.0004, + "step": 7425 + }, + { + "epoch": 110.83, + "learning_rate": 9.221052631578947e-05, + "loss": 0.0006, + "step": 7426 + }, + { + "epoch": 110.85, + "learning_rate": 9.217543859649123e-05, + "loss": 0.0038, + "step": 7427 + }, + { + "epoch": 110.86, + "learning_rate": 9.214035087719296e-05, + "loss": 0.0006, + "step": 7428 + }, + { + "epoch": 110.88, + "learning_rate": 9.210526315789473e-05, + "loss": 0.0011, + "step": 7429 + }, + { + "epoch": 110.89, + "learning_rate": 9.207017543859648e-05, + "loss": 0.0004, + "step": 7430 + }, + { + "epoch": 110.91, + "learning_rate": 9.203508771929824e-05, + "loss": 0.003, + "step": 7431 + }, + { + "epoch": 110.92, + "learning_rate": 9.199999999999999e-05, + "loss": 0.0009, + "step": 7432 + }, + { + "epoch": 110.94, + "learning_rate": 9.196491228070175e-05, + "loss": 0.0006, + "step": 7433 + }, + { + "epoch": 110.95, + "learning_rate": 9.192982456140351e-05, + "loss": 0.0026, + "step": 7434 + }, + { + "epoch": 110.97, + "learning_rate": 9.189473684210526e-05, + "loss": 0.0029, + "step": 7435 + }, + { + "epoch": 110.98, + "learning_rate": 9.185964912280702e-05, + "loss": 0.0007, + "step": 7436 + }, + { + "epoch": 111.0, + "learning_rate": 9.182456140350876e-05, + "loss": 0.0004, + "step": 7437 + }, + { + "epoch": 111.01, + "learning_rate": 9.178947368421052e-05, + "loss": 0.0081, + "step": 7438 + }, + { + "epoch": 111.03, + "learning_rate": 9.175438596491227e-05, + "loss": 0.0344, + "step": 7439 + }, + { + "epoch": 111.04, + "learning_rate": 9.171929824561403e-05, + "loss": 0.0006, + "step": 7440 + }, + { + "epoch": 111.06, + "learning_rate": 9.168421052631578e-05, + "loss": 0.002, + "step": 7441 + }, + { + "epoch": 111.07, + "learning_rate": 9.164912280701754e-05, + "loss": 0.0005, + "step": 7442 + }, + { + "epoch": 111.09, + "learning_rate": 9.161403508771929e-05, + "loss": 0.1795, + "step": 7443 + }, + { + "epoch": 111.1, + "learning_rate": 9.157894736842104e-05, + "loss": 0.0006, + "step": 7444 + }, + { + "epoch": 111.12, + "learning_rate": 9.154385964912279e-05, + "loss": 0.1568, + "step": 7445 + }, + { + "epoch": 111.13, + "learning_rate": 9.150877192982455e-05, + "loss": 0.0005, + "step": 7446 + }, + { + "epoch": 111.15, + "learning_rate": 9.147368421052631e-05, + "loss": 0.0023, + "step": 7447 + }, + { + "epoch": 111.16, + "learning_rate": 9.143859649122806e-05, + "loss": 0.0008, + "step": 7448 + }, + { + "epoch": 111.18, + "learning_rate": 9.140350877192982e-05, + "loss": 0.0029, + "step": 7449 + }, + { + "epoch": 111.19, + "learning_rate": 9.136842105263157e-05, + "loss": 0.0005, + "step": 7450 + }, + { + "epoch": 111.21, + "learning_rate": 9.133333333333334e-05, + "loss": 0.0005, + "step": 7451 + }, + { + "epoch": 111.22, + "learning_rate": 9.129824561403507e-05, + "loss": 0.0669, + "step": 7452 + }, + { + "epoch": 111.24, + "learning_rate": 9.126315789473683e-05, + "loss": 0.0004, + "step": 7453 + }, + { + "epoch": 111.25, + "learning_rate": 9.122807017543858e-05, + "loss": 0.0005, + "step": 7454 + }, + { + "epoch": 111.27, + "learning_rate": 9.119298245614034e-05, + "loss": 0.0004, + "step": 7455 + }, + { + "epoch": 111.28, + "learning_rate": 9.115789473684209e-05, + "loss": 0.0004, + "step": 7456 + }, + { + "epoch": 111.3, + "learning_rate": 9.112280701754386e-05, + "loss": 0.0012, + "step": 7457 + }, + { + "epoch": 111.31, + "learning_rate": 9.10877192982456e-05, + "loss": 0.0005, + "step": 7458 + }, + { + "epoch": 111.33, + "learning_rate": 9.105263157894737e-05, + "loss": 0.0006, + "step": 7459 + }, + { + "epoch": 111.34, + "learning_rate": 9.101754385964913e-05, + "loss": 0.0266, + "step": 7460 + }, + { + "epoch": 111.36, + "learning_rate": 9.098245614035086e-05, + "loss": 0.0007, + "step": 7461 + }, + { + "epoch": 111.37, + "learning_rate": 9.094736842105263e-05, + "loss": 0.0006, + "step": 7462 + }, + { + "epoch": 111.39, + "learning_rate": 9.091228070175437e-05, + "loss": 0.0004, + "step": 7463 + }, + { + "epoch": 111.4, + "learning_rate": 9.087719298245614e-05, + "loss": 0.0004, + "step": 7464 + }, + { + "epoch": 111.42, + "learning_rate": 9.084210526315789e-05, + "loss": 0.0004, + "step": 7465 + }, + { + "epoch": 111.43, + "learning_rate": 9.080701754385965e-05, + "loss": 0.0008, + "step": 7466 + }, + { + "epoch": 111.45, + "learning_rate": 9.077192982456138e-05, + "loss": 0.0007, + "step": 7467 + }, + { + "epoch": 111.46, + "learning_rate": 9.073684210526315e-05, + "loss": 0.0005, + "step": 7468 + }, + { + "epoch": 111.48, + "learning_rate": 9.07017543859649e-05, + "loss": 0.0004, + "step": 7469 + }, + { + "epoch": 111.49, + "learning_rate": 9.066666666666666e-05, + "loss": 0.0004, + "step": 7470 + }, + { + "epoch": 111.51, + "learning_rate": 9.06315789473684e-05, + "loss": 0.0129, + "step": 7471 + }, + { + "epoch": 111.52, + "learning_rate": 9.059649122807017e-05, + "loss": 0.002, + "step": 7472 + }, + { + "epoch": 111.54, + "learning_rate": 9.056140350877193e-05, + "loss": 0.0004, + "step": 7473 + }, + { + "epoch": 111.55, + "learning_rate": 9.052631578947368e-05, + "loss": 0.0004, + "step": 7474 + }, + { + "epoch": 111.57, + "learning_rate": 9.049122807017544e-05, + "loss": 0.0006, + "step": 7475 + }, + { + "epoch": 111.58, + "learning_rate": 9.045614035087718e-05, + "loss": 0.0004, + "step": 7476 + }, + { + "epoch": 111.59, + "learning_rate": 9.042105263157894e-05, + "loss": 0.001, + "step": 7477 + }, + { + "epoch": 111.61, + "learning_rate": 9.038596491228069e-05, + "loss": 0.0004, + "step": 7478 + }, + { + "epoch": 111.62, + "learning_rate": 9.035087719298245e-05, + "loss": 0.0007, + "step": 7479 + }, + { + "epoch": 111.64, + "learning_rate": 9.03157894736842e-05, + "loss": 0.0004, + "step": 7480 + }, + { + "epoch": 111.65, + "learning_rate": 9.028070175438596e-05, + "loss": 0.0005, + "step": 7481 + }, + { + "epoch": 111.67, + "learning_rate": 9.024561403508771e-05, + "loss": 0.0073, + "step": 7482 + }, + { + "epoch": 111.68, + "learning_rate": 9.021052631578947e-05, + "loss": 0.0004, + "step": 7483 + }, + { + "epoch": 111.7, + "learning_rate": 9.017543859649123e-05, + "loss": 0.0004, + "step": 7484 + }, + { + "epoch": 111.71, + "learning_rate": 9.014035087719297e-05, + "loss": 0.0154, + "step": 7485 + }, + { + "epoch": 111.73, + "learning_rate": 9.010526315789473e-05, + "loss": 0.003, + "step": 7486 + }, + { + "epoch": 111.74, + "learning_rate": 9.007017543859648e-05, + "loss": 0.0004, + "step": 7487 + }, + { + "epoch": 111.76, + "learning_rate": 9.003508771929824e-05, + "loss": 0.0004, + "step": 7488 + }, + { + "epoch": 111.77, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0466, + "step": 7489 + }, + { + "epoch": 111.79, + "learning_rate": 8.996491228070175e-05, + "loss": 0.0004, + "step": 7490 + }, + { + "epoch": 111.8, + "learning_rate": 8.992982456140349e-05, + "loss": 0.0006, + "step": 7491 + }, + { + "epoch": 111.82, + "learning_rate": 8.989473684210525e-05, + "loss": 0.1431, + "step": 7492 + }, + { + "epoch": 111.83, + "learning_rate": 8.9859649122807e-05, + "loss": 0.0005, + "step": 7493 + }, + { + "epoch": 111.85, + "learning_rate": 8.982456140350876e-05, + "loss": 0.0008, + "step": 7494 + }, + { + "epoch": 111.86, + "learning_rate": 8.978947368421051e-05, + "loss": 0.0003, + "step": 7495 + }, + { + "epoch": 111.88, + "learning_rate": 8.975438596491227e-05, + "loss": 0.0004, + "step": 7496 + }, + { + "epoch": 111.89, + "learning_rate": 8.971929824561404e-05, + "loss": 0.0014, + "step": 7497 + }, + { + "epoch": 111.91, + "learning_rate": 8.968421052631579e-05, + "loss": 0.0004, + "step": 7498 + }, + { + "epoch": 111.92, + "learning_rate": 8.964912280701755e-05, + "loss": 0.0004, + "step": 7499 + }, + { + "epoch": 111.94, + "learning_rate": 8.961403508771928e-05, + "loss": 0.4048, + "step": 7500 + }, + { + "epoch": 111.95, + "learning_rate": 8.957894736842105e-05, + "loss": 0.0005, + "step": 7501 + }, + { + "epoch": 111.97, + "learning_rate": 8.95438596491228e-05, + "loss": 0.0007, + "step": 7502 + }, + { + "epoch": 111.98, + "learning_rate": 8.950877192982456e-05, + "loss": 0.0003, + "step": 7503 + }, + { + "epoch": 112.0, + "learning_rate": 8.94736842105263e-05, + "loss": 0.0005, + "step": 7504 + }, + { + "epoch": 112.01, + "learning_rate": 8.943859649122807e-05, + "loss": 0.0006, + "step": 7505 + }, + { + "epoch": 112.03, + "learning_rate": 8.940350877192982e-05, + "loss": 0.0004, + "step": 7506 + }, + { + "epoch": 112.04, + "learning_rate": 8.936842105263158e-05, + "loss": 0.0005, + "step": 7507 + }, + { + "epoch": 112.06, + "learning_rate": 8.933333333333331e-05, + "loss": 0.0004, + "step": 7508 + }, + { + "epoch": 112.07, + "learning_rate": 8.929824561403508e-05, + "loss": 0.0005, + "step": 7509 + }, + { + "epoch": 112.09, + "learning_rate": 8.926315789473684e-05, + "loss": 0.001, + "step": 7510 + }, + { + "epoch": 112.1, + "learning_rate": 8.922807017543859e-05, + "loss": 0.0007, + "step": 7511 + }, + { + "epoch": 112.12, + "learning_rate": 8.919298245614035e-05, + "loss": 0.0019, + "step": 7512 + }, + { + "epoch": 112.13, + "learning_rate": 8.91578947368421e-05, + "loss": 0.002, + "step": 7513 + }, + { + "epoch": 112.15, + "learning_rate": 8.912280701754386e-05, + "loss": 0.0004, + "step": 7514 + }, + { + "epoch": 112.16, + "learning_rate": 8.90877192982456e-05, + "loss": 0.0005, + "step": 7515 + }, + { + "epoch": 112.18, + "learning_rate": 8.905263157894736e-05, + "loss": 0.0004, + "step": 7516 + }, + { + "epoch": 112.19, + "learning_rate": 8.901754385964911e-05, + "loss": 0.0071, + "step": 7517 + }, + { + "epoch": 112.21, + "learning_rate": 8.898245614035087e-05, + "loss": 0.0004, + "step": 7518 + }, + { + "epoch": 112.22, + "learning_rate": 8.894736842105262e-05, + "loss": 0.0008, + "step": 7519 + }, + { + "epoch": 112.24, + "learning_rate": 8.891228070175438e-05, + "loss": 0.0004, + "step": 7520 + }, + { + "epoch": 112.25, + "learning_rate": 8.887719298245613e-05, + "loss": 0.0005, + "step": 7521 + }, + { + "epoch": 112.27, + "learning_rate": 8.884210526315789e-05, + "loss": 0.0009, + "step": 7522 + }, + { + "epoch": 112.28, + "learning_rate": 8.880701754385965e-05, + "loss": 0.0004, + "step": 7523 + }, + { + "epoch": 112.3, + "learning_rate": 8.877192982456139e-05, + "loss": 0.0004, + "step": 7524 + }, + { + "epoch": 112.31, + "learning_rate": 8.873684210526315e-05, + "loss": 0.0005, + "step": 7525 + }, + { + "epoch": 112.33, + "learning_rate": 8.87017543859649e-05, + "loss": 0.0004, + "step": 7526 + }, + { + "epoch": 112.34, + "learning_rate": 8.866666666666666e-05, + "loss": 0.0005, + "step": 7527 + }, + { + "epoch": 112.36, + "learning_rate": 8.863157894736841e-05, + "loss": 0.0006, + "step": 7528 + }, + { + "epoch": 112.37, + "learning_rate": 8.859649122807017e-05, + "loss": 0.0008, + "step": 7529 + }, + { + "epoch": 112.39, + "learning_rate": 8.856140350877192e-05, + "loss": 0.0005, + "step": 7530 + }, + { + "epoch": 112.4, + "learning_rate": 8.852631578947368e-05, + "loss": 0.0005, + "step": 7531 + }, + { + "epoch": 112.42, + "learning_rate": 8.849122807017542e-05, + "loss": 0.0004, + "step": 7532 + }, + { + "epoch": 112.43, + "learning_rate": 8.845614035087718e-05, + "loss": 0.0004, + "step": 7533 + }, + { + "epoch": 112.45, + "learning_rate": 8.842105263157893e-05, + "loss": 0.0003, + "step": 7534 + }, + { + "epoch": 112.46, + "learning_rate": 8.83859649122807e-05, + "loss": 0.0011, + "step": 7535 + }, + { + "epoch": 112.48, + "learning_rate": 8.835087719298246e-05, + "loss": 0.0003, + "step": 7536 + }, + { + "epoch": 112.49, + "learning_rate": 8.83157894736842e-05, + "loss": 0.0102, + "step": 7537 + }, + { + "epoch": 112.51, + "learning_rate": 8.828070175438597e-05, + "loss": 0.0007, + "step": 7538 + }, + { + "epoch": 112.52, + "learning_rate": 8.82456140350877e-05, + "loss": 0.0003, + "step": 7539 + }, + { + "epoch": 112.54, + "learning_rate": 8.821052631578946e-05, + "loss": 0.0003, + "step": 7540 + }, + { + "epoch": 112.55, + "learning_rate": 8.817543859649121e-05, + "loss": 0.0004, + "step": 7541 + }, + { + "epoch": 112.57, + "learning_rate": 8.814035087719298e-05, + "loss": 0.0003, + "step": 7542 + }, + { + "epoch": 112.58, + "learning_rate": 8.810526315789472e-05, + "loss": 0.0011, + "step": 7543 + }, + { + "epoch": 112.59, + "learning_rate": 8.807017543859649e-05, + "loss": 0.0056, + "step": 7544 + }, + { + "epoch": 112.61, + "learning_rate": 8.803508771929824e-05, + "loss": 0.0765, + "step": 7545 + }, + { + "epoch": 112.62, + "learning_rate": 8.8e-05, + "loss": 0.0003, + "step": 7546 + }, + { + "epoch": 112.64, + "learning_rate": 8.796491228070176e-05, + "loss": 0.0005, + "step": 7547 + }, + { + "epoch": 112.65, + "learning_rate": 8.79298245614035e-05, + "loss": 0.0004, + "step": 7548 + }, + { + "epoch": 112.67, + "learning_rate": 8.789473684210526e-05, + "loss": 0.0004, + "step": 7549 + }, + { + "epoch": 112.68, + "learning_rate": 8.7859649122807e-05, + "loss": 0.0005, + "step": 7550 + }, + { + "epoch": 112.7, + "learning_rate": 8.782456140350877e-05, + "loss": 0.0004, + "step": 7551 + }, + { + "epoch": 112.71, + "learning_rate": 8.778947368421052e-05, + "loss": 0.0004, + "step": 7552 + }, + { + "epoch": 112.73, + "learning_rate": 8.775438596491228e-05, + "loss": 0.0005, + "step": 7553 + }, + { + "epoch": 112.74, + "learning_rate": 8.771929824561403e-05, + "loss": 0.0005, + "step": 7554 + }, + { + "epoch": 112.76, + "learning_rate": 8.768421052631579e-05, + "loss": 0.0005, + "step": 7555 + }, + { + "epoch": 112.77, + "learning_rate": 8.764912280701753e-05, + "loss": 0.0003, + "step": 7556 + }, + { + "epoch": 112.79, + "learning_rate": 8.761403508771929e-05, + "loss": 0.0042, + "step": 7557 + }, + { + "epoch": 112.8, + "learning_rate": 8.757894736842104e-05, + "loss": 0.0004, + "step": 7558 + }, + { + "epoch": 112.82, + "learning_rate": 8.75438596491228e-05, + "loss": 0.0007, + "step": 7559 + }, + { + "epoch": 112.83, + "learning_rate": 8.750877192982456e-05, + "loss": 0.0003, + "step": 7560 + }, + { + "epoch": 112.85, + "learning_rate": 8.747368421052631e-05, + "loss": 0.0005, + "step": 7561 + }, + { + "epoch": 112.86, + "learning_rate": 8.743859649122807e-05, + "loss": 0.0005, + "step": 7562 + }, + { + "epoch": 112.88, + "learning_rate": 8.740350877192981e-05, + "loss": 0.0003, + "step": 7563 + }, + { + "epoch": 112.89, + "learning_rate": 8.736842105263157e-05, + "loss": 0.0005, + "step": 7564 + }, + { + "epoch": 112.91, + "learning_rate": 8.733333333333332e-05, + "loss": 0.0011, + "step": 7565 + }, + { + "epoch": 112.92, + "learning_rate": 8.729824561403508e-05, + "loss": 0.0009, + "step": 7566 + }, + { + "epoch": 112.94, + "learning_rate": 8.726315789473683e-05, + "loss": 0.0004, + "step": 7567 + }, + { + "epoch": 112.95, + "learning_rate": 8.722807017543859e-05, + "loss": 0.097, + "step": 7568 + }, + { + "epoch": 112.97, + "learning_rate": 8.719298245614034e-05, + "loss": 0.0004, + "step": 7569 + }, + { + "epoch": 112.98, + "learning_rate": 8.71578947368421e-05, + "loss": 0.0003, + "step": 7570 + }, + { + "epoch": 113.0, + "learning_rate": 8.712280701754384e-05, + "loss": 0.0004, + "step": 7571 + }, + { + "epoch": 113.01, + "learning_rate": 8.70877192982456e-05, + "loss": 0.0076, + "step": 7572 + }, + { + "epoch": 113.03, + "learning_rate": 8.705263157894736e-05, + "loss": 0.0003, + "step": 7573 + }, + { + "epoch": 113.04, + "learning_rate": 8.701754385964911e-05, + "loss": 0.0057, + "step": 7574 + }, + { + "epoch": 113.06, + "learning_rate": 8.698245614035087e-05, + "loss": 0.0005, + "step": 7575 + }, + { + "epoch": 113.07, + "learning_rate": 8.694736842105262e-05, + "loss": 0.0037, + "step": 7576 + }, + { + "epoch": 113.09, + "learning_rate": 8.691228070175439e-05, + "loss": 0.2041, + "step": 7577 + }, + { + "epoch": 113.1, + "learning_rate": 8.687719298245613e-05, + "loss": 0.0026, + "step": 7578 + }, + { + "epoch": 113.12, + "learning_rate": 8.68421052631579e-05, + "loss": 0.0009, + "step": 7579 + }, + { + "epoch": 113.13, + "learning_rate": 8.680701754385963e-05, + "loss": 0.0004, + "step": 7580 + }, + { + "epoch": 113.15, + "learning_rate": 8.67719298245614e-05, + "loss": 0.0008, + "step": 7581 + }, + { + "epoch": 113.16, + "learning_rate": 8.673684210526314e-05, + "loss": 0.0005, + "step": 7582 + }, + { + "epoch": 113.18, + "learning_rate": 8.67017543859649e-05, + "loss": 0.0003, + "step": 7583 + }, + { + "epoch": 113.19, + "learning_rate": 8.666666666666665e-05, + "loss": 0.0112, + "step": 7584 + }, + { + "epoch": 113.21, + "learning_rate": 8.663157894736842e-05, + "loss": 0.0011, + "step": 7585 + }, + { + "epoch": 113.22, + "learning_rate": 8.659649122807018e-05, + "loss": 0.0004, + "step": 7586 + }, + { + "epoch": 113.24, + "learning_rate": 8.656140350877191e-05, + "loss": 0.0005, + "step": 7587 + }, + { + "epoch": 113.25, + "learning_rate": 8.652631578947368e-05, + "loss": 0.0005, + "step": 7588 + }, + { + "epoch": 113.27, + "learning_rate": 8.649122807017543e-05, + "loss": 0.0046, + "step": 7589 + }, + { + "epoch": 113.28, + "learning_rate": 8.645614035087719e-05, + "loss": 0.0006, + "step": 7590 + }, + { + "epoch": 113.3, + "learning_rate": 8.642105263157894e-05, + "loss": 0.0848, + "step": 7591 + }, + { + "epoch": 113.31, + "learning_rate": 8.63859649122807e-05, + "loss": 0.0004, + "step": 7592 + }, + { + "epoch": 113.33, + "learning_rate": 8.635087719298245e-05, + "loss": 0.0401, + "step": 7593 + }, + { + "epoch": 113.34, + "learning_rate": 8.631578947368421e-05, + "loss": 0.2326, + "step": 7594 + }, + { + "epoch": 113.36, + "learning_rate": 8.628070175438595e-05, + "loss": 0.0007, + "step": 7595 + }, + { + "epoch": 113.37, + "learning_rate": 8.624561403508771e-05, + "loss": 0.0007, + "step": 7596 + }, + { + "epoch": 113.39, + "learning_rate": 8.621052631578947e-05, + "loss": 0.0086, + "step": 7597 + }, + { + "epoch": 113.4, + "learning_rate": 8.617543859649122e-05, + "loss": 0.0022, + "step": 7598 + }, + { + "epoch": 113.42, + "learning_rate": 8.614035087719298e-05, + "loss": 0.0006, + "step": 7599 + }, + { + "epoch": 113.43, + "learning_rate": 8.610526315789473e-05, + "loss": 0.0023, + "step": 7600 + }, + { + "epoch": 113.43, + "eval_accuracy": 0.8705335291238375, + "eval_f1": 0.8699635174956661, + "eval_loss": 0.7108378410339355, + "eval_runtime": 344.4393, + "eval_samples_per_second": 11.863, + "eval_steps_per_second": 0.743, + "step": 7600 + }, + { + "epoch": 113.45, + "learning_rate": 8.607017543859649e-05, + "loss": 0.0003, + "step": 7601 + }, + { + "epoch": 113.46, + "learning_rate": 8.603508771929824e-05, + "loss": 0.1926, + "step": 7602 + }, + { + "epoch": 113.48, + "learning_rate": 8.6e-05, + "loss": 0.0004, + "step": 7603 + }, + { + "epoch": 113.49, + "learning_rate": 8.596491228070174e-05, + "loss": 0.0003, + "step": 7604 + }, + { + "epoch": 113.51, + "learning_rate": 8.59298245614035e-05, + "loss": 0.0006, + "step": 7605 + }, + { + "epoch": 113.52, + "learning_rate": 8.589473684210525e-05, + "loss": 0.0004, + "step": 7606 + }, + { + "epoch": 113.54, + "learning_rate": 8.585964912280701e-05, + "loss": 0.0003, + "step": 7607 + }, + { + "epoch": 113.55, + "learning_rate": 8.582456140350876e-05, + "loss": 0.0006, + "step": 7608 + }, + { + "epoch": 113.57, + "learning_rate": 8.578947368421052e-05, + "loss": 0.0024, + "step": 7609 + }, + { + "epoch": 113.58, + "learning_rate": 8.575438596491229e-05, + "loss": 0.0123, + "step": 7610 + }, + { + "epoch": 113.59, + "learning_rate": 8.571929824561402e-05, + "loss": 0.0013, + "step": 7611 + }, + { + "epoch": 113.61, + "learning_rate": 8.568421052631578e-05, + "loss": 0.0004, + "step": 7612 + }, + { + "epoch": 113.62, + "learning_rate": 8.564912280701753e-05, + "loss": 0.1429, + "step": 7613 + }, + { + "epoch": 113.64, + "learning_rate": 8.56140350877193e-05, + "loss": 0.0117, + "step": 7614 + }, + { + "epoch": 113.65, + "learning_rate": 8.557894736842104e-05, + "loss": 0.0003, + "step": 7615 + }, + { + "epoch": 113.67, + "learning_rate": 8.55438596491228e-05, + "loss": 0.0345, + "step": 7616 + }, + { + "epoch": 113.68, + "learning_rate": 8.550877192982455e-05, + "loss": 0.0004, + "step": 7617 + }, + { + "epoch": 113.7, + "learning_rate": 8.547368421052632e-05, + "loss": 0.0004, + "step": 7618 + }, + { + "epoch": 113.71, + "learning_rate": 8.543859649122805e-05, + "loss": 0.0004, + "step": 7619 + }, + { + "epoch": 113.73, + "learning_rate": 8.540350877192981e-05, + "loss": 0.0003, + "step": 7620 + }, + { + "epoch": 113.74, + "learning_rate": 8.536842105263156e-05, + "loss": 0.0003, + "step": 7621 + }, + { + "epoch": 113.76, + "learning_rate": 8.533333333333332e-05, + "loss": 0.0004, + "step": 7622 + }, + { + "epoch": 113.77, + "learning_rate": 8.529824561403509e-05, + "loss": 0.0003, + "step": 7623 + }, + { + "epoch": 113.79, + "learning_rate": 8.526315789473684e-05, + "loss": 0.0011, + "step": 7624 + }, + { + "epoch": 113.8, + "learning_rate": 8.52280701754386e-05, + "loss": 0.0003, + "step": 7625 + }, + { + "epoch": 113.82, + "learning_rate": 8.519298245614035e-05, + "loss": 0.0003, + "step": 7626 + }, + { + "epoch": 113.83, + "learning_rate": 8.515789473684211e-05, + "loss": 0.0004, + "step": 7627 + }, + { + "epoch": 113.85, + "learning_rate": 8.512280701754384e-05, + "loss": 0.0003, + "step": 7628 + }, + { + "epoch": 113.86, + "learning_rate": 8.508771929824561e-05, + "loss": 0.0007, + "step": 7629 + }, + { + "epoch": 113.88, + "learning_rate": 8.505263157894736e-05, + "loss": 0.0004, + "step": 7630 + }, + { + "epoch": 113.89, + "learning_rate": 8.501754385964912e-05, + "loss": 0.0186, + "step": 7631 + }, + { + "epoch": 113.91, + "learning_rate": 8.498245614035087e-05, + "loss": 0.0004, + "step": 7632 + }, + { + "epoch": 113.92, + "learning_rate": 8.494736842105263e-05, + "loss": 0.0003, + "step": 7633 + }, + { + "epoch": 113.94, + "learning_rate": 8.491228070175436e-05, + "loss": 0.0003, + "step": 7634 + }, + { + "epoch": 113.95, + "learning_rate": 8.487719298245613e-05, + "loss": 0.0003, + "step": 7635 + }, + { + "epoch": 113.97, + "learning_rate": 8.484210526315789e-05, + "loss": 0.0005, + "step": 7636 + }, + { + "epoch": 113.98, + "learning_rate": 8.480701754385964e-05, + "loss": 0.0005, + "step": 7637 + }, + { + "epoch": 114.0, + "learning_rate": 8.47719298245614e-05, + "loss": 0.0003, + "step": 7638 + }, + { + "epoch": 114.01, + "learning_rate": 8.473684210526315e-05, + "loss": 0.0042, + "step": 7639 + }, + { + "epoch": 114.03, + "learning_rate": 8.470175438596491e-05, + "loss": 0.0004, + "step": 7640 + }, + { + "epoch": 114.04, + "learning_rate": 8.466666666666666e-05, + "loss": 0.0004, + "step": 7641 + }, + { + "epoch": 114.06, + "learning_rate": 8.463157894736842e-05, + "loss": 0.0256, + "step": 7642 + }, + { + "epoch": 114.07, + "learning_rate": 8.459649122807016e-05, + "loss": 0.0015, + "step": 7643 + }, + { + "epoch": 114.09, + "learning_rate": 8.456140350877192e-05, + "loss": 0.0005, + "step": 7644 + }, + { + "epoch": 114.1, + "learning_rate": 8.452631578947367e-05, + "loss": 0.0006, + "step": 7645 + }, + { + "epoch": 114.12, + "learning_rate": 8.449122807017543e-05, + "loss": 0.0003, + "step": 7646 + }, + { + "epoch": 114.13, + "learning_rate": 8.44561403508772e-05, + "loss": 0.0003, + "step": 7647 + }, + { + "epoch": 114.15, + "learning_rate": 8.442105263157894e-05, + "loss": 0.0051, + "step": 7648 + }, + { + "epoch": 114.16, + "learning_rate": 8.43859649122807e-05, + "loss": 0.0266, + "step": 7649 + }, + { + "epoch": 114.18, + "learning_rate": 8.435087719298245e-05, + "loss": 0.0007, + "step": 7650 + }, + { + "epoch": 114.19, + "learning_rate": 8.43157894736842e-05, + "loss": 0.097, + "step": 7651 + }, + { + "epoch": 114.21, + "learning_rate": 8.428070175438595e-05, + "loss": 0.0003, + "step": 7652 + }, + { + "epoch": 114.22, + "learning_rate": 8.424561403508771e-05, + "loss": 0.0006, + "step": 7653 + }, + { + "epoch": 114.24, + "learning_rate": 8.421052631578946e-05, + "loss": 0.2054, + "step": 7654 + }, + { + "epoch": 114.25, + "learning_rate": 8.417543859649122e-05, + "loss": 0.0003, + "step": 7655 + }, + { + "epoch": 114.27, + "learning_rate": 8.414035087719297e-05, + "loss": 0.0113, + "step": 7656 + }, + { + "epoch": 114.28, + "learning_rate": 8.410526315789474e-05, + "loss": 0.0003, + "step": 7657 + }, + { + "epoch": 114.3, + "learning_rate": 8.407017543859647e-05, + "loss": 0.1907, + "step": 7658 + }, + { + "epoch": 114.31, + "learning_rate": 8.403508771929823e-05, + "loss": 0.0003, + "step": 7659 + }, + { + "epoch": 114.33, + "learning_rate": 8.4e-05, + "loss": 0.0004, + "step": 7660 + }, + { + "epoch": 114.34, + "learning_rate": 8.396491228070174e-05, + "loss": 0.0004, + "step": 7661 + }, + { + "epoch": 114.36, + "learning_rate": 8.39298245614035e-05, + "loss": 0.0003, + "step": 7662 + }, + { + "epoch": 114.37, + "learning_rate": 8.389473684210526e-05, + "loss": 0.0192, + "step": 7663 + }, + { + "epoch": 114.39, + "learning_rate": 8.385964912280702e-05, + "loss": 0.0004, + "step": 7664 + }, + { + "epoch": 114.4, + "learning_rate": 8.382456140350877e-05, + "loss": 0.0003, + "step": 7665 + }, + { + "epoch": 114.42, + "learning_rate": 8.378947368421053e-05, + "loss": 0.0018, + "step": 7666 + }, + { + "epoch": 114.43, + "learning_rate": 8.375438596491226e-05, + "loss": 0.0003, + "step": 7667 + }, + { + "epoch": 114.45, + "learning_rate": 8.371929824561403e-05, + "loss": 0.0003, + "step": 7668 + }, + { + "epoch": 114.46, + "learning_rate": 8.368421052631578e-05, + "loss": 0.0003, + "step": 7669 + }, + { + "epoch": 114.48, + "learning_rate": 8.364912280701754e-05, + "loss": 0.0003, + "step": 7670 + }, + { + "epoch": 114.49, + "learning_rate": 8.361403508771929e-05, + "loss": 0.0004, + "step": 7671 + }, + { + "epoch": 114.51, + "learning_rate": 8.357894736842105e-05, + "loss": 0.0003, + "step": 7672 + }, + { + "epoch": 114.52, + "learning_rate": 8.354385964912281e-05, + "loss": 0.0003, + "step": 7673 + }, + { + "epoch": 114.54, + "learning_rate": 8.350877192982456e-05, + "loss": 0.0003, + "step": 7674 + }, + { + "epoch": 114.55, + "learning_rate": 8.347368421052631e-05, + "loss": 0.0629, + "step": 7675 + }, + { + "epoch": 114.57, + "learning_rate": 8.343859649122806e-05, + "loss": 0.0005, + "step": 7676 + }, + { + "epoch": 114.58, + "learning_rate": 8.340350877192982e-05, + "loss": 0.1229, + "step": 7677 + }, + { + "epoch": 114.59, + "learning_rate": 8.336842105263157e-05, + "loss": 0.0004, + "step": 7678 + }, + { + "epoch": 114.61, + "learning_rate": 8.333333333333333e-05, + "loss": 0.0455, + "step": 7679 + }, + { + "epoch": 114.62, + "learning_rate": 8.329824561403508e-05, + "loss": 0.2398, + "step": 7680 + }, + { + "epoch": 114.64, + "learning_rate": 8.326315789473684e-05, + "loss": 0.0346, + "step": 7681 + }, + { + "epoch": 114.65, + "learning_rate": 8.322807017543858e-05, + "loss": 0.0005, + "step": 7682 + }, + { + "epoch": 114.67, + "learning_rate": 8.319298245614034e-05, + "loss": 0.0003, + "step": 7683 + }, + { + "epoch": 114.68, + "learning_rate": 8.315789473684209e-05, + "loss": 0.0004, + "step": 7684 + }, + { + "epoch": 114.7, + "learning_rate": 8.312280701754385e-05, + "loss": 0.0018, + "step": 7685 + }, + { + "epoch": 114.71, + "learning_rate": 8.308771929824561e-05, + "loss": 0.0004, + "step": 7686 + }, + { + "epoch": 114.73, + "learning_rate": 8.305263157894736e-05, + "loss": 0.0023, + "step": 7687 + }, + { + "epoch": 114.74, + "learning_rate": 8.301754385964912e-05, + "loss": 0.0005, + "step": 7688 + }, + { + "epoch": 114.76, + "learning_rate": 8.298245614035087e-05, + "loss": 0.0005, + "step": 7689 + }, + { + "epoch": 114.77, + "learning_rate": 8.294736842105263e-05, + "loss": 0.0004, + "step": 7690 + }, + { + "epoch": 114.79, + "learning_rate": 8.291228070175437e-05, + "loss": 0.0004, + "step": 7691 + }, + { + "epoch": 114.8, + "learning_rate": 8.287719298245613e-05, + "loss": 0.0004, + "step": 7692 + }, + { + "epoch": 114.82, + "learning_rate": 8.284210526315788e-05, + "loss": 0.0004, + "step": 7693 + }, + { + "epoch": 114.83, + "learning_rate": 8.280701754385964e-05, + "loss": 0.0004, + "step": 7694 + }, + { + "epoch": 114.85, + "learning_rate": 8.277192982456139e-05, + "loss": 0.0003, + "step": 7695 + }, + { + "epoch": 114.86, + "learning_rate": 8.273684210526315e-05, + "loss": 0.1994, + "step": 7696 + }, + { + "epoch": 114.88, + "learning_rate": 8.27017543859649e-05, + "loss": 0.0004, + "step": 7697 + }, + { + "epoch": 114.89, + "learning_rate": 8.266666666666665e-05, + "loss": 0.0005, + "step": 7698 + }, + { + "epoch": 114.91, + "learning_rate": 8.263157894736841e-05, + "loss": 0.0003, + "step": 7699 + }, + { + "epoch": 114.92, + "learning_rate": 8.259649122807016e-05, + "loss": 0.0004, + "step": 7700 + }, + { + "epoch": 114.94, + "learning_rate": 8.256140350877193e-05, + "loss": 0.0004, + "step": 7701 + }, + { + "epoch": 114.95, + "learning_rate": 8.252631578947367e-05, + "loss": 0.0044, + "step": 7702 + }, + { + "epoch": 114.97, + "learning_rate": 8.249122807017544e-05, + "loss": 0.003, + "step": 7703 + }, + { + "epoch": 114.98, + "learning_rate": 8.245614035087719e-05, + "loss": 0.0004, + "step": 7704 + }, + { + "epoch": 115.0, + "learning_rate": 8.242105263157895e-05, + "loss": 0.0003, + "step": 7705 + }, + { + "epoch": 115.01, + "learning_rate": 8.238596491228068e-05, + "loss": 0.0008, + "step": 7706 + }, + { + "epoch": 115.03, + "learning_rate": 8.235087719298245e-05, + "loss": 0.0015, + "step": 7707 + }, + { + "epoch": 115.04, + "learning_rate": 8.23157894736842e-05, + "loss": 0.0003, + "step": 7708 + }, + { + "epoch": 115.06, + "learning_rate": 8.228070175438596e-05, + "loss": 0.0005, + "step": 7709 + }, + { + "epoch": 115.07, + "learning_rate": 8.224561403508772e-05, + "loss": 0.0029, + "step": 7710 + }, + { + "epoch": 115.09, + "learning_rate": 8.221052631578947e-05, + "loss": 0.0004, + "step": 7711 + }, + { + "epoch": 115.1, + "learning_rate": 8.217543859649123e-05, + "loss": 0.0007, + "step": 7712 + }, + { + "epoch": 115.12, + "learning_rate": 8.214035087719298e-05, + "loss": 0.0005, + "step": 7713 + }, + { + "epoch": 115.13, + "learning_rate": 8.210526315789474e-05, + "loss": 0.0004, + "step": 7714 + }, + { + "epoch": 115.15, + "learning_rate": 8.207017543859648e-05, + "loss": 0.0004, + "step": 7715 + }, + { + "epoch": 115.16, + "learning_rate": 8.203508771929824e-05, + "loss": 0.0008, + "step": 7716 + }, + { + "epoch": 115.18, + "learning_rate": 8.199999999999999e-05, + "loss": 0.1419, + "step": 7717 + }, + { + "epoch": 115.19, + "learning_rate": 8.196491228070175e-05, + "loss": 0.079, + "step": 7718 + }, + { + "epoch": 115.21, + "learning_rate": 8.19298245614035e-05, + "loss": 0.0003, + "step": 7719 + }, + { + "epoch": 115.22, + "learning_rate": 8.189473684210526e-05, + "loss": 0.0013, + "step": 7720 + }, + { + "epoch": 115.24, + "learning_rate": 8.1859649122807e-05, + "loss": 0.0007, + "step": 7721 + }, + { + "epoch": 115.25, + "learning_rate": 8.182456140350876e-05, + "loss": 0.0007, + "step": 7722 + }, + { + "epoch": 115.27, + "learning_rate": 8.178947368421052e-05, + "loss": 0.0003, + "step": 7723 + }, + { + "epoch": 115.28, + "learning_rate": 8.175438596491227e-05, + "loss": 0.0003, + "step": 7724 + }, + { + "epoch": 115.3, + "learning_rate": 8.171929824561403e-05, + "loss": 0.0004, + "step": 7725 + }, + { + "epoch": 115.31, + "learning_rate": 8.168421052631578e-05, + "loss": 0.0004, + "step": 7726 + }, + { + "epoch": 115.33, + "learning_rate": 8.164912280701754e-05, + "loss": 0.0043, + "step": 7727 + }, + { + "epoch": 115.34, + "learning_rate": 8.161403508771929e-05, + "loss": 0.0025, + "step": 7728 + }, + { + "epoch": 115.36, + "learning_rate": 8.157894736842105e-05, + "loss": 0.0004, + "step": 7729 + }, + { + "epoch": 115.37, + "learning_rate": 8.154385964912279e-05, + "loss": 0.055, + "step": 7730 + }, + { + "epoch": 115.39, + "learning_rate": 8.150877192982455e-05, + "loss": 0.0006, + "step": 7731 + }, + { + "epoch": 115.4, + "learning_rate": 8.14736842105263e-05, + "loss": 0.0004, + "step": 7732 + }, + { + "epoch": 115.42, + "learning_rate": 8.143859649122806e-05, + "loss": 0.0003, + "step": 7733 + }, + { + "epoch": 115.43, + "learning_rate": 8.140350877192981e-05, + "loss": 0.0011, + "step": 7734 + }, + { + "epoch": 115.45, + "learning_rate": 8.136842105263157e-05, + "loss": 0.006, + "step": 7735 + }, + { + "epoch": 115.46, + "learning_rate": 8.133333333333334e-05, + "loss": 0.0004, + "step": 7736 + }, + { + "epoch": 115.48, + "learning_rate": 8.129824561403508e-05, + "loss": 0.0711, + "step": 7737 + }, + { + "epoch": 115.49, + "learning_rate": 8.126315789473685e-05, + "loss": 0.0136, + "step": 7738 + }, + { + "epoch": 115.51, + "learning_rate": 8.122807017543858e-05, + "loss": 0.0005, + "step": 7739 + }, + { + "epoch": 115.52, + "learning_rate": 8.119298245614034e-05, + "loss": 0.0004, + "step": 7740 + }, + { + "epoch": 115.54, + "learning_rate": 8.11578947368421e-05, + "loss": 0.0004, + "step": 7741 + }, + { + "epoch": 115.55, + "learning_rate": 8.112280701754386e-05, + "loss": 0.0003, + "step": 7742 + }, + { + "epoch": 115.57, + "learning_rate": 8.10877192982456e-05, + "loss": 0.0004, + "step": 7743 + }, + { + "epoch": 115.58, + "learning_rate": 8.105263157894737e-05, + "loss": 0.0028, + "step": 7744 + }, + { + "epoch": 115.59, + "learning_rate": 8.10175438596491e-05, + "loss": 0.0022, + "step": 7745 + }, + { + "epoch": 115.61, + "learning_rate": 8.098245614035086e-05, + "loss": 0.0003, + "step": 7746 + }, + { + "epoch": 115.62, + "learning_rate": 8.094736842105261e-05, + "loss": 0.0004, + "step": 7747 + }, + { + "epoch": 115.64, + "learning_rate": 8.091228070175438e-05, + "loss": 0.0004, + "step": 7748 + }, + { + "epoch": 115.65, + "learning_rate": 8.087719298245614e-05, + "loss": 0.0003, + "step": 7749 + }, + { + "epoch": 115.67, + "learning_rate": 8.084210526315789e-05, + "loss": 0.0003, + "step": 7750 + }, + { + "epoch": 115.68, + "learning_rate": 8.080701754385965e-05, + "loss": 0.0003, + "step": 7751 + }, + { + "epoch": 115.7, + "learning_rate": 8.07719298245614e-05, + "loss": 0.0003, + "step": 7752 + }, + { + "epoch": 115.71, + "learning_rate": 8.073684210526316e-05, + "loss": 0.0003, + "step": 7753 + }, + { + "epoch": 115.73, + "learning_rate": 8.07017543859649e-05, + "loss": 0.0003, + "step": 7754 + }, + { + "epoch": 115.74, + "learning_rate": 8.066666666666666e-05, + "loss": 0.0003, + "step": 7755 + }, + { + "epoch": 115.76, + "learning_rate": 8.06315789473684e-05, + "loss": 0.0003, + "step": 7756 + }, + { + "epoch": 115.77, + "learning_rate": 8.059649122807017e-05, + "loss": 0.0017, + "step": 7757 + }, + { + "epoch": 115.79, + "learning_rate": 8.056140350877192e-05, + "loss": 0.001, + "step": 7758 + }, + { + "epoch": 115.8, + "learning_rate": 8.052631578947368e-05, + "loss": 0.0004, + "step": 7759 + }, + { + "epoch": 115.82, + "learning_rate": 8.049122807017544e-05, + "loss": 0.0004, + "step": 7760 + }, + { + "epoch": 115.83, + "learning_rate": 8.045614035087719e-05, + "loss": 0.0263, + "step": 7761 + }, + { + "epoch": 115.85, + "learning_rate": 8.042105263157895e-05, + "loss": 0.0004, + "step": 7762 + }, + { + "epoch": 115.86, + "learning_rate": 8.038596491228069e-05, + "loss": 0.0003, + "step": 7763 + }, + { + "epoch": 115.88, + "learning_rate": 8.035087719298245e-05, + "loss": 0.0004, + "step": 7764 + }, + { + "epoch": 115.89, + "learning_rate": 8.03157894736842e-05, + "loss": 0.0003, + "step": 7765 + }, + { + "epoch": 115.91, + "learning_rate": 8.028070175438596e-05, + "loss": 0.0015, + "step": 7766 + }, + { + "epoch": 115.92, + "learning_rate": 8.024561403508771e-05, + "loss": 0.0004, + "step": 7767 + }, + { + "epoch": 115.94, + "learning_rate": 8.021052631578947e-05, + "loss": 0.0003, + "step": 7768 + }, + { + "epoch": 115.95, + "learning_rate": 8.017543859649121e-05, + "loss": 0.0005, + "step": 7769 + }, + { + "epoch": 115.97, + "learning_rate": 8.014035087719297e-05, + "loss": 0.0004, + "step": 7770 + }, + { + "epoch": 115.98, + "learning_rate": 8.010526315789472e-05, + "loss": 0.0026, + "step": 7771 + }, + { + "epoch": 116.0, + "learning_rate": 8.007017543859648e-05, + "loss": 0.0012, + "step": 7772 + }, + { + "epoch": 116.01, + "learning_rate": 8.003508771929824e-05, + "loss": 0.0028, + "step": 7773 + }, + { + "epoch": 116.03, + "learning_rate": 7.999999999999999e-05, + "loss": 0.0005, + "step": 7774 + }, + { + "epoch": 116.04, + "learning_rate": 7.996491228070176e-05, + "loss": 0.0403, + "step": 7775 + }, + { + "epoch": 116.06, + "learning_rate": 7.99298245614035e-05, + "loss": 0.0003, + "step": 7776 + }, + { + "epoch": 116.07, + "learning_rate": 7.989473684210527e-05, + "loss": 0.0007, + "step": 7777 + }, + { + "epoch": 116.09, + "learning_rate": 7.9859649122807e-05, + "loss": 0.0003, + "step": 7778 + }, + { + "epoch": 116.1, + "learning_rate": 7.982456140350876e-05, + "loss": 0.0004, + "step": 7779 + }, + { + "epoch": 116.12, + "learning_rate": 7.978947368421051e-05, + "loss": 0.0011, + "step": 7780 + }, + { + "epoch": 116.13, + "learning_rate": 7.975438596491228e-05, + "loss": 0.0004, + "step": 7781 + }, + { + "epoch": 116.15, + "learning_rate": 7.971929824561402e-05, + "loss": 0.0003, + "step": 7782 + }, + { + "epoch": 116.16, + "learning_rate": 7.968421052631579e-05, + "loss": 0.0004, + "step": 7783 + }, + { + "epoch": 116.18, + "learning_rate": 7.964912280701753e-05, + "loss": 0.0037, + "step": 7784 + }, + { + "epoch": 116.19, + "learning_rate": 7.96140350877193e-05, + "loss": 0.0003, + "step": 7785 + }, + { + "epoch": 116.21, + "learning_rate": 7.957894736842106e-05, + "loss": 0.0003, + "step": 7786 + }, + { + "epoch": 116.22, + "learning_rate": 7.95438596491228e-05, + "loss": 0.0406, + "step": 7787 + }, + { + "epoch": 116.24, + "learning_rate": 7.950877192982456e-05, + "loss": 0.0003, + "step": 7788 + }, + { + "epoch": 116.25, + "learning_rate": 7.94736842105263e-05, + "loss": 0.0003, + "step": 7789 + }, + { + "epoch": 116.27, + "learning_rate": 7.943859649122807e-05, + "loss": 0.0103, + "step": 7790 + }, + { + "epoch": 116.28, + "learning_rate": 7.940350877192982e-05, + "loss": 0.0003, + "step": 7791 + }, + { + "epoch": 116.3, + "learning_rate": 7.936842105263158e-05, + "loss": 0.0003, + "step": 7792 + }, + { + "epoch": 116.31, + "learning_rate": 7.933333333333331e-05, + "loss": 0.0003, + "step": 7793 + }, + { + "epoch": 116.33, + "learning_rate": 7.929824561403508e-05, + "loss": 0.0004, + "step": 7794 + }, + { + "epoch": 116.34, + "learning_rate": 7.926315789473683e-05, + "loss": 0.0003, + "step": 7795 + }, + { + "epoch": 116.36, + "learning_rate": 7.922807017543859e-05, + "loss": 0.0003, + "step": 7796 + }, + { + "epoch": 116.37, + "learning_rate": 7.919298245614034e-05, + "loss": 0.0003, + "step": 7797 + }, + { + "epoch": 116.39, + "learning_rate": 7.91578947368421e-05, + "loss": 0.0027, + "step": 7798 + }, + { + "epoch": 116.4, + "learning_rate": 7.912280701754386e-05, + "loss": 0.0003, + "step": 7799 + }, + { + "epoch": 116.42, + "learning_rate": 7.908771929824561e-05, + "loss": 0.0025, + "step": 7800 + }, + { + "epoch": 116.42, + "eval_accuracy": 0.8820362212432697, + "eval_f1": 0.8817875307257204, + "eval_loss": 0.6464086174964905, + "eval_runtime": 346.6255, + "eval_samples_per_second": 11.788, + "eval_steps_per_second": 0.739, + "step": 7800 + }, + { + "epoch": 116.43, + "learning_rate": 7.905263157894737e-05, + "loss": 0.0002, + "step": 7801 + }, + { + "epoch": 116.45, + "learning_rate": 7.901754385964911e-05, + "loss": 0.0003, + "step": 7802 + }, + { + "epoch": 116.46, + "learning_rate": 7.898245614035087e-05, + "loss": 0.0003, + "step": 7803 + }, + { + "epoch": 116.48, + "learning_rate": 7.894736842105262e-05, + "loss": 0.0003, + "step": 7804 + }, + { + "epoch": 116.49, + "learning_rate": 7.891228070175438e-05, + "loss": 0.0005, + "step": 7805 + }, + { + "epoch": 116.51, + "learning_rate": 7.887719298245613e-05, + "loss": 0.0004, + "step": 7806 + }, + { + "epoch": 116.52, + "learning_rate": 7.884210526315789e-05, + "loss": 0.0003, + "step": 7807 + }, + { + "epoch": 116.54, + "learning_rate": 7.880701754385964e-05, + "loss": 0.0076, + "step": 7808 + }, + { + "epoch": 116.55, + "learning_rate": 7.87719298245614e-05, + "loss": 0.0132, + "step": 7809 + }, + { + "epoch": 116.57, + "learning_rate": 7.873684210526317e-05, + "loss": 0.0007, + "step": 7810 + }, + { + "epoch": 116.58, + "learning_rate": 7.87017543859649e-05, + "loss": 0.0003, + "step": 7811 + }, + { + "epoch": 116.59, + "learning_rate": 7.866666666666666e-05, + "loss": 0.0003, + "step": 7812 + }, + { + "epoch": 116.61, + "learning_rate": 7.863157894736841e-05, + "loss": 0.0003, + "step": 7813 + }, + { + "epoch": 116.62, + "learning_rate": 7.859649122807017e-05, + "loss": 0.0002, + "step": 7814 + }, + { + "epoch": 116.64, + "learning_rate": 7.856140350877192e-05, + "loss": 0.0003, + "step": 7815 + }, + { + "epoch": 116.65, + "learning_rate": 7.852631578947369e-05, + "loss": 0.0263, + "step": 7816 + }, + { + "epoch": 116.67, + "learning_rate": 7.849122807017542e-05, + "loss": 0.0082, + "step": 7817 + }, + { + "epoch": 116.68, + "learning_rate": 7.845614035087718e-05, + "loss": 0.0004, + "step": 7818 + }, + { + "epoch": 116.7, + "learning_rate": 7.842105263157893e-05, + "loss": 0.0004, + "step": 7819 + }, + { + "epoch": 116.71, + "learning_rate": 7.83859649122807e-05, + "loss": 0.0003, + "step": 7820 + }, + { + "epoch": 116.73, + "learning_rate": 7.835087719298244e-05, + "loss": 0.0003, + "step": 7821 + }, + { + "epoch": 116.74, + "learning_rate": 7.83157894736842e-05, + "loss": 0.0003, + "step": 7822 + }, + { + "epoch": 116.76, + "learning_rate": 7.828070175438597e-05, + "loss": 0.0003, + "step": 7823 + }, + { + "epoch": 116.77, + "learning_rate": 7.824561403508772e-05, + "loss": 0.0003, + "step": 7824 + }, + { + "epoch": 116.79, + "learning_rate": 7.821052631578948e-05, + "loss": 0.0002, + "step": 7825 + }, + { + "epoch": 116.8, + "learning_rate": 7.817543859649121e-05, + "loss": 0.0003, + "step": 7826 + }, + { + "epoch": 116.82, + "learning_rate": 7.814035087719298e-05, + "loss": 0.0002, + "step": 7827 + }, + { + "epoch": 116.83, + "learning_rate": 7.810526315789473e-05, + "loss": 0.0003, + "step": 7828 + }, + { + "epoch": 116.85, + "learning_rate": 7.807017543859649e-05, + "loss": 0.001, + "step": 7829 + }, + { + "epoch": 116.86, + "learning_rate": 7.803508771929824e-05, + "loss": 0.0007, + "step": 7830 + }, + { + "epoch": 116.88, + "learning_rate": 7.8e-05, + "loss": 0.0003, + "step": 7831 + }, + { + "epoch": 116.89, + "learning_rate": 7.796491228070175e-05, + "loss": 0.0004, + "step": 7832 + }, + { + "epoch": 116.91, + "learning_rate": 7.792982456140351e-05, + "loss": 0.0002, + "step": 7833 + }, + { + "epoch": 116.92, + "learning_rate": 7.789473684210524e-05, + "loss": 0.0145, + "step": 7834 + }, + { + "epoch": 116.94, + "learning_rate": 7.785964912280701e-05, + "loss": 0.0039, + "step": 7835 + }, + { + "epoch": 116.95, + "learning_rate": 7.782456140350877e-05, + "loss": 0.0012, + "step": 7836 + }, + { + "epoch": 116.97, + "learning_rate": 7.778947368421052e-05, + "loss": 0.0005, + "step": 7837 + }, + { + "epoch": 116.98, + "learning_rate": 7.775438596491228e-05, + "loss": 0.0003, + "step": 7838 + }, + { + "epoch": 117.0, + "learning_rate": 7.771929824561403e-05, + "loss": 0.0109, + "step": 7839 + }, + { + "epoch": 117.01, + "learning_rate": 7.768421052631579e-05, + "loss": 0.0004, + "step": 7840 + }, + { + "epoch": 117.03, + "learning_rate": 7.764912280701753e-05, + "loss": 0.0003, + "step": 7841 + }, + { + "epoch": 117.04, + "learning_rate": 7.761403508771929e-05, + "loss": 0.0003, + "step": 7842 + }, + { + "epoch": 117.06, + "learning_rate": 7.757894736842104e-05, + "loss": 0.2597, + "step": 7843 + }, + { + "epoch": 117.07, + "learning_rate": 7.75438596491228e-05, + "loss": 0.0002, + "step": 7844 + }, + { + "epoch": 117.09, + "learning_rate": 7.750877192982455e-05, + "loss": 0.0002, + "step": 7845 + }, + { + "epoch": 117.1, + "learning_rate": 7.747368421052631e-05, + "loss": 0.0003, + "step": 7846 + }, + { + "epoch": 117.12, + "learning_rate": 7.743859649122806e-05, + "loss": 0.0002, + "step": 7847 + }, + { + "epoch": 117.13, + "learning_rate": 7.740350877192982e-05, + "loss": 0.0002, + "step": 7848 + }, + { + "epoch": 117.15, + "learning_rate": 7.736842105263159e-05, + "loss": 0.0002, + "step": 7849 + }, + { + "epoch": 117.16, + "learning_rate": 7.733333333333332e-05, + "loss": 0.0172, + "step": 7850 + }, + { + "epoch": 117.18, + "learning_rate": 7.729824561403508e-05, + "loss": 0.0003, + "step": 7851 + }, + { + "epoch": 117.19, + "learning_rate": 7.726315789473683e-05, + "loss": 0.0009, + "step": 7852 + }, + { + "epoch": 117.21, + "learning_rate": 7.72280701754386e-05, + "loss": 0.0002, + "step": 7853 + }, + { + "epoch": 117.22, + "learning_rate": 7.719298245614034e-05, + "loss": 0.0002, + "step": 7854 + }, + { + "epoch": 117.24, + "learning_rate": 7.71578947368421e-05, + "loss": 0.0003, + "step": 7855 + }, + { + "epoch": 117.25, + "learning_rate": 7.712280701754385e-05, + "loss": 0.0021, + "step": 7856 + }, + { + "epoch": 117.27, + "learning_rate": 7.708771929824562e-05, + "loss": 0.0101, + "step": 7857 + }, + { + "epoch": 117.28, + "learning_rate": 7.705263157894735e-05, + "loss": 0.0003, + "step": 7858 + }, + { + "epoch": 117.3, + "learning_rate": 7.701754385964911e-05, + "loss": 0.1783, + "step": 7859 + }, + { + "epoch": 117.31, + "learning_rate": 7.698245614035086e-05, + "loss": 0.0002, + "step": 7860 + }, + { + "epoch": 117.33, + "learning_rate": 7.694736842105262e-05, + "loss": 0.1317, + "step": 7861 + }, + { + "epoch": 117.34, + "learning_rate": 7.691228070175439e-05, + "loss": 0.0003, + "step": 7862 + }, + { + "epoch": 117.36, + "learning_rate": 7.687719298245614e-05, + "loss": 0.0003, + "step": 7863 + }, + { + "epoch": 117.37, + "learning_rate": 7.68421052631579e-05, + "loss": 0.0002, + "step": 7864 + }, + { + "epoch": 117.39, + "learning_rate": 7.680701754385963e-05, + "loss": 0.0005, + "step": 7865 + }, + { + "epoch": 117.4, + "learning_rate": 7.67719298245614e-05, + "loss": 0.0003, + "step": 7866 + }, + { + "epoch": 117.42, + "learning_rate": 7.673684210526314e-05, + "loss": 0.0003, + "step": 7867 + }, + { + "epoch": 117.43, + "learning_rate": 7.67017543859649e-05, + "loss": 0.0005, + "step": 7868 + }, + { + "epoch": 117.45, + "learning_rate": 7.666666666666666e-05, + "loss": 0.0005, + "step": 7869 + }, + { + "epoch": 117.46, + "learning_rate": 7.663157894736842e-05, + "loss": 0.0002, + "step": 7870 + }, + { + "epoch": 117.48, + "learning_rate": 7.659649122807017e-05, + "loss": 0.0003, + "step": 7871 + }, + { + "epoch": 117.49, + "learning_rate": 7.656140350877193e-05, + "loss": 0.0002, + "step": 7872 + }, + { + "epoch": 117.51, + "learning_rate": 7.652631578947369e-05, + "loss": 0.0003, + "step": 7873 + }, + { + "epoch": 117.52, + "learning_rate": 7.649122807017543e-05, + "loss": 0.012, + "step": 7874 + }, + { + "epoch": 117.54, + "learning_rate": 7.645614035087719e-05, + "loss": 0.0005, + "step": 7875 + }, + { + "epoch": 117.55, + "learning_rate": 7.642105263157894e-05, + "loss": 0.0002, + "step": 7876 + }, + { + "epoch": 117.57, + "learning_rate": 7.63859649122807e-05, + "loss": 0.1101, + "step": 7877 + }, + { + "epoch": 117.58, + "learning_rate": 7.635087719298245e-05, + "loss": 0.048, + "step": 7878 + }, + { + "epoch": 117.59, + "learning_rate": 7.631578947368421e-05, + "loss": 0.0002, + "step": 7879 + }, + { + "epoch": 117.61, + "learning_rate": 7.628070175438596e-05, + "loss": 0.0004, + "step": 7880 + }, + { + "epoch": 117.62, + "learning_rate": 7.624561403508772e-05, + "loss": 0.0004, + "step": 7881 + }, + { + "epoch": 117.64, + "learning_rate": 7.621052631578946e-05, + "loss": 0.0018, + "step": 7882 + }, + { + "epoch": 117.65, + "learning_rate": 7.617543859649122e-05, + "loss": 0.0002, + "step": 7883 + }, + { + "epoch": 117.67, + "learning_rate": 7.614035087719297e-05, + "loss": 0.001, + "step": 7884 + }, + { + "epoch": 117.68, + "learning_rate": 7.610526315789473e-05, + "loss": 0.0003, + "step": 7885 + }, + { + "epoch": 117.7, + "learning_rate": 7.607017543859649e-05, + "loss": 0.0004, + "step": 7886 + }, + { + "epoch": 117.71, + "learning_rate": 7.603508771929824e-05, + "loss": 0.0002, + "step": 7887 + }, + { + "epoch": 117.73, + "learning_rate": 7.6e-05, + "loss": 0.0065, + "step": 7888 + }, + { + "epoch": 117.74, + "learning_rate": 7.596491228070174e-05, + "loss": 0.0003, + "step": 7889 + }, + { + "epoch": 117.76, + "learning_rate": 7.59298245614035e-05, + "loss": 0.0003, + "step": 7890 + }, + { + "epoch": 117.77, + "learning_rate": 7.589473684210525e-05, + "loss": 0.0003, + "step": 7891 + }, + { + "epoch": 117.79, + "learning_rate": 7.585964912280701e-05, + "loss": 0.0004, + "step": 7892 + }, + { + "epoch": 117.8, + "learning_rate": 7.582456140350876e-05, + "loss": 0.0922, + "step": 7893 + }, + { + "epoch": 117.82, + "learning_rate": 7.578947368421052e-05, + "loss": 0.0002, + "step": 7894 + }, + { + "epoch": 117.83, + "learning_rate": 7.575438596491227e-05, + "loss": 0.0011, + "step": 7895 + }, + { + "epoch": 117.85, + "learning_rate": 7.571929824561404e-05, + "loss": 0.0003, + "step": 7896 + }, + { + "epoch": 117.86, + "learning_rate": 7.568421052631577e-05, + "loss": 0.0011, + "step": 7897 + }, + { + "epoch": 117.88, + "learning_rate": 7.564912280701753e-05, + "loss": 0.0003, + "step": 7898 + }, + { + "epoch": 117.89, + "learning_rate": 7.56140350877193e-05, + "loss": 0.0002, + "step": 7899 + }, + { + "epoch": 117.91, + "learning_rate": 7.557894736842104e-05, + "loss": 0.1365, + "step": 7900 + }, + { + "epoch": 117.92, + "learning_rate": 7.55438596491228e-05, + "loss": 0.0007, + "step": 7901 + }, + { + "epoch": 117.94, + "learning_rate": 7.550877192982455e-05, + "loss": 0.0003, + "step": 7902 + }, + { + "epoch": 117.95, + "learning_rate": 7.547368421052632e-05, + "loss": 0.0003, + "step": 7903 + }, + { + "epoch": 117.97, + "learning_rate": 7.543859649122807e-05, + "loss": 0.0002, + "step": 7904 + }, + { + "epoch": 117.98, + "learning_rate": 7.540350877192981e-05, + "loss": 0.0002, + "step": 7905 + }, + { + "epoch": 118.0, + "learning_rate": 7.536842105263156e-05, + "loss": 0.0023, + "step": 7906 + }, + { + "epoch": 118.01, + "learning_rate": 7.533333333333333e-05, + "loss": 0.0019, + "step": 7907 + }, + { + "epoch": 118.03, + "learning_rate": 7.529824561403507e-05, + "loss": 0.0003, + "step": 7908 + }, + { + "epoch": 118.04, + "learning_rate": 7.526315789473684e-05, + "loss": 0.0003, + "step": 7909 + }, + { + "epoch": 118.06, + "learning_rate": 7.522807017543859e-05, + "loss": 0.0929, + "step": 7910 + }, + { + "epoch": 118.07, + "learning_rate": 7.519298245614035e-05, + "loss": 0.0003, + "step": 7911 + }, + { + "epoch": 118.09, + "learning_rate": 7.515789473684211e-05, + "loss": 0.0003, + "step": 7912 + }, + { + "epoch": 118.1, + "learning_rate": 7.512280701754385e-05, + "loss": 0.0003, + "step": 7913 + }, + { + "epoch": 118.12, + "learning_rate": 7.508771929824561e-05, + "loss": 0.0003, + "step": 7914 + }, + { + "epoch": 118.13, + "learning_rate": 7.505263157894736e-05, + "loss": 0.0005, + "step": 7915 + }, + { + "epoch": 118.15, + "learning_rate": 7.501754385964912e-05, + "loss": 0.0003, + "step": 7916 + }, + { + "epoch": 118.16, + "learning_rate": 7.498245614035087e-05, + "loss": 0.0002, + "step": 7917 + }, + { + "epoch": 118.18, + "learning_rate": 7.494736842105263e-05, + "loss": 0.0004, + "step": 7918 + }, + { + "epoch": 118.19, + "learning_rate": 7.491228070175438e-05, + "loss": 0.0237, + "step": 7919 + }, + { + "epoch": 118.21, + "learning_rate": 7.487719298245614e-05, + "loss": 0.0017, + "step": 7920 + }, + { + "epoch": 118.22, + "learning_rate": 7.484210526315789e-05, + "loss": 0.0004, + "step": 7921 + }, + { + "epoch": 118.24, + "learning_rate": 7.480701754385964e-05, + "loss": 0.0038, + "step": 7922 + }, + { + "epoch": 118.25, + "learning_rate": 7.47719298245614e-05, + "loss": 0.0003, + "step": 7923 + }, + { + "epoch": 118.27, + "learning_rate": 7.473684210526315e-05, + "loss": 0.2341, + "step": 7924 + }, + { + "epoch": 118.28, + "learning_rate": 7.47017543859649e-05, + "loss": 0.0003, + "step": 7925 + }, + { + "epoch": 118.3, + "learning_rate": 7.466666666666666e-05, + "loss": 0.0003, + "step": 7926 + }, + { + "epoch": 118.31, + "learning_rate": 7.463157894736841e-05, + "loss": 0.0008, + "step": 7927 + }, + { + "epoch": 118.33, + "learning_rate": 7.459649122807017e-05, + "loss": 0.0004, + "step": 7928 + }, + { + "epoch": 118.34, + "learning_rate": 7.456140350877192e-05, + "loss": 0.0003, + "step": 7929 + }, + { + "epoch": 118.36, + "learning_rate": 7.452631578947368e-05, + "loss": 0.0003, + "step": 7930 + }, + { + "epoch": 118.37, + "learning_rate": 7.449122807017543e-05, + "loss": 0.0003, + "step": 7931 + }, + { + "epoch": 118.39, + "learning_rate": 7.44561403508772e-05, + "loss": 0.0002, + "step": 7932 + }, + { + "epoch": 118.4, + "learning_rate": 7.442105263157894e-05, + "loss": 0.0003, + "step": 7933 + }, + { + "epoch": 118.42, + "learning_rate": 7.438596491228069e-05, + "loss": 0.0002, + "step": 7934 + }, + { + "epoch": 118.43, + "learning_rate": 7.435087719298245e-05, + "loss": 0.0367, + "step": 7935 + }, + { + "epoch": 118.45, + "learning_rate": 7.43157894736842e-05, + "loss": 0.0005, + "step": 7936 + }, + { + "epoch": 118.46, + "learning_rate": 7.428070175438595e-05, + "loss": 0.0003, + "step": 7937 + }, + { + "epoch": 118.48, + "learning_rate": 7.424561403508771e-05, + "loss": 0.0002, + "step": 7938 + }, + { + "epoch": 118.49, + "learning_rate": 7.421052631578946e-05, + "loss": 0.0002, + "step": 7939 + }, + { + "epoch": 118.51, + "learning_rate": 7.417543859649121e-05, + "loss": 0.0004, + "step": 7940 + }, + { + "epoch": 118.52, + "learning_rate": 7.414035087719297e-05, + "loss": 0.001, + "step": 7941 + }, + { + "epoch": 118.54, + "learning_rate": 7.410526315789474e-05, + "loss": 0.0004, + "step": 7942 + }, + { + "epoch": 118.55, + "learning_rate": 7.407017543859649e-05, + "loss": 0.0642, + "step": 7943 + }, + { + "epoch": 118.57, + "learning_rate": 7.403508771929825e-05, + "loss": 0.0002, + "step": 7944 + }, + { + "epoch": 118.58, + "learning_rate": 7.4e-05, + "loss": 0.0374, + "step": 7945 + }, + { + "epoch": 118.59, + "learning_rate": 7.396491228070175e-05, + "loss": 0.0003, + "step": 7946 + }, + { + "epoch": 118.61, + "learning_rate": 7.392982456140351e-05, + "loss": 0.0002, + "step": 7947 + }, + { + "epoch": 118.62, + "learning_rate": 7.389473684210526e-05, + "loss": 0.0003, + "step": 7948 + }, + { + "epoch": 118.64, + "learning_rate": 7.3859649122807e-05, + "loss": 0.0004, + "step": 7949 + }, + { + "epoch": 118.65, + "learning_rate": 7.382456140350877e-05, + "loss": 0.0002, + "step": 7950 + }, + { + "epoch": 118.67, + "learning_rate": 7.378947368421052e-05, + "loss": 0.2289, + "step": 7951 + }, + { + "epoch": 118.68, + "learning_rate": 7.375438596491226e-05, + "loss": 0.0002, + "step": 7952 + }, + { + "epoch": 118.7, + "learning_rate": 7.371929824561403e-05, + "loss": 0.0003, + "step": 7953 + }, + { + "epoch": 118.71, + "learning_rate": 7.368421052631578e-05, + "loss": 0.0003, + "step": 7954 + }, + { + "epoch": 118.73, + "learning_rate": 7.364912280701754e-05, + "loss": 0.0003, + "step": 7955 + }, + { + "epoch": 118.74, + "learning_rate": 7.36140350877193e-05, + "loss": 0.0003, + "step": 7956 + }, + { + "epoch": 118.76, + "learning_rate": 7.357894736842105e-05, + "loss": 0.0021, + "step": 7957 + }, + { + "epoch": 118.77, + "learning_rate": 7.35438596491228e-05, + "loss": 0.0127, + "step": 7958 + }, + { + "epoch": 118.79, + "learning_rate": 7.350877192982456e-05, + "loss": 0.0003, + "step": 7959 + }, + { + "epoch": 118.8, + "learning_rate": 7.347368421052631e-05, + "loss": 0.1613, + "step": 7960 + }, + { + "epoch": 118.82, + "learning_rate": 7.343859649122806e-05, + "loss": 0.0002, + "step": 7961 + }, + { + "epoch": 118.83, + "learning_rate": 7.340350877192982e-05, + "loss": 0.0003, + "step": 7962 + }, + { + "epoch": 118.85, + "learning_rate": 7.336842105263157e-05, + "loss": 0.0003, + "step": 7963 + }, + { + "epoch": 118.86, + "learning_rate": 7.333333333333332e-05, + "loss": 0.1024, + "step": 7964 + }, + { + "epoch": 118.88, + "learning_rate": 7.329824561403508e-05, + "loss": 0.0002, + "step": 7965 + }, + { + "epoch": 118.89, + "learning_rate": 7.326315789473683e-05, + "loss": 0.0003, + "step": 7966 + }, + { + "epoch": 118.91, + "learning_rate": 7.322807017543859e-05, + "loss": 0.0904, + "step": 7967 + }, + { + "epoch": 118.92, + "learning_rate": 7.319298245614035e-05, + "loss": 0.0003, + "step": 7968 + }, + { + "epoch": 118.94, + "learning_rate": 7.31578947368421e-05, + "loss": 0.0006, + "step": 7969 + }, + { + "epoch": 118.95, + "learning_rate": 7.312280701754385e-05, + "loss": 0.0004, + "step": 7970 + }, + { + "epoch": 118.97, + "learning_rate": 7.308771929824561e-05, + "loss": 0.0002, + "step": 7971 + }, + { + "epoch": 118.98, + "learning_rate": 7.305263157894736e-05, + "loss": 0.0002, + "step": 7972 + }, + { + "epoch": 119.0, + "learning_rate": 7.301754385964911e-05, + "loss": 0.057, + "step": 7973 + }, + { + "epoch": 119.01, + "learning_rate": 7.298245614035087e-05, + "loss": 0.0006, + "step": 7974 + }, + { + "epoch": 119.03, + "learning_rate": 7.294736842105262e-05, + "loss": 0.0041, + "step": 7975 + }, + { + "epoch": 119.04, + "learning_rate": 7.291228070175437e-05, + "loss": 0.0006, + "step": 7976 + }, + { + "epoch": 119.06, + "learning_rate": 7.287719298245613e-05, + "loss": 0.0002, + "step": 7977 + }, + { + "epoch": 119.07, + "learning_rate": 7.284210526315788e-05, + "loss": 0.2424, + "step": 7978 + }, + { + "epoch": 119.09, + "learning_rate": 7.280701754385964e-05, + "loss": 0.0006, + "step": 7979 + }, + { + "epoch": 119.1, + "learning_rate": 7.27719298245614e-05, + "loss": 0.1771, + "step": 7980 + }, + { + "epoch": 119.12, + "learning_rate": 7.273684210526316e-05, + "loss": 0.0004, + "step": 7981 + }, + { + "epoch": 119.13, + "learning_rate": 7.27017543859649e-05, + "loss": 0.0022, + "step": 7982 + }, + { + "epoch": 119.15, + "learning_rate": 7.266666666666667e-05, + "loss": 0.0002, + "step": 7983 + }, + { + "epoch": 119.16, + "learning_rate": 7.263157894736842e-05, + "loss": 0.0002, + "step": 7984 + }, + { + "epoch": 119.18, + "learning_rate": 7.259649122807016e-05, + "loss": 0.0003, + "step": 7985 + }, + { + "epoch": 119.19, + "learning_rate": 7.256140350877193e-05, + "loss": 0.0003, + "step": 7986 + }, + { + "epoch": 119.21, + "learning_rate": 7.252631578947368e-05, + "loss": 0.0191, + "step": 7987 + }, + { + "epoch": 119.22, + "learning_rate": 7.249122807017542e-05, + "loss": 0.0004, + "step": 7988 + }, + { + "epoch": 119.24, + "learning_rate": 7.245614035087719e-05, + "loss": 0.0003, + "step": 7989 + }, + { + "epoch": 119.25, + "learning_rate": 7.242105263157894e-05, + "loss": 0.0003, + "step": 7990 + }, + { + "epoch": 119.27, + "learning_rate": 7.23859649122807e-05, + "loss": 0.0003, + "step": 7991 + }, + { + "epoch": 119.28, + "learning_rate": 7.235087719298246e-05, + "loss": 0.0003, + "step": 7992 + }, + { + "epoch": 119.3, + "learning_rate": 7.231578947368421e-05, + "loss": 0.0003, + "step": 7993 + }, + { + "epoch": 119.31, + "learning_rate": 7.228070175438596e-05, + "loss": 0.0003, + "step": 7994 + }, + { + "epoch": 119.33, + "learning_rate": 7.224561403508772e-05, + "loss": 0.0002, + "step": 7995 + }, + { + "epoch": 119.34, + "learning_rate": 7.221052631578947e-05, + "loss": 0.0003, + "step": 7996 + }, + { + "epoch": 119.36, + "learning_rate": 7.217543859649122e-05, + "loss": 0.0003, + "step": 7997 + }, + { + "epoch": 119.37, + "learning_rate": 7.214035087719298e-05, + "loss": 0.0007, + "step": 7998 + }, + { + "epoch": 119.39, + "learning_rate": 7.210526315789473e-05, + "loss": 0.0004, + "step": 7999 + }, + { + "epoch": 119.4, + "learning_rate": 7.207017543859648e-05, + "loss": 0.0003, + "step": 8000 + }, + { + "epoch": 119.4, + "eval_accuracy": 0.8712677435144396, + "eval_f1": 0.8705504000781962, + "eval_loss": 0.698488175868988, + "eval_runtime": 343.8989, + "eval_samples_per_second": 11.881, + "eval_steps_per_second": 0.744, + "step": 8000 + }, + { + "epoch": 119.42, + "learning_rate": 7.203508771929824e-05, + "loss": 0.0003, + "step": 8001 + }, + { + "epoch": 119.43, + "learning_rate": 7.199999999999999e-05, + "loss": 0.0019, + "step": 8002 + }, + { + "epoch": 119.45, + "learning_rate": 7.196491228070175e-05, + "loss": 0.0002, + "step": 8003 + }, + { + "epoch": 119.46, + "learning_rate": 7.19298245614035e-05, + "loss": 0.0014, + "step": 8004 + }, + { + "epoch": 119.48, + "learning_rate": 7.189473684210526e-05, + "loss": 0.0002, + "step": 8005 + }, + { + "epoch": 119.49, + "learning_rate": 7.185964912280701e-05, + "loss": 0.0025, + "step": 8006 + }, + { + "epoch": 119.51, + "learning_rate": 7.182456140350877e-05, + "loss": 0.0003, + "step": 8007 + }, + { + "epoch": 119.52, + "learning_rate": 7.178947368421052e-05, + "loss": 0.1452, + "step": 8008 + }, + { + "epoch": 119.54, + "learning_rate": 7.175438596491227e-05, + "loss": 0.0008, + "step": 8009 + }, + { + "epoch": 119.55, + "learning_rate": 7.171929824561403e-05, + "loss": 0.0003, + "step": 8010 + }, + { + "epoch": 119.57, + "learning_rate": 7.168421052631578e-05, + "loss": 0.0041, + "step": 8011 + }, + { + "epoch": 119.58, + "learning_rate": 7.164912280701753e-05, + "loss": 0.0018, + "step": 8012 + }, + { + "epoch": 119.59, + "learning_rate": 7.161403508771929e-05, + "loss": 0.0003, + "step": 8013 + }, + { + "epoch": 119.61, + "learning_rate": 7.157894736842104e-05, + "loss": 0.0003, + "step": 8014 + }, + { + "epoch": 119.62, + "learning_rate": 7.15438596491228e-05, + "loss": 0.107, + "step": 8015 + }, + { + "epoch": 119.64, + "learning_rate": 7.150877192982455e-05, + "loss": 0.0005, + "step": 8016 + }, + { + "epoch": 119.65, + "learning_rate": 7.147368421052631e-05, + "loss": 0.0003, + "step": 8017 + }, + { + "epoch": 119.67, + "learning_rate": 7.143859649122806e-05, + "loss": 0.0003, + "step": 8018 + }, + { + "epoch": 119.68, + "learning_rate": 7.140350877192983e-05, + "loss": 0.0003, + "step": 8019 + }, + { + "epoch": 119.7, + "learning_rate": 7.136842105263157e-05, + "loss": 0.0003, + "step": 8020 + }, + { + "epoch": 119.71, + "learning_rate": 7.133333333333332e-05, + "loss": 0.0006, + "step": 8021 + }, + { + "epoch": 119.73, + "learning_rate": 7.129824561403509e-05, + "loss": 0.0002, + "step": 8022 + }, + { + "epoch": 119.74, + "learning_rate": 7.126315789473683e-05, + "loss": 0.0003, + "step": 8023 + }, + { + "epoch": 119.76, + "learning_rate": 7.122807017543858e-05, + "loss": 0.0003, + "step": 8024 + }, + { + "epoch": 119.77, + "learning_rate": 7.119298245614035e-05, + "loss": 0.0004, + "step": 8025 + }, + { + "epoch": 119.79, + "learning_rate": 7.11578947368421e-05, + "loss": 0.0003, + "step": 8026 + }, + { + "epoch": 119.8, + "learning_rate": 7.112280701754386e-05, + "loss": 0.0003, + "step": 8027 + }, + { + "epoch": 119.82, + "learning_rate": 7.10877192982456e-05, + "loss": 0.0029, + "step": 8028 + }, + { + "epoch": 119.83, + "learning_rate": 7.105263157894735e-05, + "loss": 0.0003, + "step": 8029 + }, + { + "epoch": 119.85, + "learning_rate": 7.101754385964912e-05, + "loss": 0.0003, + "step": 8030 + }, + { + "epoch": 119.86, + "learning_rate": 7.098245614035088e-05, + "loss": 0.0008, + "step": 8031 + }, + { + "epoch": 119.88, + "learning_rate": 7.094736842105263e-05, + "loss": 0.0009, + "step": 8032 + }, + { + "epoch": 119.89, + "learning_rate": 7.091228070175438e-05, + "loss": 0.0009, + "step": 8033 + }, + { + "epoch": 119.91, + "learning_rate": 7.087719298245614e-05, + "loss": 0.0003, + "step": 8034 + }, + { + "epoch": 119.92, + "learning_rate": 7.084210526315789e-05, + "loss": 0.0002, + "step": 8035 + }, + { + "epoch": 119.94, + "learning_rate": 7.080701754385964e-05, + "loss": 0.0003, + "step": 8036 + }, + { + "epoch": 119.95, + "learning_rate": 7.07719298245614e-05, + "loss": 0.0011, + "step": 8037 + }, + { + "epoch": 119.97, + "learning_rate": 7.073684210526315e-05, + "loss": 0.0002, + "step": 8038 + }, + { + "epoch": 119.98, + "learning_rate": 7.070175438596491e-05, + "loss": 0.0066, + "step": 8039 + }, + { + "epoch": 120.0, + "learning_rate": 7.066666666666666e-05, + "loss": 0.0071, + "step": 8040 + }, + { + "epoch": 120.01, + "learning_rate": 7.063157894736841e-05, + "loss": 0.0009, + "step": 8041 + }, + { + "epoch": 120.03, + "learning_rate": 7.059649122807017e-05, + "loss": 0.0003, + "step": 8042 + }, + { + "epoch": 120.04, + "learning_rate": 7.056140350877193e-05, + "loss": 0.0008, + "step": 8043 + }, + { + "epoch": 120.06, + "learning_rate": 7.052631578947368e-05, + "loss": 0.0004, + "step": 8044 + }, + { + "epoch": 120.07, + "learning_rate": 7.049122807017543e-05, + "loss": 0.0003, + "step": 8045 + }, + { + "epoch": 120.09, + "learning_rate": 7.045614035087719e-05, + "loss": 0.001, + "step": 8046 + }, + { + "epoch": 120.1, + "learning_rate": 7.042105263157894e-05, + "loss": 0.0188, + "step": 8047 + }, + { + "epoch": 120.12, + "learning_rate": 7.038596491228069e-05, + "loss": 0.0004, + "step": 8048 + }, + { + "epoch": 120.13, + "learning_rate": 7.035087719298245e-05, + "loss": 0.0003, + "step": 8049 + }, + { + "epoch": 120.15, + "learning_rate": 7.03157894736842e-05, + "loss": 0.0002, + "step": 8050 + }, + { + "epoch": 120.16, + "learning_rate": 7.028070175438596e-05, + "loss": 0.0002, + "step": 8051 + }, + { + "epoch": 120.18, + "learning_rate": 7.024561403508771e-05, + "loss": 0.0003, + "step": 8052 + }, + { + "epoch": 120.19, + "learning_rate": 7.021052631578946e-05, + "loss": 0.0003, + "step": 8053 + }, + { + "epoch": 120.21, + "learning_rate": 7.017543859649122e-05, + "loss": 0.0003, + "step": 8054 + }, + { + "epoch": 120.22, + "learning_rate": 7.014035087719299e-05, + "loss": 0.0003, + "step": 8055 + }, + { + "epoch": 120.24, + "learning_rate": 7.010526315789473e-05, + "loss": 0.2835, + "step": 8056 + }, + { + "epoch": 120.25, + "learning_rate": 7.007017543859648e-05, + "loss": 0.0002, + "step": 8057 + }, + { + "epoch": 120.27, + "learning_rate": 7.003508771929825e-05, + "loss": 0.0003, + "step": 8058 + }, + { + "epoch": 120.28, + "learning_rate": 7e-05, + "loss": 0.0004, + "step": 8059 + }, + { + "epoch": 120.3, + "learning_rate": 6.996491228070174e-05, + "loss": 0.0002, + "step": 8060 + }, + { + "epoch": 120.31, + "learning_rate": 6.99298245614035e-05, + "loss": 0.0003, + "step": 8061 + }, + { + "epoch": 120.33, + "learning_rate": 6.989473684210525e-05, + "loss": 0.0003, + "step": 8062 + }, + { + "epoch": 120.34, + "learning_rate": 6.985964912280702e-05, + "loss": 0.0009, + "step": 8063 + }, + { + "epoch": 120.36, + "learning_rate": 6.982456140350876e-05, + "loss": 0.0044, + "step": 8064 + }, + { + "epoch": 120.37, + "learning_rate": 6.978947368421051e-05, + "loss": 0.0004, + "step": 8065 + }, + { + "epoch": 120.39, + "learning_rate": 6.975438596491228e-05, + "loss": 0.0003, + "step": 8066 + }, + { + "epoch": 120.4, + "learning_rate": 6.971929824561404e-05, + "loss": 0.0003, + "step": 8067 + }, + { + "epoch": 120.42, + "learning_rate": 6.968421052631579e-05, + "loss": 0.0003, + "step": 8068 + }, + { + "epoch": 120.43, + "learning_rate": 6.964912280701754e-05, + "loss": 0.0015, + "step": 8069 + }, + { + "epoch": 120.45, + "learning_rate": 6.96140350877193e-05, + "loss": 0.0004, + "step": 8070 + }, + { + "epoch": 120.46, + "learning_rate": 6.957894736842105e-05, + "loss": 0.0004, + "step": 8071 + }, + { + "epoch": 120.48, + "learning_rate": 6.95438596491228e-05, + "loss": 0.0004, + "step": 8072 + }, + { + "epoch": 120.49, + "learning_rate": 6.950877192982456e-05, + "loss": 0.0011, + "step": 8073 + }, + { + "epoch": 120.51, + "learning_rate": 6.947368421052631e-05, + "loss": 0.0021, + "step": 8074 + }, + { + "epoch": 120.52, + "learning_rate": 6.943859649122807e-05, + "loss": 0.0526, + "step": 8075 + }, + { + "epoch": 120.54, + "learning_rate": 6.940350877192982e-05, + "loss": 0.006, + "step": 8076 + }, + { + "epoch": 120.55, + "learning_rate": 6.936842105263157e-05, + "loss": 0.0005, + "step": 8077 + }, + { + "epoch": 120.57, + "learning_rate": 6.933333333333333e-05, + "loss": 0.0004, + "step": 8078 + }, + { + "epoch": 120.58, + "learning_rate": 6.929824561403508e-05, + "loss": 0.0048, + "step": 8079 + }, + { + "epoch": 120.59, + "learning_rate": 6.926315789473684e-05, + "loss": 0.0005, + "step": 8080 + }, + { + "epoch": 120.61, + "learning_rate": 6.922807017543859e-05, + "loss": 0.0004, + "step": 8081 + }, + { + "epoch": 120.62, + "learning_rate": 6.919298245614035e-05, + "loss": 0.0125, + "step": 8082 + }, + { + "epoch": 120.64, + "learning_rate": 6.91578947368421e-05, + "loss": 0.0353, + "step": 8083 + }, + { + "epoch": 120.65, + "learning_rate": 6.912280701754385e-05, + "loss": 0.015, + "step": 8084 + }, + { + "epoch": 120.67, + "learning_rate": 6.908771929824561e-05, + "loss": 0.0004, + "step": 8085 + }, + { + "epoch": 120.68, + "learning_rate": 6.905263157894736e-05, + "loss": 0.0691, + "step": 8086 + }, + { + "epoch": 120.7, + "learning_rate": 6.901754385964912e-05, + "loss": 0.0005, + "step": 8087 + }, + { + "epoch": 120.71, + "learning_rate": 6.898245614035087e-05, + "loss": 0.0014, + "step": 8088 + }, + { + "epoch": 120.73, + "learning_rate": 6.894736842105262e-05, + "loss": 0.3338, + "step": 8089 + }, + { + "epoch": 120.74, + "learning_rate": 6.891228070175438e-05, + "loss": 0.0003, + "step": 8090 + }, + { + "epoch": 120.76, + "learning_rate": 6.887719298245613e-05, + "loss": 0.0003, + "step": 8091 + }, + { + "epoch": 120.77, + "learning_rate": 6.884210526315788e-05, + "loss": 0.0003, + "step": 8092 + }, + { + "epoch": 120.79, + "learning_rate": 6.880701754385964e-05, + "loss": 0.0298, + "step": 8093 + }, + { + "epoch": 120.8, + "learning_rate": 6.87719298245614e-05, + "loss": 0.001, + "step": 8094 + }, + { + "epoch": 120.82, + "learning_rate": 6.873684210526315e-05, + "loss": 0.0004, + "step": 8095 + }, + { + "epoch": 120.83, + "learning_rate": 6.87017543859649e-05, + "loss": 0.0004, + "step": 8096 + }, + { + "epoch": 120.85, + "learning_rate": 6.866666666666666e-05, + "loss": 0.0509, + "step": 8097 + }, + { + "epoch": 120.86, + "learning_rate": 6.863157894736841e-05, + "loss": 0.1401, + "step": 8098 + }, + { + "epoch": 120.88, + "learning_rate": 6.859649122807018e-05, + "loss": 0.0007, + "step": 8099 + }, + { + "epoch": 120.89, + "learning_rate": 6.856140350877192e-05, + "loss": 0.0005, + "step": 8100 + }, + { + "epoch": 120.91, + "learning_rate": 6.852631578947367e-05, + "loss": 0.0005, + "step": 8101 + }, + { + "epoch": 120.92, + "learning_rate": 6.849122807017544e-05, + "loss": 0.001, + "step": 8102 + }, + { + "epoch": 120.94, + "learning_rate": 6.845614035087718e-05, + "loss": 0.0003, + "step": 8103 + }, + { + "epoch": 120.95, + "learning_rate": 6.842105263157893e-05, + "loss": 0.174, + "step": 8104 + }, + { + "epoch": 120.97, + "learning_rate": 6.83859649122807e-05, + "loss": 0.001, + "step": 8105 + }, + { + "epoch": 120.98, + "learning_rate": 6.835087719298246e-05, + "loss": 0.0006, + "step": 8106 + }, + { + "epoch": 121.0, + "learning_rate": 6.83157894736842e-05, + "loss": 0.0003, + "step": 8107 + }, + { + "epoch": 121.01, + "learning_rate": 6.828070175438596e-05, + "loss": 0.0004, + "step": 8108 + }, + { + "epoch": 121.03, + "learning_rate": 6.824561403508772e-05, + "loss": 0.0005, + "step": 8109 + }, + { + "epoch": 121.04, + "learning_rate": 6.821052631578947e-05, + "loss": 0.0006, + "step": 8110 + }, + { + "epoch": 121.06, + "learning_rate": 6.817543859649123e-05, + "loss": 0.0008, + "step": 8111 + }, + { + "epoch": 121.07, + "learning_rate": 6.814035087719298e-05, + "loss": 0.0035, + "step": 8112 + }, + { + "epoch": 121.09, + "learning_rate": 6.810526315789473e-05, + "loss": 0.0006, + "step": 8113 + }, + { + "epoch": 121.1, + "learning_rate": 6.807017543859649e-05, + "loss": 0.0008, + "step": 8114 + }, + { + "epoch": 121.12, + "learning_rate": 6.803508771929824e-05, + "loss": 0.001, + "step": 8115 + }, + { + "epoch": 121.13, + "learning_rate": 6.799999999999999e-05, + "loss": 0.0007, + "step": 8116 + }, + { + "epoch": 121.15, + "learning_rate": 6.796491228070175e-05, + "loss": 0.0006, + "step": 8117 + }, + { + "epoch": 121.16, + "learning_rate": 6.792982456140351e-05, + "loss": 0.0007, + "step": 8118 + }, + { + "epoch": 121.18, + "learning_rate": 6.789473684210526e-05, + "loss": 0.0272, + "step": 8119 + }, + { + "epoch": 121.19, + "learning_rate": 6.785964912280701e-05, + "loss": 0.0264, + "step": 8120 + }, + { + "epoch": 121.21, + "learning_rate": 6.782456140350877e-05, + "loss": 0.0007, + "step": 8121 + }, + { + "epoch": 121.22, + "learning_rate": 6.778947368421052e-05, + "loss": 0.0007, + "step": 8122 + }, + { + "epoch": 121.24, + "learning_rate": 6.775438596491228e-05, + "loss": 0.0009, + "step": 8123 + }, + { + "epoch": 121.25, + "learning_rate": 6.771929824561403e-05, + "loss": 0.0006, + "step": 8124 + }, + { + "epoch": 121.27, + "learning_rate": 6.768421052631578e-05, + "loss": 0.0005, + "step": 8125 + }, + { + "epoch": 121.28, + "learning_rate": 6.764912280701754e-05, + "loss": 0.0193, + "step": 8126 + }, + { + "epoch": 121.3, + "learning_rate": 6.761403508771929e-05, + "loss": 0.0092, + "step": 8127 + }, + { + "epoch": 121.31, + "learning_rate": 6.757894736842104e-05, + "loss": 0.0099, + "step": 8128 + }, + { + "epoch": 121.33, + "learning_rate": 6.75438596491228e-05, + "loss": 0.0004, + "step": 8129 + }, + { + "epoch": 121.34, + "learning_rate": 6.750877192982456e-05, + "loss": 0.0005, + "step": 8130 + }, + { + "epoch": 121.36, + "learning_rate": 6.747368421052631e-05, + "loss": 0.0005, + "step": 8131 + }, + { + "epoch": 121.37, + "learning_rate": 6.743859649122806e-05, + "loss": 0.0004, + "step": 8132 + }, + { + "epoch": 121.39, + "learning_rate": 6.740350877192982e-05, + "loss": 0.001, + "step": 8133 + }, + { + "epoch": 121.4, + "learning_rate": 6.736842105263157e-05, + "loss": 0.0005, + "step": 8134 + }, + { + "epoch": 121.42, + "learning_rate": 6.733333333333333e-05, + "loss": 0.0004, + "step": 8135 + }, + { + "epoch": 121.43, + "learning_rate": 6.729824561403508e-05, + "loss": 0.0005, + "step": 8136 + }, + { + "epoch": 121.45, + "learning_rate": 6.726315789473683e-05, + "loss": 0.4882, + "step": 8137 + }, + { + "epoch": 121.46, + "learning_rate": 6.72280701754386e-05, + "loss": 0.0033, + "step": 8138 + }, + { + "epoch": 121.48, + "learning_rate": 6.719298245614034e-05, + "loss": 0.0003, + "step": 8139 + }, + { + "epoch": 121.49, + "learning_rate": 6.715789473684209e-05, + "loss": 0.0005, + "step": 8140 + }, + { + "epoch": 121.51, + "learning_rate": 6.712280701754385e-05, + "loss": 0.0008, + "step": 8141 + }, + { + "epoch": 121.52, + "learning_rate": 6.70877192982456e-05, + "loss": 0.0311, + "step": 8142 + }, + { + "epoch": 121.54, + "learning_rate": 6.705263157894737e-05, + "loss": 0.0003, + "step": 8143 + }, + { + "epoch": 121.55, + "learning_rate": 6.701754385964911e-05, + "loss": 0.0004, + "step": 8144 + }, + { + "epoch": 121.57, + "learning_rate": 6.698245614035088e-05, + "loss": 0.0012, + "step": 8145 + }, + { + "epoch": 121.58, + "learning_rate": 6.694736842105263e-05, + "loss": 0.0004, + "step": 8146 + }, + { + "epoch": 121.59, + "learning_rate": 6.691228070175439e-05, + "loss": 0.0003, + "step": 8147 + }, + { + "epoch": 121.61, + "learning_rate": 6.687719298245614e-05, + "loss": 0.0031, + "step": 8148 + }, + { + "epoch": 121.62, + "learning_rate": 6.684210526315789e-05, + "loss": 0.0031, + "step": 8149 + }, + { + "epoch": 121.64, + "learning_rate": 6.680701754385965e-05, + "loss": 0.0003, + "step": 8150 + }, + { + "epoch": 121.65, + "learning_rate": 6.67719298245614e-05, + "loss": 0.0003, + "step": 8151 + }, + { + "epoch": 121.67, + "learning_rate": 6.673684210526315e-05, + "loss": 0.0006, + "step": 8152 + }, + { + "epoch": 121.68, + "learning_rate": 6.670175438596491e-05, + "loss": 0.0005, + "step": 8153 + }, + { + "epoch": 121.7, + "learning_rate": 6.666666666666666e-05, + "loss": 0.0005, + "step": 8154 + }, + { + "epoch": 121.71, + "learning_rate": 6.663157894736842e-05, + "loss": 0.0006, + "step": 8155 + }, + { + "epoch": 121.73, + "learning_rate": 6.659649122807017e-05, + "loss": 0.0005, + "step": 8156 + }, + { + "epoch": 121.74, + "learning_rate": 6.656140350877193e-05, + "loss": 0.0004, + "step": 8157 + }, + { + "epoch": 121.76, + "learning_rate": 6.652631578947368e-05, + "loss": 0.0384, + "step": 8158 + }, + { + "epoch": 121.77, + "learning_rate": 6.649122807017543e-05, + "loss": 0.0011, + "step": 8159 + }, + { + "epoch": 121.79, + "learning_rate": 6.645614035087719e-05, + "loss": 0.0004, + "step": 8160 + }, + { + "epoch": 121.8, + "learning_rate": 6.642105263157894e-05, + "loss": 0.0032, + "step": 8161 + }, + { + "epoch": 121.82, + "learning_rate": 6.63859649122807e-05, + "loss": 0.0018, + "step": 8162 + }, + { + "epoch": 121.83, + "learning_rate": 6.635087719298245e-05, + "loss": 0.0003, + "step": 8163 + }, + { + "epoch": 121.85, + "learning_rate": 6.63157894736842e-05, + "loss": 0.0392, + "step": 8164 + }, + { + "epoch": 121.86, + "learning_rate": 6.628070175438596e-05, + "loss": 0.0005, + "step": 8165 + }, + { + "epoch": 121.88, + "learning_rate": 6.624561403508771e-05, + "loss": 0.0014, + "step": 8166 + }, + { + "epoch": 121.89, + "learning_rate": 6.621052631578946e-05, + "loss": 0.0004, + "step": 8167 + }, + { + "epoch": 121.91, + "learning_rate": 6.617543859649122e-05, + "loss": 0.0008, + "step": 8168 + }, + { + "epoch": 121.92, + "learning_rate": 6.614035087719298e-05, + "loss": 0.0003, + "step": 8169 + }, + { + "epoch": 121.94, + "learning_rate": 6.610526315789473e-05, + "loss": 0.001, + "step": 8170 + }, + { + "epoch": 121.95, + "learning_rate": 6.607017543859648e-05, + "loss": 0.0003, + "step": 8171 + }, + { + "epoch": 121.97, + "learning_rate": 6.603508771929824e-05, + "loss": 0.0003, + "step": 8172 + }, + { + "epoch": 121.98, + "learning_rate": 6.599999999999999e-05, + "loss": 0.0005, + "step": 8173 + }, + { + "epoch": 122.0, + "learning_rate": 6.596491228070175e-05, + "loss": 0.0004, + "step": 8174 + }, + { + "epoch": 122.01, + "learning_rate": 6.59298245614035e-05, + "loss": 0.0211, + "step": 8175 + }, + { + "epoch": 122.03, + "learning_rate": 6.589473684210525e-05, + "loss": 0.0003, + "step": 8176 + }, + { + "epoch": 122.04, + "learning_rate": 6.585964912280701e-05, + "loss": 0.002, + "step": 8177 + }, + { + "epoch": 122.06, + "learning_rate": 6.582456140350876e-05, + "loss": 0.0005, + "step": 8178 + }, + { + "epoch": 122.07, + "learning_rate": 6.578947368421051e-05, + "loss": 0.0003, + "step": 8179 + }, + { + "epoch": 122.09, + "learning_rate": 6.575438596491227e-05, + "loss": 0.0008, + "step": 8180 + }, + { + "epoch": 122.1, + "learning_rate": 6.571929824561404e-05, + "loss": 0.0044, + "step": 8181 + }, + { + "epoch": 122.12, + "learning_rate": 6.568421052631578e-05, + "loss": 0.0005, + "step": 8182 + }, + { + "epoch": 122.13, + "learning_rate": 6.564912280701753e-05, + "loss": 0.0003, + "step": 8183 + }, + { + "epoch": 122.15, + "learning_rate": 6.56140350877193e-05, + "loss": 0.0019, + "step": 8184 + }, + { + "epoch": 122.16, + "learning_rate": 6.557894736842104e-05, + "loss": 0.0017, + "step": 8185 + }, + { + "epoch": 122.18, + "learning_rate": 6.554385964912281e-05, + "loss": 0.0003, + "step": 8186 + }, + { + "epoch": 122.19, + "learning_rate": 6.550877192982456e-05, + "loss": 0.0005, + "step": 8187 + }, + { + "epoch": 122.21, + "learning_rate": 6.54736842105263e-05, + "loss": 0.0005, + "step": 8188 + }, + { + "epoch": 122.22, + "learning_rate": 6.543859649122807e-05, + "loss": 0.0096, + "step": 8189 + }, + { + "epoch": 122.24, + "learning_rate": 6.540350877192982e-05, + "loss": 0.0004, + "step": 8190 + }, + { + "epoch": 122.25, + "learning_rate": 6.536842105263156e-05, + "loss": 0.0006, + "step": 8191 + }, + { + "epoch": 122.27, + "learning_rate": 6.533333333333333e-05, + "loss": 0.0184, + "step": 8192 + }, + { + "epoch": 122.28, + "learning_rate": 6.529824561403509e-05, + "loss": 0.0003, + "step": 8193 + }, + { + "epoch": 122.3, + "learning_rate": 6.526315789473684e-05, + "loss": 0.0005, + "step": 8194 + }, + { + "epoch": 122.31, + "learning_rate": 6.522807017543859e-05, + "loss": 0.0003, + "step": 8195 + }, + { + "epoch": 122.33, + "learning_rate": 6.519298245614035e-05, + "loss": 0.0007, + "step": 8196 + }, + { + "epoch": 122.34, + "learning_rate": 6.51578947368421e-05, + "loss": 0.004, + "step": 8197 + }, + { + "epoch": 122.36, + "learning_rate": 6.512280701754386e-05, + "loss": 0.0003, + "step": 8198 + }, + { + "epoch": 122.37, + "learning_rate": 6.508771929824561e-05, + "loss": 0.0003, + "step": 8199 + }, + { + "epoch": 122.39, + "learning_rate": 6.505263157894736e-05, + "loss": 0.0048, + "step": 8200 + }, + { + "epoch": 122.39, + "eval_accuracy": 0.8739598629466471, + "eval_f1": 0.8764769996728009, + "eval_loss": 0.6620244979858398, + "eval_runtime": 344.6899, + "eval_samples_per_second": 11.854, + "eval_steps_per_second": 0.743, + "step": 8200 + }, + { + "epoch": 122.4, + "learning_rate": 6.501754385964912e-05, + "loss": 0.001, + "step": 8201 + }, + { + "epoch": 122.42, + "learning_rate": 6.498245614035087e-05, + "loss": 0.0004, + "step": 8202 + }, + { + "epoch": 122.43, + "learning_rate": 6.494736842105262e-05, + "loss": 0.0022, + "step": 8203 + }, + { + "epoch": 122.45, + "learning_rate": 6.491228070175438e-05, + "loss": 0.0004, + "step": 8204 + }, + { + "epoch": 122.46, + "learning_rate": 6.487719298245614e-05, + "loss": 0.0034, + "step": 8205 + }, + { + "epoch": 122.48, + "learning_rate": 6.484210526315789e-05, + "loss": 0.0013, + "step": 8206 + }, + { + "epoch": 122.49, + "learning_rate": 6.480701754385964e-05, + "loss": 0.0004, + "step": 8207 + }, + { + "epoch": 122.51, + "learning_rate": 6.47719298245614e-05, + "loss": 0.0004, + "step": 8208 + }, + { + "epoch": 122.52, + "learning_rate": 6.473684210526315e-05, + "loss": 0.0003, + "step": 8209 + }, + { + "epoch": 122.54, + "learning_rate": 6.470175438596491e-05, + "loss": 0.0003, + "step": 8210 + }, + { + "epoch": 122.55, + "learning_rate": 6.466666666666666e-05, + "loss": 0.0003, + "step": 8211 + }, + { + "epoch": 122.57, + "learning_rate": 6.463157894736841e-05, + "loss": 0.0003, + "step": 8212 + }, + { + "epoch": 122.58, + "learning_rate": 6.459649122807017e-05, + "loss": 0.1371, + "step": 8213 + }, + { + "epoch": 122.59, + "learning_rate": 6.456140350877192e-05, + "loss": 0.0026, + "step": 8214 + }, + { + "epoch": 122.61, + "learning_rate": 6.452631578947367e-05, + "loss": 0.0022, + "step": 8215 + }, + { + "epoch": 122.62, + "learning_rate": 6.449122807017543e-05, + "loss": 0.031, + "step": 8216 + }, + { + "epoch": 122.64, + "learning_rate": 6.445614035087718e-05, + "loss": 0.0003, + "step": 8217 + }, + { + "epoch": 122.65, + "learning_rate": 6.442105263157894e-05, + "loss": 0.0004, + "step": 8218 + }, + { + "epoch": 122.67, + "learning_rate": 6.438596491228069e-05, + "loss": 0.177, + "step": 8219 + }, + { + "epoch": 122.68, + "learning_rate": 6.435087719298246e-05, + "loss": 0.0004, + "step": 8220 + }, + { + "epoch": 122.7, + "learning_rate": 6.43157894736842e-05, + "loss": 0.0004, + "step": 8221 + }, + { + "epoch": 122.71, + "learning_rate": 6.428070175438597e-05, + "loss": 0.0004, + "step": 8222 + }, + { + "epoch": 122.73, + "learning_rate": 6.424561403508772e-05, + "loss": 0.0004, + "step": 8223 + }, + { + "epoch": 122.74, + "learning_rate": 6.421052631578946e-05, + "loss": 0.0002, + "step": 8224 + }, + { + "epoch": 122.76, + "learning_rate": 6.417543859649123e-05, + "loss": 0.0923, + "step": 8225 + }, + { + "epoch": 122.77, + "learning_rate": 6.414035087719297e-05, + "loss": 0.0009, + "step": 8226 + }, + { + "epoch": 122.79, + "learning_rate": 6.410526315789472e-05, + "loss": 0.0002, + "step": 8227 + }, + { + "epoch": 122.8, + "learning_rate": 6.407017543859649e-05, + "loss": 0.0006, + "step": 8228 + }, + { + "epoch": 122.82, + "learning_rate": 6.403508771929823e-05, + "loss": 0.0004, + "step": 8229 + }, + { + "epoch": 122.83, + "learning_rate": 6.4e-05, + "loss": 0.0003, + "step": 8230 + }, + { + "epoch": 122.85, + "learning_rate": 6.396491228070175e-05, + "loss": 0.0004, + "step": 8231 + }, + { + "epoch": 122.86, + "learning_rate": 6.392982456140351e-05, + "loss": 0.0664, + "step": 8232 + }, + { + "epoch": 122.88, + "learning_rate": 6.389473684210526e-05, + "loss": 0.0004, + "step": 8233 + }, + { + "epoch": 122.89, + "learning_rate": 6.385964912280702e-05, + "loss": 0.0002, + "step": 8234 + }, + { + "epoch": 122.91, + "learning_rate": 6.382456140350877e-05, + "loss": 0.0003, + "step": 8235 + }, + { + "epoch": 122.92, + "learning_rate": 6.378947368421052e-05, + "loss": 0.0003, + "step": 8236 + }, + { + "epoch": 122.94, + "learning_rate": 6.375438596491228e-05, + "loss": 0.0005, + "step": 8237 + }, + { + "epoch": 122.95, + "learning_rate": 6.371929824561403e-05, + "loss": 0.0081, + "step": 8238 + }, + { + "epoch": 122.97, + "learning_rate": 6.368421052631578e-05, + "loss": 0.0003, + "step": 8239 + }, + { + "epoch": 122.98, + "learning_rate": 6.364912280701754e-05, + "loss": 0.0004, + "step": 8240 + }, + { + "epoch": 123.0, + "learning_rate": 6.361403508771929e-05, + "loss": 0.0003, + "step": 8241 + }, + { + "epoch": 123.01, + "learning_rate": 6.357894736842104e-05, + "loss": 0.0003, + "step": 8242 + }, + { + "epoch": 123.03, + "learning_rate": 6.35438596491228e-05, + "loss": 0.0003, + "step": 8243 + }, + { + "epoch": 123.04, + "learning_rate": 6.350877192982456e-05, + "loss": 0.0072, + "step": 8244 + }, + { + "epoch": 123.06, + "learning_rate": 6.347368421052631e-05, + "loss": 0.0005, + "step": 8245 + }, + { + "epoch": 123.07, + "learning_rate": 6.343859649122807e-05, + "loss": 0.0003, + "step": 8246 + }, + { + "epoch": 123.09, + "learning_rate": 6.340350877192982e-05, + "loss": 0.0003, + "step": 8247 + }, + { + "epoch": 123.1, + "learning_rate": 6.336842105263157e-05, + "loss": 0.0015, + "step": 8248 + }, + { + "epoch": 123.12, + "learning_rate": 6.333333333333333e-05, + "loss": 0.0003, + "step": 8249 + }, + { + "epoch": 123.13, + "learning_rate": 6.329824561403508e-05, + "loss": 0.0004, + "step": 8250 + }, + { + "epoch": 123.15, + "learning_rate": 6.326315789473683e-05, + "loss": 0.0003, + "step": 8251 + }, + { + "epoch": 123.16, + "learning_rate": 6.322807017543859e-05, + "loss": 0.1065, + "step": 8252 + }, + { + "epoch": 123.18, + "learning_rate": 6.319298245614034e-05, + "loss": 0.0002, + "step": 8253 + }, + { + "epoch": 123.19, + "learning_rate": 6.315789473684209e-05, + "loss": 0.0003, + "step": 8254 + }, + { + "epoch": 123.21, + "learning_rate": 6.312280701754385e-05, + "loss": 0.0321, + "step": 8255 + }, + { + "epoch": 123.22, + "learning_rate": 6.308771929824561e-05, + "loss": 0.0003, + "step": 8256 + }, + { + "epoch": 123.24, + "learning_rate": 6.305263157894736e-05, + "loss": 0.0003, + "step": 8257 + }, + { + "epoch": 123.25, + "learning_rate": 6.301754385964913e-05, + "loss": 0.0003, + "step": 8258 + }, + { + "epoch": 123.27, + "learning_rate": 6.298245614035087e-05, + "loss": 0.0003, + "step": 8259 + }, + { + "epoch": 123.28, + "learning_rate": 6.294736842105262e-05, + "loss": 0.0002, + "step": 8260 + }, + { + "epoch": 123.3, + "learning_rate": 6.291228070175439e-05, + "loss": 0.0002, + "step": 8261 + }, + { + "epoch": 123.31, + "learning_rate": 6.287719298245613e-05, + "loss": 0.0003, + "step": 8262 + }, + { + "epoch": 123.33, + "learning_rate": 6.284210526315788e-05, + "loss": 0.0004, + "step": 8263 + }, + { + "epoch": 123.34, + "learning_rate": 6.280701754385965e-05, + "loss": 0.0018, + "step": 8264 + }, + { + "epoch": 123.36, + "learning_rate": 6.27719298245614e-05, + "loss": 0.0003, + "step": 8265 + }, + { + "epoch": 123.37, + "learning_rate": 6.273684210526314e-05, + "loss": 0.0003, + "step": 8266 + }, + { + "epoch": 123.39, + "learning_rate": 6.27017543859649e-05, + "loss": 0.0002, + "step": 8267 + }, + { + "epoch": 123.4, + "learning_rate": 6.266666666666667e-05, + "loss": 0.0226, + "step": 8268 + }, + { + "epoch": 123.42, + "learning_rate": 6.263157894736842e-05, + "loss": 0.0003, + "step": 8269 + }, + { + "epoch": 123.43, + "learning_rate": 6.259649122807018e-05, + "loss": 0.0127, + "step": 8270 + }, + { + "epoch": 123.45, + "learning_rate": 6.256140350877193e-05, + "loss": 0.0004, + "step": 8271 + }, + { + "epoch": 123.46, + "learning_rate": 6.252631578947368e-05, + "loss": 0.0003, + "step": 8272 + }, + { + "epoch": 123.48, + "learning_rate": 6.249122807017544e-05, + "loss": 0.0003, + "step": 8273 + }, + { + "epoch": 123.49, + "learning_rate": 6.245614035087719e-05, + "loss": 0.0002, + "step": 8274 + }, + { + "epoch": 123.51, + "learning_rate": 6.242105263157894e-05, + "loss": 0.0004, + "step": 8275 + }, + { + "epoch": 123.52, + "learning_rate": 6.23859649122807e-05, + "loss": 0.0002, + "step": 8276 + }, + { + "epoch": 123.54, + "learning_rate": 6.235087719298245e-05, + "loss": 0.0003, + "step": 8277 + }, + { + "epoch": 123.55, + "learning_rate": 6.23157894736842e-05, + "loss": 0.0004, + "step": 8278 + }, + { + "epoch": 123.57, + "learning_rate": 6.228070175438596e-05, + "loss": 0.0002, + "step": 8279 + }, + { + "epoch": 123.58, + "learning_rate": 6.224561403508771e-05, + "loss": 0.0003, + "step": 8280 + }, + { + "epoch": 123.59, + "learning_rate": 6.221052631578947e-05, + "loss": 0.0018, + "step": 8281 + }, + { + "epoch": 123.61, + "learning_rate": 6.217543859649123e-05, + "loss": 0.0004, + "step": 8282 + }, + { + "epoch": 123.62, + "learning_rate": 6.214035087719298e-05, + "loss": 0.0003, + "step": 8283 + }, + { + "epoch": 123.64, + "learning_rate": 6.210526315789473e-05, + "loss": 0.0025, + "step": 8284 + }, + { + "epoch": 123.65, + "learning_rate": 6.207017543859649e-05, + "loss": 0.0004, + "step": 8285 + }, + { + "epoch": 123.67, + "learning_rate": 6.203508771929824e-05, + "loss": 0.0005, + "step": 8286 + }, + { + "epoch": 123.68, + "learning_rate": 6.199999999999999e-05, + "loss": 0.0003, + "step": 8287 + }, + { + "epoch": 123.7, + "learning_rate": 6.196491228070175e-05, + "loss": 0.0003, + "step": 8288 + }, + { + "epoch": 123.71, + "learning_rate": 6.19298245614035e-05, + "loss": 0.0253, + "step": 8289 + }, + { + "epoch": 123.73, + "learning_rate": 6.189473684210525e-05, + "loss": 0.0003, + "step": 8290 + }, + { + "epoch": 123.74, + "learning_rate": 6.185964912280701e-05, + "loss": 0.0003, + "step": 8291 + }, + { + "epoch": 123.76, + "learning_rate": 6.182456140350876e-05, + "loss": 0.0003, + "step": 8292 + }, + { + "epoch": 123.77, + "learning_rate": 6.178947368421052e-05, + "loss": 0.0007, + "step": 8293 + }, + { + "epoch": 123.79, + "learning_rate": 6.175438596491228e-05, + "loss": 0.0003, + "step": 8294 + }, + { + "epoch": 123.8, + "learning_rate": 6.171929824561403e-05, + "loss": 0.0003, + "step": 8295 + }, + { + "epoch": 123.82, + "learning_rate": 6.168421052631578e-05, + "loss": 0.0003, + "step": 8296 + }, + { + "epoch": 123.83, + "learning_rate": 6.164912280701754e-05, + "loss": 0.0003, + "step": 8297 + }, + { + "epoch": 123.85, + "learning_rate": 6.16140350877193e-05, + "loss": 0.0003, + "step": 8298 + }, + { + "epoch": 123.86, + "learning_rate": 6.157894736842104e-05, + "loss": 0.0003, + "step": 8299 + }, + { + "epoch": 123.88, + "learning_rate": 6.15438596491228e-05, + "loss": 0.0003, + "step": 8300 + }, + { + "epoch": 123.89, + "learning_rate": 6.150877192982455e-05, + "loss": 0.0002, + "step": 8301 + }, + { + "epoch": 123.91, + "learning_rate": 6.14736842105263e-05, + "loss": 0.0002, + "step": 8302 + }, + { + "epoch": 123.92, + "learning_rate": 6.143859649122806e-05, + "loss": 0.0003, + "step": 8303 + }, + { + "epoch": 123.94, + "learning_rate": 6.140350877192981e-05, + "loss": 0.0003, + "step": 8304 + }, + { + "epoch": 123.95, + "learning_rate": 6.136842105263158e-05, + "loss": 0.0003, + "step": 8305 + }, + { + "epoch": 123.97, + "learning_rate": 6.133333333333334e-05, + "loss": 0.0003, + "step": 8306 + }, + { + "epoch": 123.98, + "learning_rate": 6.129824561403509e-05, + "loss": 0.0003, + "step": 8307 + }, + { + "epoch": 124.0, + "learning_rate": 6.126315789473684e-05, + "loss": 0.0002, + "step": 8308 + }, + { + "epoch": 124.01, + "learning_rate": 6.12280701754386e-05, + "loss": 0.0016, + "step": 8309 + }, + { + "epoch": 124.03, + "learning_rate": 6.119298245614035e-05, + "loss": 0.0002, + "step": 8310 + }, + { + "epoch": 124.04, + "learning_rate": 6.11578947368421e-05, + "loss": 0.0002, + "step": 8311 + }, + { + "epoch": 124.06, + "learning_rate": 6.112280701754386e-05, + "loss": 0.0002, + "step": 8312 + }, + { + "epoch": 124.07, + "learning_rate": 6.10877192982456e-05, + "loss": 0.0003, + "step": 8313 + }, + { + "epoch": 124.09, + "learning_rate": 6.105263157894736e-05, + "loss": 0.0004, + "step": 8314 + }, + { + "epoch": 124.1, + "learning_rate": 6.101754385964912e-05, + "loss": 0.0002, + "step": 8315 + }, + { + "epoch": 124.12, + "learning_rate": 6.0982456140350866e-05, + "loss": 0.0003, + "step": 8316 + }, + { + "epoch": 124.13, + "learning_rate": 6.094736842105262e-05, + "loss": 0.0003, + "step": 8317 + }, + { + "epoch": 124.15, + "learning_rate": 6.0912280701754384e-05, + "loss": 0.0002, + "step": 8318 + }, + { + "epoch": 124.16, + "learning_rate": 6.087719298245614e-05, + "loss": 0.0003, + "step": 8319 + }, + { + "epoch": 124.18, + "learning_rate": 6.0842105263157895e-05, + "loss": 0.0003, + "step": 8320 + }, + { + "epoch": 124.19, + "learning_rate": 6.0807017543859644e-05, + "loss": 0.0035, + "step": 8321 + }, + { + "epoch": 124.21, + "learning_rate": 6.07719298245614e-05, + "loss": 0.0002, + "step": 8322 + }, + { + "epoch": 124.22, + "learning_rate": 6.0736842105263155e-05, + "loss": 0.0031, + "step": 8323 + }, + { + "epoch": 124.24, + "learning_rate": 6.0701754385964904e-05, + "loss": 0.1618, + "step": 8324 + }, + { + "epoch": 124.25, + "learning_rate": 6.066666666666666e-05, + "loss": 0.0002, + "step": 8325 + }, + { + "epoch": 124.27, + "learning_rate": 6.0631578947368415e-05, + "loss": 0.0002, + "step": 8326 + }, + { + "epoch": 124.28, + "learning_rate": 6.059649122807017e-05, + "loss": 0.0002, + "step": 8327 + }, + { + "epoch": 124.3, + "learning_rate": 6.056140350877192e-05, + "loss": 0.0004, + "step": 8328 + }, + { + "epoch": 124.31, + "learning_rate": 6.0526315789473675e-05, + "loss": 0.0002, + "step": 8329 + }, + { + "epoch": 124.33, + "learning_rate": 6.049122807017543e-05, + "loss": 0.0002, + "step": 8330 + }, + { + "epoch": 124.34, + "learning_rate": 6.045614035087719e-05, + "loss": 0.0003, + "step": 8331 + }, + { + "epoch": 124.36, + "learning_rate": 6.042105263157895e-05, + "loss": 0.0002, + "step": 8332 + }, + { + "epoch": 124.37, + "learning_rate": 6.03859649122807e-05, + "loss": 0.0002, + "step": 8333 + }, + { + "epoch": 124.39, + "learning_rate": 6.035087719298245e-05, + "loss": 0.0003, + "step": 8334 + }, + { + "epoch": 124.4, + "learning_rate": 6.031578947368421e-05, + "loss": 0.0009, + "step": 8335 + }, + { + "epoch": 124.42, + "learning_rate": 6.028070175438596e-05, + "loss": 0.0004, + "step": 8336 + }, + { + "epoch": 124.43, + "learning_rate": 6.024561403508771e-05, + "loss": 0.0003, + "step": 8337 + }, + { + "epoch": 124.45, + "learning_rate": 6.021052631578947e-05, + "loss": 0.0004, + "step": 8338 + }, + { + "epoch": 124.46, + "learning_rate": 6.0175438596491224e-05, + "loss": 0.0003, + "step": 8339 + }, + { + "epoch": 124.48, + "learning_rate": 6.014035087719297e-05, + "loss": 0.0002, + "step": 8340 + }, + { + "epoch": 124.49, + "learning_rate": 6.010526315789473e-05, + "loss": 0.008, + "step": 8341 + }, + { + "epoch": 124.51, + "learning_rate": 6.0070175438596484e-05, + "loss": 0.0002, + "step": 8342 + }, + { + "epoch": 124.52, + "learning_rate": 6.0035087719298246e-05, + "loss": 0.0002, + "step": 8343 + }, + { + "epoch": 124.54, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.0002, + "step": 8344 + }, + { + "epoch": 124.55, + "learning_rate": 5.996491228070175e-05, + "loss": 0.0002, + "step": 8345 + }, + { + "epoch": 124.57, + "learning_rate": 5.9929824561403506e-05, + "loss": 0.0002, + "step": 8346 + }, + { + "epoch": 124.58, + "learning_rate": 5.989473684210526e-05, + "loss": 0.0005, + "step": 8347 + }, + { + "epoch": 124.59, + "learning_rate": 5.985964912280701e-05, + "loss": 0.0002, + "step": 8348 + }, + { + "epoch": 124.61, + "learning_rate": 5.9824561403508766e-05, + "loss": 0.0002, + "step": 8349 + }, + { + "epoch": 124.62, + "learning_rate": 5.978947368421052e-05, + "loss": 0.0003, + "step": 8350 + }, + { + "epoch": 124.64, + "learning_rate": 5.975438596491228e-05, + "loss": 0.0004, + "step": 8351 + }, + { + "epoch": 124.65, + "learning_rate": 5.9719298245614026e-05, + "loss": 0.0002, + "step": 8352 + }, + { + "epoch": 124.67, + "learning_rate": 5.968421052631578e-05, + "loss": 0.0002, + "step": 8353 + }, + { + "epoch": 124.68, + "learning_rate": 5.964912280701754e-05, + "loss": 0.0061, + "step": 8354 + }, + { + "epoch": 124.7, + "learning_rate": 5.961403508771929e-05, + "loss": 0.0002, + "step": 8355 + }, + { + "epoch": 124.71, + "learning_rate": 5.957894736842105e-05, + "loss": 0.0002, + "step": 8356 + }, + { + "epoch": 124.73, + "learning_rate": 5.9543859649122803e-05, + "loss": 0.0002, + "step": 8357 + }, + { + "epoch": 124.74, + "learning_rate": 5.950877192982456e-05, + "loss": 0.0003, + "step": 8358 + }, + { + "epoch": 124.76, + "learning_rate": 5.9473684210526315e-05, + "loss": 0.0002, + "step": 8359 + }, + { + "epoch": 124.77, + "learning_rate": 5.943859649122806e-05, + "loss": 0.0002, + "step": 8360 + }, + { + "epoch": 124.79, + "learning_rate": 5.940350877192982e-05, + "loss": 0.0098, + "step": 8361 + }, + { + "epoch": 124.8, + "learning_rate": 5.9368421052631574e-05, + "loss": 0.0003, + "step": 8362 + }, + { + "epoch": 124.82, + "learning_rate": 5.933333333333333e-05, + "loss": 0.0002, + "step": 8363 + }, + { + "epoch": 124.83, + "learning_rate": 5.929824561403508e-05, + "loss": 0.0034, + "step": 8364 + }, + { + "epoch": 124.85, + "learning_rate": 5.9263157894736834e-05, + "loss": 0.0003, + "step": 8365 + }, + { + "epoch": 124.86, + "learning_rate": 5.922807017543859e-05, + "loss": 0.0002, + "step": 8366 + }, + { + "epoch": 124.88, + "learning_rate": 5.9192982456140345e-05, + "loss": 0.0004, + "step": 8367 + }, + { + "epoch": 124.89, + "learning_rate": 5.91578947368421e-05, + "loss": 0.0002, + "step": 8368 + }, + { + "epoch": 124.91, + "learning_rate": 5.9122807017543856e-05, + "loss": 0.0003, + "step": 8369 + }, + { + "epoch": 124.92, + "learning_rate": 5.908771929824561e-05, + "loss": 0.0002, + "step": 8370 + }, + { + "epoch": 124.94, + "learning_rate": 5.905263157894737e-05, + "loss": 0.0003, + "step": 8371 + }, + { + "epoch": 124.95, + "learning_rate": 5.9017543859649116e-05, + "loss": 0.0002, + "step": 8372 + }, + { + "epoch": 124.97, + "learning_rate": 5.898245614035087e-05, + "loss": 0.0002, + "step": 8373 + }, + { + "epoch": 124.98, + "learning_rate": 5.894736842105263e-05, + "loss": 0.0002, + "step": 8374 + }, + { + "epoch": 125.0, + "learning_rate": 5.891228070175438e-05, + "loss": 0.0002, + "step": 8375 + }, + { + "epoch": 125.01, + "learning_rate": 5.887719298245613e-05, + "loss": 0.0003, + "step": 8376 + }, + { + "epoch": 125.03, + "learning_rate": 5.884210526315789e-05, + "loss": 0.0006, + "step": 8377 + }, + { + "epoch": 125.04, + "learning_rate": 5.880701754385964e-05, + "loss": 0.0002, + "step": 8378 + }, + { + "epoch": 125.06, + "learning_rate": 5.87719298245614e-05, + "loss": 0.0002, + "step": 8379 + }, + { + "epoch": 125.07, + "learning_rate": 5.873684210526315e-05, + "loss": 0.0002, + "step": 8380 + }, + { + "epoch": 125.09, + "learning_rate": 5.870175438596491e-05, + "loss": 0.1178, + "step": 8381 + }, + { + "epoch": 125.1, + "learning_rate": 5.8666666666666665e-05, + "loss": 0.0002, + "step": 8382 + }, + { + "epoch": 125.12, + "learning_rate": 5.863157894736842e-05, + "loss": 0.0002, + "step": 8383 + }, + { + "epoch": 125.13, + "learning_rate": 5.859649122807017e-05, + "loss": 0.0003, + "step": 8384 + }, + { + "epoch": 125.15, + "learning_rate": 5.8561403508771925e-05, + "loss": 0.0006, + "step": 8385 + }, + { + "epoch": 125.16, + "learning_rate": 5.852631578947368e-05, + "loss": 0.0002, + "step": 8386 + }, + { + "epoch": 125.18, + "learning_rate": 5.8491228070175436e-05, + "loss": 0.0027, + "step": 8387 + }, + { + "epoch": 125.19, + "learning_rate": 5.8456140350877185e-05, + "loss": 0.0004, + "step": 8388 + }, + { + "epoch": 125.21, + "learning_rate": 5.842105263157894e-05, + "loss": 0.0002, + "step": 8389 + }, + { + "epoch": 125.22, + "learning_rate": 5.8385964912280696e-05, + "loss": 0.0004, + "step": 8390 + }, + { + "epoch": 125.24, + "learning_rate": 5.8350877192982445e-05, + "loss": 0.0002, + "step": 8391 + }, + { + "epoch": 125.25, + "learning_rate": 5.83157894736842e-05, + "loss": 0.0002, + "step": 8392 + }, + { + "epoch": 125.27, + "learning_rate": 5.828070175438596e-05, + "loss": 0.0002, + "step": 8393 + }, + { + "epoch": 125.28, + "learning_rate": 5.824561403508772e-05, + "loss": 0.0007, + "step": 8394 + }, + { + "epoch": 125.3, + "learning_rate": 5.8210526315789474e-05, + "loss": 0.0002, + "step": 8395 + }, + { + "epoch": 125.31, + "learning_rate": 5.817543859649122e-05, + "loss": 0.0003, + "step": 8396 + }, + { + "epoch": 125.33, + "learning_rate": 5.814035087719298e-05, + "loss": 0.0009, + "step": 8397 + }, + { + "epoch": 125.34, + "learning_rate": 5.8105263157894734e-05, + "loss": 0.0002, + "step": 8398 + }, + { + "epoch": 125.36, + "learning_rate": 5.807017543859649e-05, + "loss": 0.0003, + "step": 8399 + }, + { + "epoch": 125.37, + "learning_rate": 5.803508771929824e-05, + "loss": 0.2335, + "step": 8400 + }, + { + "epoch": 125.37, + "eval_accuracy": 0.8827704356338718, + "eval_f1": 0.8831895482310441, + "eval_loss": 0.6515348553657532, + "eval_runtime": 348.7658, + "eval_samples_per_second": 11.716, + "eval_steps_per_second": 0.734, + "step": 8400 + }, + { + "epoch": 125.39, + "learning_rate": 5.7999999999999994e-05, + "loss": 0.0005, + "step": 8401 + }, + { + "epoch": 125.4, + "learning_rate": 5.796491228070175e-05, + "loss": 0.0002, + "step": 8402 + }, + { + "epoch": 125.42, + "learning_rate": 5.79298245614035e-05, + "loss": 0.0002, + "step": 8403 + }, + { + "epoch": 125.43, + "learning_rate": 5.7894736842105253e-05, + "loss": 0.0002, + "step": 8404 + }, + { + "epoch": 125.45, + "learning_rate": 5.785964912280701e-05, + "loss": 0.0003, + "step": 8405 + }, + { + "epoch": 125.46, + "learning_rate": 5.782456140350877e-05, + "loss": 0.0002, + "step": 8406 + }, + { + "epoch": 125.48, + "learning_rate": 5.778947368421053e-05, + "loss": 0.0008, + "step": 8407 + }, + { + "epoch": 125.49, + "learning_rate": 5.7754385964912276e-05, + "loss": 0.0002, + "step": 8408 + }, + { + "epoch": 125.51, + "learning_rate": 5.771929824561403e-05, + "loss": 0.0002, + "step": 8409 + }, + { + "epoch": 125.52, + "learning_rate": 5.768421052631579e-05, + "loss": 0.0029, + "step": 8410 + }, + { + "epoch": 125.54, + "learning_rate": 5.764912280701754e-05, + "loss": 0.0002, + "step": 8411 + }, + { + "epoch": 125.55, + "learning_rate": 5.761403508771929e-05, + "loss": 0.0044, + "step": 8412 + }, + { + "epoch": 125.57, + "learning_rate": 5.757894736842105e-05, + "loss": 0.0002, + "step": 8413 + }, + { + "epoch": 125.58, + "learning_rate": 5.75438596491228e-05, + "loss": 0.0003, + "step": 8414 + }, + { + "epoch": 125.59, + "learning_rate": 5.750877192982455e-05, + "loss": 0.0002, + "step": 8415 + }, + { + "epoch": 125.61, + "learning_rate": 5.7473684210526307e-05, + "loss": 0.0002, + "step": 8416 + }, + { + "epoch": 125.62, + "learning_rate": 5.743859649122806e-05, + "loss": 0.0199, + "step": 8417 + }, + { + "epoch": 125.64, + "learning_rate": 5.7403508771929824e-05, + "loss": 0.0004, + "step": 8418 + }, + { + "epoch": 125.65, + "learning_rate": 5.736842105263158e-05, + "loss": 0.0621, + "step": 8419 + }, + { + "epoch": 125.67, + "learning_rate": 5.733333333333333e-05, + "loss": 0.0003, + "step": 8420 + }, + { + "epoch": 125.68, + "learning_rate": 5.7298245614035084e-05, + "loss": 0.0002, + "step": 8421 + }, + { + "epoch": 125.7, + "learning_rate": 5.726315789473684e-05, + "loss": 0.0002, + "step": 8422 + }, + { + "epoch": 125.71, + "learning_rate": 5.7228070175438595e-05, + "loss": 0.0008, + "step": 8423 + }, + { + "epoch": 125.73, + "learning_rate": 5.7192982456140344e-05, + "loss": 0.0002, + "step": 8424 + }, + { + "epoch": 125.74, + "learning_rate": 5.71578947368421e-05, + "loss": 0.0014, + "step": 8425 + }, + { + "epoch": 125.76, + "learning_rate": 5.7122807017543855e-05, + "loss": 0.0848, + "step": 8426 + }, + { + "epoch": 125.77, + "learning_rate": 5.7087719298245604e-05, + "loss": 0.0004, + "step": 8427 + }, + { + "epoch": 125.79, + "learning_rate": 5.705263157894736e-05, + "loss": 0.0003, + "step": 8428 + }, + { + "epoch": 125.8, + "learning_rate": 5.7017543859649115e-05, + "loss": 0.0092, + "step": 8429 + }, + { + "epoch": 125.82, + "learning_rate": 5.698245614035087e-05, + "loss": 0.0004, + "step": 8430 + }, + { + "epoch": 125.83, + "learning_rate": 5.694736842105263e-05, + "loss": 0.0072, + "step": 8431 + }, + { + "epoch": 125.85, + "learning_rate": 5.691228070175438e-05, + "loss": 0.0002, + "step": 8432 + }, + { + "epoch": 125.86, + "learning_rate": 5.687719298245614e-05, + "loss": 0.0003, + "step": 8433 + }, + { + "epoch": 125.88, + "learning_rate": 5.684210526315789e-05, + "loss": 0.0002, + "step": 8434 + }, + { + "epoch": 125.89, + "learning_rate": 5.680701754385965e-05, + "loss": 0.0003, + "step": 8435 + }, + { + "epoch": 125.91, + "learning_rate": 5.67719298245614e-05, + "loss": 0.0002, + "step": 8436 + }, + { + "epoch": 125.92, + "learning_rate": 5.673684210526315e-05, + "loss": 0.0002, + "step": 8437 + }, + { + "epoch": 125.94, + "learning_rate": 5.670175438596491e-05, + "loss": 0.0012, + "step": 8438 + }, + { + "epoch": 125.95, + "learning_rate": 5.666666666666666e-05, + "loss": 0.0005, + "step": 8439 + }, + { + "epoch": 125.97, + "learning_rate": 5.663157894736841e-05, + "loss": 0.0003, + "step": 8440 + }, + { + "epoch": 125.98, + "learning_rate": 5.659649122807017e-05, + "loss": 0.0003, + "step": 8441 + }, + { + "epoch": 126.0, + "learning_rate": 5.6561403508771924e-05, + "loss": 0.0002, + "step": 8442 + }, + { + "epoch": 126.01, + "learning_rate": 5.652631578947367e-05, + "loss": 0.0003, + "step": 8443 + }, + { + "epoch": 126.03, + "learning_rate": 5.6491228070175435e-05, + "loss": 0.0004, + "step": 8444 + }, + { + "epoch": 126.04, + "learning_rate": 5.645614035087719e-05, + "loss": 0.0143, + "step": 8445 + }, + { + "epoch": 126.06, + "learning_rate": 5.6421052631578946e-05, + "loss": 0.0083, + "step": 8446 + }, + { + "epoch": 126.07, + "learning_rate": 5.63859649122807e-05, + "loss": 0.0032, + "step": 8447 + }, + { + "epoch": 126.09, + "learning_rate": 5.635087719298245e-05, + "loss": 0.0002, + "step": 8448 + }, + { + "epoch": 126.1, + "learning_rate": 5.6315789473684206e-05, + "loss": 0.0004, + "step": 8449 + }, + { + "epoch": 126.12, + "learning_rate": 5.628070175438596e-05, + "loss": 0.0003, + "step": 8450 + }, + { + "epoch": 126.13, + "learning_rate": 5.624561403508771e-05, + "loss": 0.0002, + "step": 8451 + }, + { + "epoch": 126.15, + "learning_rate": 5.6210526315789466e-05, + "loss": 0.0003, + "step": 8452 + }, + { + "epoch": 126.16, + "learning_rate": 5.617543859649122e-05, + "loss": 0.0002, + "step": 8453 + }, + { + "epoch": 126.18, + "learning_rate": 5.614035087719298e-05, + "loss": 0.0013, + "step": 8454 + }, + { + "epoch": 126.19, + "learning_rate": 5.6105263157894726e-05, + "loss": 0.0013, + "step": 8455 + }, + { + "epoch": 126.21, + "learning_rate": 5.607017543859649e-05, + "loss": 0.0003, + "step": 8456 + }, + { + "epoch": 126.22, + "learning_rate": 5.6035087719298244e-05, + "loss": 0.0002, + "step": 8457 + }, + { + "epoch": 126.24, + "learning_rate": 5.6e-05, + "loss": 0.0002, + "step": 8458 + }, + { + "epoch": 126.25, + "learning_rate": 5.5964912280701755e-05, + "loss": 0.0002, + "step": 8459 + }, + { + "epoch": 126.27, + "learning_rate": 5.5929824561403503e-05, + "loss": 0.0002, + "step": 8460 + }, + { + "epoch": 126.28, + "learning_rate": 5.589473684210526e-05, + "loss": 0.001, + "step": 8461 + }, + { + "epoch": 126.3, + "learning_rate": 5.5859649122807015e-05, + "loss": 0.001, + "step": 8462 + }, + { + "epoch": 126.31, + "learning_rate": 5.582456140350876e-05, + "loss": 0.0002, + "step": 8463 + }, + { + "epoch": 126.33, + "learning_rate": 5.578947368421052e-05, + "loss": 0.0002, + "step": 8464 + }, + { + "epoch": 126.34, + "learning_rate": 5.5754385964912274e-05, + "loss": 0.0002, + "step": 8465 + }, + { + "epoch": 126.36, + "learning_rate": 5.571929824561403e-05, + "loss": 0.0002, + "step": 8466 + }, + { + "epoch": 126.37, + "learning_rate": 5.568421052631578e-05, + "loss": 0.0003, + "step": 8467 + }, + { + "epoch": 126.39, + "learning_rate": 5.5649122807017534e-05, + "loss": 0.0002, + "step": 8468 + }, + { + "epoch": 126.4, + "learning_rate": 5.56140350877193e-05, + "loss": 0.0003, + "step": 8469 + }, + { + "epoch": 126.42, + "learning_rate": 5.557894736842105e-05, + "loss": 0.0002, + "step": 8470 + }, + { + "epoch": 126.43, + "learning_rate": 5.55438596491228e-05, + "loss": 0.0366, + "step": 8471 + }, + { + "epoch": 126.45, + "learning_rate": 5.5508771929824557e-05, + "loss": 0.0002, + "step": 8472 + }, + { + "epoch": 126.46, + "learning_rate": 5.547368421052631e-05, + "loss": 0.0002, + "step": 8473 + }, + { + "epoch": 126.48, + "learning_rate": 5.543859649122807e-05, + "loss": 0.0004, + "step": 8474 + }, + { + "epoch": 126.49, + "learning_rate": 5.5403508771929816e-05, + "loss": 0.0002, + "step": 8475 + }, + { + "epoch": 126.51, + "learning_rate": 5.536842105263157e-05, + "loss": 0.0003, + "step": 8476 + }, + { + "epoch": 126.52, + "learning_rate": 5.533333333333333e-05, + "loss": 0.0003, + "step": 8477 + }, + { + "epoch": 126.54, + "learning_rate": 5.529824561403508e-05, + "loss": 0.0002, + "step": 8478 + }, + { + "epoch": 126.55, + "learning_rate": 5.526315789473683e-05, + "loss": 0.0002, + "step": 8479 + }, + { + "epoch": 126.57, + "learning_rate": 5.522807017543859e-05, + "loss": 0.0002, + "step": 8480 + }, + { + "epoch": 126.58, + "learning_rate": 5.519298245614035e-05, + "loss": 0.0002, + "step": 8481 + }, + { + "epoch": 126.59, + "learning_rate": 5.5157894736842105e-05, + "loss": 0.0005, + "step": 8482 + }, + { + "epoch": 126.61, + "learning_rate": 5.5122807017543854e-05, + "loss": 0.0002, + "step": 8483 + }, + { + "epoch": 126.62, + "learning_rate": 5.508771929824561e-05, + "loss": 0.0003, + "step": 8484 + }, + { + "epoch": 126.64, + "learning_rate": 5.5052631578947365e-05, + "loss": 0.0002, + "step": 8485 + }, + { + "epoch": 126.65, + "learning_rate": 5.501754385964912e-05, + "loss": 0.0002, + "step": 8486 + }, + { + "epoch": 126.67, + "learning_rate": 5.498245614035087e-05, + "loss": 0.0002, + "step": 8487 + }, + { + "epoch": 126.68, + "learning_rate": 5.4947368421052625e-05, + "loss": 0.0002, + "step": 8488 + }, + { + "epoch": 126.7, + "learning_rate": 5.491228070175438e-05, + "loss": 0.0003, + "step": 8489 + }, + { + "epoch": 126.71, + "learning_rate": 5.4877192982456136e-05, + "loss": 0.0002, + "step": 8490 + }, + { + "epoch": 126.73, + "learning_rate": 5.4842105263157885e-05, + "loss": 0.0002, + "step": 8491 + }, + { + "epoch": 126.74, + "learning_rate": 5.480701754385964e-05, + "loss": 0.0003, + "step": 8492 + }, + { + "epoch": 126.76, + "learning_rate": 5.4771929824561396e-05, + "loss": 0.2602, + "step": 8493 + }, + { + "epoch": 126.77, + "learning_rate": 5.473684210526316e-05, + "loss": 0.0003, + "step": 8494 + }, + { + "epoch": 126.79, + "learning_rate": 5.470175438596491e-05, + "loss": 0.0002, + "step": 8495 + }, + { + "epoch": 126.8, + "learning_rate": 5.466666666666666e-05, + "loss": 0.0003, + "step": 8496 + }, + { + "epoch": 126.82, + "learning_rate": 5.463157894736842e-05, + "loss": 0.0137, + "step": 8497 + }, + { + "epoch": 126.83, + "learning_rate": 5.4596491228070174e-05, + "loss": 0.2638, + "step": 8498 + }, + { + "epoch": 126.85, + "learning_rate": 5.456140350877192e-05, + "loss": 0.0002, + "step": 8499 + }, + { + "epoch": 126.86, + "learning_rate": 5.452631578947368e-05, + "loss": 0.0003, + "step": 8500 + }, + { + "epoch": 126.88, + "learning_rate": 5.4491228070175434e-05, + "loss": 0.0002, + "step": 8501 + }, + { + "epoch": 126.89, + "learning_rate": 5.445614035087719e-05, + "loss": 0.0003, + "step": 8502 + }, + { + "epoch": 126.91, + "learning_rate": 5.442105263157894e-05, + "loss": 0.0003, + "step": 8503 + }, + { + "epoch": 126.92, + "learning_rate": 5.4385964912280694e-05, + "loss": 0.0051, + "step": 8504 + }, + { + "epoch": 126.94, + "learning_rate": 5.435087719298245e-05, + "loss": 0.0003, + "step": 8505 + }, + { + "epoch": 126.95, + "learning_rate": 5.431578947368421e-05, + "loss": 0.0003, + "step": 8506 + }, + { + "epoch": 126.97, + "learning_rate": 5.428070175438596e-05, + "loss": 0.0007, + "step": 8507 + }, + { + "epoch": 126.98, + "learning_rate": 5.4245614035087716e-05, + "loss": 0.0003, + "step": 8508 + }, + { + "epoch": 127.0, + "learning_rate": 5.421052631578947e-05, + "loss": 0.0002, + "step": 8509 + }, + { + "epoch": 127.01, + "learning_rate": 5.417543859649123e-05, + "loss": 0.0003, + "step": 8510 + }, + { + "epoch": 127.03, + "learning_rate": 5.4140350877192976e-05, + "loss": 0.0003, + "step": 8511 + }, + { + "epoch": 127.04, + "learning_rate": 5.410526315789473e-05, + "loss": 0.0003, + "step": 8512 + }, + { + "epoch": 127.06, + "learning_rate": 5.407017543859649e-05, + "loss": 0.0002, + "step": 8513 + }, + { + "epoch": 127.07, + "learning_rate": 5.403508771929824e-05, + "loss": 0.0004, + "step": 8514 + }, + { + "epoch": 127.09, + "learning_rate": 5.399999999999999e-05, + "loss": 0.0002, + "step": 8515 + }, + { + "epoch": 127.1, + "learning_rate": 5.396491228070175e-05, + "loss": 0.0003, + "step": 8516 + }, + { + "epoch": 127.12, + "learning_rate": 5.39298245614035e-05, + "loss": 0.0004, + "step": 8517 + }, + { + "epoch": 127.13, + "learning_rate": 5.389473684210525e-05, + "loss": 0.0003, + "step": 8518 + }, + { + "epoch": 127.15, + "learning_rate": 5.3859649122807013e-05, + "loss": 0.0004, + "step": 8519 + }, + { + "epoch": 127.16, + "learning_rate": 5.382456140350877e-05, + "loss": 0.0003, + "step": 8520 + }, + { + "epoch": 127.18, + "learning_rate": 5.3789473684210525e-05, + "loss": 0.0003, + "step": 8521 + }, + { + "epoch": 127.19, + "learning_rate": 5.375438596491228e-05, + "loss": 0.0003, + "step": 8522 + }, + { + "epoch": 127.21, + "learning_rate": 5.371929824561403e-05, + "loss": 0.0004, + "step": 8523 + }, + { + "epoch": 127.22, + "learning_rate": 5.3684210526315784e-05, + "loss": 0.1857, + "step": 8524 + }, + { + "epoch": 127.24, + "learning_rate": 5.364912280701754e-05, + "loss": 0.0003, + "step": 8525 + }, + { + "epoch": 127.25, + "learning_rate": 5.3614035087719296e-05, + "loss": 0.0003, + "step": 8526 + }, + { + "epoch": 127.27, + "learning_rate": 5.3578947368421044e-05, + "loss": 0.0005, + "step": 8527 + }, + { + "epoch": 127.28, + "learning_rate": 5.35438596491228e-05, + "loss": 0.0003, + "step": 8528 + }, + { + "epoch": 127.3, + "learning_rate": 5.3508771929824555e-05, + "loss": 0.0003, + "step": 8529 + }, + { + "epoch": 127.31, + "learning_rate": 5.3473684210526304e-05, + "loss": 0.0004, + "step": 8530 + }, + { + "epoch": 127.33, + "learning_rate": 5.3438596491228067e-05, + "loss": 0.0003, + "step": 8531 + }, + { + "epoch": 127.34, + "learning_rate": 5.340350877192982e-05, + "loss": 0.1111, + "step": 8532 + }, + { + "epoch": 127.36, + "learning_rate": 5.336842105263158e-05, + "loss": 0.0005, + "step": 8533 + }, + { + "epoch": 127.37, + "learning_rate": 5.333333333333333e-05, + "loss": 0.0003, + "step": 8534 + }, + { + "epoch": 127.39, + "learning_rate": 5.329824561403508e-05, + "loss": 0.0002, + "step": 8535 + }, + { + "epoch": 127.4, + "learning_rate": 5.326315789473684e-05, + "loss": 0.0004, + "step": 8536 + }, + { + "epoch": 127.42, + "learning_rate": 5.322807017543859e-05, + "loss": 0.0017, + "step": 8537 + }, + { + "epoch": 127.43, + "learning_rate": 5.319298245614035e-05, + "loss": 0.0004, + "step": 8538 + }, + { + "epoch": 127.45, + "learning_rate": 5.31578947368421e-05, + "loss": 0.0003, + "step": 8539 + }, + { + "epoch": 127.46, + "learning_rate": 5.312280701754385e-05, + "loss": 0.0003, + "step": 8540 + }, + { + "epoch": 127.48, + "learning_rate": 5.308771929824561e-05, + "loss": 0.0003, + "step": 8541 + }, + { + "epoch": 127.49, + "learning_rate": 5.305263157894736e-05, + "loss": 0.0003, + "step": 8542 + }, + { + "epoch": 127.51, + "learning_rate": 5.301754385964911e-05, + "loss": 0.0003, + "step": 8543 + }, + { + "epoch": 127.52, + "learning_rate": 5.2982456140350875e-05, + "loss": 0.0016, + "step": 8544 + }, + { + "epoch": 127.54, + "learning_rate": 5.294736842105263e-05, + "loss": 0.0003, + "step": 8545 + }, + { + "epoch": 127.55, + "learning_rate": 5.2912280701754386e-05, + "loss": 0.0004, + "step": 8546 + }, + { + "epoch": 127.57, + "learning_rate": 5.2877192982456135e-05, + "loss": 0.0003, + "step": 8547 + }, + { + "epoch": 127.58, + "learning_rate": 5.284210526315789e-05, + "loss": 0.0004, + "step": 8548 + }, + { + "epoch": 127.59, + "learning_rate": 5.2807017543859646e-05, + "loss": 0.0014, + "step": 8549 + }, + { + "epoch": 127.61, + "learning_rate": 5.27719298245614e-05, + "loss": 0.0004, + "step": 8550 + }, + { + "epoch": 127.62, + "learning_rate": 5.273684210526315e-05, + "loss": 0.0132, + "step": 8551 + }, + { + "epoch": 127.64, + "learning_rate": 5.2701754385964906e-05, + "loss": 0.0012, + "step": 8552 + }, + { + "epoch": 127.65, + "learning_rate": 5.266666666666666e-05, + "loss": 0.1389, + "step": 8553 + }, + { + "epoch": 127.67, + "learning_rate": 5.263157894736841e-05, + "loss": 0.0003, + "step": 8554 + }, + { + "epoch": 127.68, + "learning_rate": 5.2596491228070166e-05, + "loss": 0.0417, + "step": 8555 + }, + { + "epoch": 127.7, + "learning_rate": 5.256140350877193e-05, + "loss": 0.0004, + "step": 8556 + }, + { + "epoch": 127.71, + "learning_rate": 5.2526315789473684e-05, + "loss": 0.0003, + "step": 8557 + }, + { + "epoch": 127.73, + "learning_rate": 5.249122807017544e-05, + "loss": 0.0003, + "step": 8558 + }, + { + "epoch": 127.74, + "learning_rate": 5.245614035087719e-05, + "loss": 0.0005, + "step": 8559 + }, + { + "epoch": 127.76, + "learning_rate": 5.2421052631578944e-05, + "loss": 0.0002, + "step": 8560 + }, + { + "epoch": 127.77, + "learning_rate": 5.23859649122807e-05, + "loss": 0.0005, + "step": 8561 + }, + { + "epoch": 127.79, + "learning_rate": 5.2350877192982455e-05, + "loss": 0.0003, + "step": 8562 + }, + { + "epoch": 127.8, + "learning_rate": 5.2315789473684204e-05, + "loss": 0.0007, + "step": 8563 + }, + { + "epoch": 127.82, + "learning_rate": 5.228070175438596e-05, + "loss": 0.0002, + "step": 8564 + }, + { + "epoch": 127.83, + "learning_rate": 5.2245614035087715e-05, + "loss": 0.0004, + "step": 8565 + }, + { + "epoch": 127.85, + "learning_rate": 5.2210526315789463e-05, + "loss": 0.0015, + "step": 8566 + }, + { + "epoch": 127.86, + "learning_rate": 5.217543859649122e-05, + "loss": 0.0009, + "step": 8567 + }, + { + "epoch": 127.88, + "learning_rate": 5.2140350877192975e-05, + "loss": 0.0003, + "step": 8568 + }, + { + "epoch": 127.89, + "learning_rate": 5.210526315789474e-05, + "loss": 0.3038, + "step": 8569 + }, + { + "epoch": 127.91, + "learning_rate": 5.207017543859649e-05, + "loss": 0.0003, + "step": 8570 + }, + { + "epoch": 127.92, + "learning_rate": 5.203508771929824e-05, + "loss": 0.0012, + "step": 8571 + }, + { + "epoch": 127.94, + "learning_rate": 5.2e-05, + "loss": 0.0003, + "step": 8572 + }, + { + "epoch": 127.95, + "learning_rate": 5.196491228070175e-05, + "loss": 0.0003, + "step": 8573 + }, + { + "epoch": 127.97, + "learning_rate": 5.192982456140351e-05, + "loss": 0.0003, + "step": 8574 + }, + { + "epoch": 127.98, + "learning_rate": 5.189473684210526e-05, + "loss": 0.1019, + "step": 8575 + }, + { + "epoch": 128.0, + "learning_rate": 5.185964912280701e-05, + "loss": 0.0004, + "step": 8576 + }, + { + "epoch": 128.01, + "learning_rate": 5.182456140350877e-05, + "loss": 0.0004, + "step": 8577 + }, + { + "epoch": 128.03, + "learning_rate": 5.1789473684210517e-05, + "loss": 0.0119, + "step": 8578 + }, + { + "epoch": 128.04, + "learning_rate": 5.175438596491227e-05, + "loss": 0.0004, + "step": 8579 + }, + { + "epoch": 128.06, + "learning_rate": 5.171929824561403e-05, + "loss": 0.0018, + "step": 8580 + }, + { + "epoch": 128.07, + "learning_rate": 5.168421052631579e-05, + "loss": 0.078, + "step": 8581 + }, + { + "epoch": 128.09, + "learning_rate": 5.1649122807017546e-05, + "loss": 0.0004, + "step": 8582 + }, + { + "epoch": 128.1, + "learning_rate": 5.1614035087719294e-05, + "loss": 0.0005, + "step": 8583 + }, + { + "epoch": 128.12, + "learning_rate": 5.157894736842105e-05, + "loss": 0.2706, + "step": 8584 + }, + { + "epoch": 128.13, + "learning_rate": 5.1543859649122805e-05, + "loss": 0.0003, + "step": 8585 + }, + { + "epoch": 128.15, + "learning_rate": 5.150877192982456e-05, + "loss": 0.0099, + "step": 8586 + }, + { + "epoch": 128.16, + "learning_rate": 5.147368421052631e-05, + "loss": 0.0003, + "step": 8587 + }, + { + "epoch": 128.18, + "learning_rate": 5.1438596491228065e-05, + "loss": 0.0438, + "step": 8588 + }, + { + "epoch": 128.19, + "learning_rate": 5.140350877192982e-05, + "loss": 0.0003, + "step": 8589 + }, + { + "epoch": 128.21, + "learning_rate": 5.136842105263157e-05, + "loss": 0.0004, + "step": 8590 + }, + { + "epoch": 128.22, + "learning_rate": 5.1333333333333325e-05, + "loss": 0.0004, + "step": 8591 + }, + { + "epoch": 128.24, + "learning_rate": 5.129824561403508e-05, + "loss": 0.0004, + "step": 8592 + }, + { + "epoch": 128.25, + "learning_rate": 5.1263157894736836e-05, + "loss": 0.0004, + "step": 8593 + }, + { + "epoch": 128.27, + "learning_rate": 5.12280701754386e-05, + "loss": 0.0684, + "step": 8594 + }, + { + "epoch": 128.28, + "learning_rate": 5.119298245614035e-05, + "loss": 0.001, + "step": 8595 + }, + { + "epoch": 128.3, + "learning_rate": 5.11578947368421e-05, + "loss": 0.0004, + "step": 8596 + }, + { + "epoch": 128.31, + "learning_rate": 5.112280701754386e-05, + "loss": 0.0004, + "step": 8597 + }, + { + "epoch": 128.33, + "learning_rate": 5.1087719298245614e-05, + "loss": 0.0007, + "step": 8598 + }, + { + "epoch": 128.34, + "learning_rate": 5.105263157894736e-05, + "loss": 0.0036, + "step": 8599 + }, + { + "epoch": 128.36, + "learning_rate": 5.101754385964912e-05, + "loss": 0.0005, + "step": 8600 + }, + { + "epoch": 128.36, + "eval_accuracy": 0.8761625061184533, + "eval_f1": 0.8776023752481238, + "eval_loss": 0.6961445808410645, + "eval_runtime": 345.4659, + "eval_samples_per_second": 11.828, + "eval_steps_per_second": 0.741, + "step": 8600 + }, + { + "epoch": 128.37, + "learning_rate": 5.0982456140350874e-05, + "loss": 0.0066, + "step": 8601 + }, + { + "epoch": 128.39, + "learning_rate": 5.094736842105262e-05, + "loss": 0.0003, + "step": 8602 + }, + { + "epoch": 128.4, + "learning_rate": 5.091228070175438e-05, + "loss": 0.0004, + "step": 8603 + }, + { + "epoch": 128.42, + "learning_rate": 5.0877192982456134e-05, + "loss": 0.0009, + "step": 8604 + }, + { + "epoch": 128.43, + "learning_rate": 5.084210526315789e-05, + "loss": 0.0017, + "step": 8605 + }, + { + "epoch": 128.45, + "learning_rate": 5.080701754385964e-05, + "loss": 0.0003, + "step": 8606 + }, + { + "epoch": 128.46, + "learning_rate": 5.07719298245614e-05, + "loss": 0.0008, + "step": 8607 + }, + { + "epoch": 128.48, + "learning_rate": 5.0736842105263156e-05, + "loss": 0.0005, + "step": 8608 + }, + { + "epoch": 128.49, + "learning_rate": 5.070175438596491e-05, + "loss": 0.0004, + "step": 8609 + }, + { + "epoch": 128.51, + "learning_rate": 5.066666666666666e-05, + "loss": 0.0004, + "step": 8610 + }, + { + "epoch": 128.52, + "learning_rate": 5.0631578947368416e-05, + "loss": 0.0007, + "step": 8611 + }, + { + "epoch": 128.54, + "learning_rate": 5.059649122807017e-05, + "loss": 0.0006, + "step": 8612 + }, + { + "epoch": 128.55, + "learning_rate": 5.056140350877193e-05, + "loss": 0.0003, + "step": 8613 + }, + { + "epoch": 128.57, + "learning_rate": 5.0526315789473676e-05, + "loss": 0.0003, + "step": 8614 + }, + { + "epoch": 128.58, + "learning_rate": 5.049122807017543e-05, + "loss": 0.0002, + "step": 8615 + }, + { + "epoch": 128.59, + "learning_rate": 5.045614035087719e-05, + "loss": 0.0004, + "step": 8616 + }, + { + "epoch": 128.61, + "learning_rate": 5.042105263157894e-05, + "loss": 0.0194, + "step": 8617 + }, + { + "epoch": 128.62, + "learning_rate": 5.038596491228069e-05, + "loss": 0.0003, + "step": 8618 + }, + { + "epoch": 128.64, + "learning_rate": 5.0350877192982454e-05, + "loss": 0.001, + "step": 8619 + }, + { + "epoch": 128.65, + "learning_rate": 5.031578947368421e-05, + "loss": 0.0004, + "step": 8620 + }, + { + "epoch": 128.67, + "learning_rate": 5.0280701754385965e-05, + "loss": 0.0152, + "step": 8621 + }, + { + "epoch": 128.68, + "learning_rate": 5.0245614035087714e-05, + "loss": 0.0003, + "step": 8622 + }, + { + "epoch": 128.7, + "learning_rate": 5.021052631578947e-05, + "loss": 0.0003, + "step": 8623 + }, + { + "epoch": 128.71, + "learning_rate": 5.0175438596491225e-05, + "loss": 0.0004, + "step": 8624 + }, + { + "epoch": 128.73, + "learning_rate": 5.014035087719298e-05, + "loss": 0.0003, + "step": 8625 + }, + { + "epoch": 128.74, + "learning_rate": 5.010526315789473e-05, + "loss": 0.0013, + "step": 8626 + }, + { + "epoch": 128.76, + "learning_rate": 5.0070175438596485e-05, + "loss": 0.0003, + "step": 8627 + }, + { + "epoch": 128.77, + "learning_rate": 5.003508771929824e-05, + "loss": 0.0002, + "step": 8628 + }, + { + "epoch": 128.79, + "learning_rate": 4.9999999999999996e-05, + "loss": 0.0003, + "step": 8629 + }, + { + "epoch": 128.8, + "learning_rate": 4.9964912280701744e-05, + "loss": 0.0003, + "step": 8630 + }, + { + "epoch": 128.82, + "learning_rate": 4.99298245614035e-05, + "loss": 0.0007, + "step": 8631 + }, + { + "epoch": 128.83, + "learning_rate": 4.989473684210526e-05, + "loss": 0.0075, + "step": 8632 + }, + { + "epoch": 128.85, + "learning_rate": 4.985964912280702e-05, + "loss": 0.0009, + "step": 8633 + }, + { + "epoch": 128.86, + "learning_rate": 4.982456140350877e-05, + "loss": 0.0003, + "step": 8634 + }, + { + "epoch": 128.88, + "learning_rate": 4.978947368421052e-05, + "loss": 0.0003, + "step": 8635 + }, + { + "epoch": 128.89, + "learning_rate": 4.975438596491228e-05, + "loss": 0.0004, + "step": 8636 + }, + { + "epoch": 128.91, + "learning_rate": 4.971929824561403e-05, + "loss": 0.0004, + "step": 8637 + }, + { + "epoch": 128.92, + "learning_rate": 4.968421052631578e-05, + "loss": 0.0004, + "step": 8638 + }, + { + "epoch": 128.94, + "learning_rate": 4.964912280701754e-05, + "loss": 0.0003, + "step": 8639 + }, + { + "epoch": 128.95, + "learning_rate": 4.961403508771929e-05, + "loss": 0.0004, + "step": 8640 + }, + { + "epoch": 128.97, + "learning_rate": 4.957894736842105e-05, + "loss": 0.0004, + "step": 8641 + }, + { + "epoch": 128.98, + "learning_rate": 4.95438596491228e-05, + "loss": 0.0004, + "step": 8642 + }, + { + "epoch": 129.0, + "learning_rate": 4.950877192982455e-05, + "loss": 0.0004, + "step": 8643 + }, + { + "epoch": 129.01, + "learning_rate": 4.9473684210526315e-05, + "loss": 0.0003, + "step": 8644 + }, + { + "epoch": 129.03, + "learning_rate": 4.943859649122807e-05, + "loss": 0.0003, + "step": 8645 + }, + { + "epoch": 129.04, + "learning_rate": 4.940350877192982e-05, + "loss": 0.0002, + "step": 8646 + }, + { + "epoch": 129.06, + "learning_rate": 4.9368421052631575e-05, + "loss": 0.0003, + "step": 8647 + }, + { + "epoch": 129.07, + "learning_rate": 4.933333333333333e-05, + "loss": 0.0957, + "step": 8648 + }, + { + "epoch": 129.09, + "learning_rate": 4.9298245614035086e-05, + "loss": 0.0003, + "step": 8649 + }, + { + "epoch": 129.1, + "learning_rate": 4.9263157894736835e-05, + "loss": 0.0003, + "step": 8650 + }, + { + "epoch": 129.12, + "learning_rate": 4.922807017543859e-05, + "loss": 0.0009, + "step": 8651 + }, + { + "epoch": 129.13, + "learning_rate": 4.9192982456140346e-05, + "loss": 0.0003, + "step": 8652 + }, + { + "epoch": 129.15, + "learning_rate": 4.91578947368421e-05, + "loss": 0.0002, + "step": 8653 + }, + { + "epoch": 129.16, + "learning_rate": 4.912280701754385e-05, + "loss": 0.0002, + "step": 8654 + }, + { + "epoch": 129.18, + "learning_rate": 4.9087719298245606e-05, + "loss": 0.0002, + "step": 8655 + }, + { + "epoch": 129.19, + "learning_rate": 4.905263157894736e-05, + "loss": 0.0004, + "step": 8656 + }, + { + "epoch": 129.21, + "learning_rate": 4.9017543859649124e-05, + "loss": 0.0003, + "step": 8657 + }, + { + "epoch": 129.22, + "learning_rate": 4.898245614035087e-05, + "loss": 0.0003, + "step": 8658 + }, + { + "epoch": 129.24, + "learning_rate": 4.894736842105263e-05, + "loss": 0.0002, + "step": 8659 + }, + { + "epoch": 129.25, + "learning_rate": 4.8912280701754384e-05, + "loss": 0.0003, + "step": 8660 + }, + { + "epoch": 129.27, + "learning_rate": 4.887719298245614e-05, + "loss": 0.0005, + "step": 8661 + }, + { + "epoch": 129.28, + "learning_rate": 4.884210526315789e-05, + "loss": 0.1397, + "step": 8662 + }, + { + "epoch": 129.3, + "learning_rate": 4.8807017543859644e-05, + "loss": 0.0002, + "step": 8663 + }, + { + "epoch": 129.31, + "learning_rate": 4.87719298245614e-05, + "loss": 0.001, + "step": 8664 + }, + { + "epoch": 129.33, + "learning_rate": 4.8736842105263155e-05, + "loss": 0.0003, + "step": 8665 + }, + { + "epoch": 129.34, + "learning_rate": 4.8701754385964904e-05, + "loss": 0.0003, + "step": 8666 + }, + { + "epoch": 129.36, + "learning_rate": 4.866666666666666e-05, + "loss": 0.0002, + "step": 8667 + }, + { + "epoch": 129.37, + "learning_rate": 4.8631578947368415e-05, + "loss": 0.0002, + "step": 8668 + }, + { + "epoch": 129.39, + "learning_rate": 4.859649122807018e-05, + "loss": 0.0004, + "step": 8669 + }, + { + "epoch": 129.4, + "learning_rate": 4.8561403508771926e-05, + "loss": 0.0003, + "step": 8670 + }, + { + "epoch": 129.42, + "learning_rate": 4.852631578947368e-05, + "loss": 0.0003, + "step": 8671 + }, + { + "epoch": 129.43, + "learning_rate": 4.849122807017544e-05, + "loss": 0.0003, + "step": 8672 + }, + { + "epoch": 129.45, + "learning_rate": 4.845614035087719e-05, + "loss": 0.0002, + "step": 8673 + }, + { + "epoch": 129.46, + "learning_rate": 4.842105263157894e-05, + "loss": 0.0004, + "step": 8674 + }, + { + "epoch": 129.48, + "learning_rate": 4.83859649122807e-05, + "loss": 0.0006, + "step": 8675 + }, + { + "epoch": 129.49, + "learning_rate": 4.835087719298245e-05, + "loss": 0.0077, + "step": 8676 + }, + { + "epoch": 129.51, + "learning_rate": 4.831578947368421e-05, + "loss": 0.0002, + "step": 8677 + }, + { + "epoch": 129.52, + "learning_rate": 4.828070175438596e-05, + "loss": 0.0004, + "step": 8678 + }, + { + "epoch": 129.54, + "learning_rate": 4.824561403508771e-05, + "loss": 0.0003, + "step": 8679 + }, + { + "epoch": 129.55, + "learning_rate": 4.821052631578947e-05, + "loss": 0.0004, + "step": 8680 + }, + { + "epoch": 129.57, + "learning_rate": 4.817543859649122e-05, + "loss": 0.0004, + "step": 8681 + }, + { + "epoch": 129.58, + "learning_rate": 4.814035087719298e-05, + "loss": 0.0004, + "step": 8682 + }, + { + "epoch": 129.59, + "learning_rate": 4.8105263157894735e-05, + "loss": 0.1145, + "step": 8683 + }, + { + "epoch": 129.61, + "learning_rate": 4.807017543859649e-05, + "loss": 0.0003, + "step": 8684 + }, + { + "epoch": 129.62, + "learning_rate": 4.8035087719298246e-05, + "loss": 0.1198, + "step": 8685 + }, + { + "epoch": 129.64, + "learning_rate": 4.7999999999999994e-05, + "loss": 0.0003, + "step": 8686 + }, + { + "epoch": 129.65, + "learning_rate": 4.796491228070175e-05, + "loss": 0.0002, + "step": 8687 + }, + { + "epoch": 129.67, + "learning_rate": 4.7929824561403506e-05, + "loss": 0.0003, + "step": 8688 + }, + { + "epoch": 129.68, + "learning_rate": 4.789473684210526e-05, + "loss": 0.0002, + "step": 8689 + }, + { + "epoch": 129.7, + "learning_rate": 4.785964912280701e-05, + "loss": 0.0005, + "step": 8690 + }, + { + "epoch": 129.71, + "learning_rate": 4.7824561403508765e-05, + "loss": 0.0006, + "step": 8691 + }, + { + "epoch": 129.73, + "learning_rate": 4.778947368421052e-05, + "loss": 0.0003, + "step": 8692 + }, + { + "epoch": 129.74, + "learning_rate": 4.775438596491227e-05, + "loss": 0.0003, + "step": 8693 + }, + { + "epoch": 129.76, + "learning_rate": 4.771929824561403e-05, + "loss": 0.0003, + "step": 8694 + }, + { + "epoch": 129.77, + "learning_rate": 4.768421052631579e-05, + "loss": 0.0003, + "step": 8695 + }, + { + "epoch": 129.79, + "learning_rate": 4.764912280701754e-05, + "loss": 0.0003, + "step": 8696 + }, + { + "epoch": 129.8, + "learning_rate": 4.76140350877193e-05, + "loss": 0.03, + "step": 8697 + }, + { + "epoch": 129.82, + "learning_rate": 4.757894736842105e-05, + "loss": 0.0003, + "step": 8698 + }, + { + "epoch": 129.83, + "learning_rate": 4.75438596491228e-05, + "loss": 0.0003, + "step": 8699 + }, + { + "epoch": 129.85, + "learning_rate": 4.750877192982456e-05, + "loss": 0.0003, + "step": 8700 + }, + { + "epoch": 129.86, + "learning_rate": 4.7473684210526314e-05, + "loss": 0.0004, + "step": 8701 + }, + { + "epoch": 129.88, + "learning_rate": 4.743859649122806e-05, + "loss": 0.0003, + "step": 8702 + }, + { + "epoch": 129.89, + "learning_rate": 4.740350877192982e-05, + "loss": 0.0002, + "step": 8703 + }, + { + "epoch": 129.91, + "learning_rate": 4.7368421052631574e-05, + "loss": 0.0003, + "step": 8704 + }, + { + "epoch": 129.92, + "learning_rate": 4.733333333333332e-05, + "loss": 0.0004, + "step": 8705 + }, + { + "epoch": 129.94, + "learning_rate": 4.729824561403508e-05, + "loss": 0.0012, + "step": 8706 + }, + { + "epoch": 129.95, + "learning_rate": 4.726315789473684e-05, + "loss": 0.0003, + "step": 8707 + }, + { + "epoch": 129.97, + "learning_rate": 4.7228070175438596e-05, + "loss": 0.0024, + "step": 8708 + }, + { + "epoch": 129.98, + "learning_rate": 4.719298245614035e-05, + "loss": 0.0003, + "step": 8709 + }, + { + "epoch": 130.0, + "learning_rate": 4.71578947368421e-05, + "loss": 0.0003, + "step": 8710 + }, + { + "epoch": 130.01, + "learning_rate": 4.7122807017543856e-05, + "loss": 0.0006, + "step": 8711 + }, + { + "epoch": 130.03, + "learning_rate": 4.708771929824561e-05, + "loss": 0.0012, + "step": 8712 + }, + { + "epoch": 130.04, + "learning_rate": 4.705263157894737e-05, + "loss": 0.0002, + "step": 8713 + }, + { + "epoch": 130.06, + "learning_rate": 4.7017543859649116e-05, + "loss": 0.0005, + "step": 8714 + }, + { + "epoch": 130.07, + "learning_rate": 4.698245614035087e-05, + "loss": 0.0003, + "step": 8715 + }, + { + "epoch": 130.09, + "learning_rate": 4.694736842105263e-05, + "loss": 0.0003, + "step": 8716 + }, + { + "epoch": 130.1, + "learning_rate": 4.6912280701754376e-05, + "loss": 0.0004, + "step": 8717 + }, + { + "epoch": 130.12, + "learning_rate": 4.687719298245613e-05, + "loss": 0.0058, + "step": 8718 + }, + { + "epoch": 130.13, + "learning_rate": 4.6842105263157894e-05, + "loss": 0.0002, + "step": 8719 + }, + { + "epoch": 130.15, + "learning_rate": 4.680701754385965e-05, + "loss": 0.0009, + "step": 8720 + }, + { + "epoch": 130.16, + "learning_rate": 4.6771929824561405e-05, + "loss": 0.0003, + "step": 8721 + }, + { + "epoch": 130.18, + "learning_rate": 4.6736842105263154e-05, + "loss": 0.0002, + "step": 8722 + }, + { + "epoch": 130.19, + "learning_rate": 4.670175438596491e-05, + "loss": 0.1649, + "step": 8723 + }, + { + "epoch": 130.21, + "learning_rate": 4.6666666666666665e-05, + "loss": 0.0003, + "step": 8724 + }, + { + "epoch": 130.22, + "learning_rate": 4.663157894736842e-05, + "loss": 0.0006, + "step": 8725 + }, + { + "epoch": 130.24, + "learning_rate": 4.659649122807017e-05, + "loss": 0.0003, + "step": 8726 + }, + { + "epoch": 130.25, + "learning_rate": 4.6561403508771925e-05, + "loss": 0.0003, + "step": 8727 + }, + { + "epoch": 130.27, + "learning_rate": 4.652631578947368e-05, + "loss": 0.0002, + "step": 8728 + }, + { + "epoch": 130.28, + "learning_rate": 4.649122807017543e-05, + "loss": 0.0008, + "step": 8729 + }, + { + "epoch": 130.3, + "learning_rate": 4.6456140350877185e-05, + "loss": 0.0003, + "step": 8730 + }, + { + "epoch": 130.31, + "learning_rate": 4.642105263157894e-05, + "loss": 0.0005, + "step": 8731 + }, + { + "epoch": 130.33, + "learning_rate": 4.63859649122807e-05, + "loss": 0.0003, + "step": 8732 + }, + { + "epoch": 130.34, + "learning_rate": 4.635087719298246e-05, + "loss": 0.0004, + "step": 8733 + }, + { + "epoch": 130.36, + "learning_rate": 4.631578947368421e-05, + "loss": 0.2316, + "step": 8734 + }, + { + "epoch": 130.37, + "learning_rate": 4.628070175438596e-05, + "loss": 0.0004, + "step": 8735 + }, + { + "epoch": 130.39, + "learning_rate": 4.624561403508772e-05, + "loss": 0.0003, + "step": 8736 + }, + { + "epoch": 130.4, + "learning_rate": 4.621052631578947e-05, + "loss": 0.0002, + "step": 8737 + }, + { + "epoch": 130.42, + "learning_rate": 4.617543859649122e-05, + "loss": 0.0002, + "step": 8738 + }, + { + "epoch": 130.43, + "learning_rate": 4.614035087719298e-05, + "loss": 0.0881, + "step": 8739 + }, + { + "epoch": 130.45, + "learning_rate": 4.610526315789473e-05, + "loss": 0.0137, + "step": 8740 + }, + { + "epoch": 130.46, + "learning_rate": 4.607017543859648e-05, + "loss": 0.0003, + "step": 8741 + }, + { + "epoch": 130.48, + "learning_rate": 4.603508771929824e-05, + "loss": 0.0003, + "step": 8742 + }, + { + "epoch": 130.49, + "learning_rate": 4.599999999999999e-05, + "loss": 0.0003, + "step": 8743 + }, + { + "epoch": 130.51, + "learning_rate": 4.5964912280701756e-05, + "loss": 0.0003, + "step": 8744 + }, + { + "epoch": 130.52, + "learning_rate": 4.592982456140351e-05, + "loss": 0.0003, + "step": 8745 + }, + { + "epoch": 130.54, + "learning_rate": 4.589473684210526e-05, + "loss": 0.0013, + "step": 8746 + }, + { + "epoch": 130.55, + "learning_rate": 4.5859649122807015e-05, + "loss": 0.0004, + "step": 8747 + }, + { + "epoch": 130.57, + "learning_rate": 4.582456140350877e-05, + "loss": 0.0003, + "step": 8748 + }, + { + "epoch": 130.58, + "learning_rate": 4.578947368421052e-05, + "loss": 0.0011, + "step": 8749 + }, + { + "epoch": 130.59, + "learning_rate": 4.5754385964912275e-05, + "loss": 0.0002, + "step": 8750 + }, + { + "epoch": 130.61, + "learning_rate": 4.571929824561403e-05, + "loss": 0.0004, + "step": 8751 + }, + { + "epoch": 130.62, + "learning_rate": 4.5684210526315786e-05, + "loss": 0.0002, + "step": 8752 + }, + { + "epoch": 130.64, + "learning_rate": 4.5649122807017535e-05, + "loss": 0.0005, + "step": 8753 + }, + { + "epoch": 130.65, + "learning_rate": 4.561403508771929e-05, + "loss": 0.0003, + "step": 8754 + }, + { + "epoch": 130.67, + "learning_rate": 4.5578947368421046e-05, + "loss": 0.0003, + "step": 8755 + }, + { + "epoch": 130.68, + "learning_rate": 4.55438596491228e-05, + "loss": 0.0003, + "step": 8756 + }, + { + "epoch": 130.7, + "learning_rate": 4.5508771929824564e-05, + "loss": 0.0004, + "step": 8757 + }, + { + "epoch": 130.71, + "learning_rate": 4.547368421052631e-05, + "loss": 0.0004, + "step": 8758 + }, + { + "epoch": 130.73, + "learning_rate": 4.543859649122807e-05, + "loss": 0.0003, + "step": 8759 + }, + { + "epoch": 130.74, + "learning_rate": 4.5403508771929824e-05, + "loss": 0.0366, + "step": 8760 + }, + { + "epoch": 130.76, + "learning_rate": 4.536842105263157e-05, + "loss": 0.0002, + "step": 8761 + }, + { + "epoch": 130.77, + "learning_rate": 4.533333333333333e-05, + "loss": 0.0003, + "step": 8762 + }, + { + "epoch": 130.79, + "learning_rate": 4.5298245614035084e-05, + "loss": 0.0003, + "step": 8763 + }, + { + "epoch": 130.8, + "learning_rate": 4.526315789473684e-05, + "loss": 0.0003, + "step": 8764 + }, + { + "epoch": 130.82, + "learning_rate": 4.522807017543859e-05, + "loss": 0.0002, + "step": 8765 + }, + { + "epoch": 130.83, + "learning_rate": 4.5192982456140344e-05, + "loss": 0.0002, + "step": 8766 + }, + { + "epoch": 130.85, + "learning_rate": 4.51578947368421e-05, + "loss": 0.0065, + "step": 8767 + }, + { + "epoch": 130.86, + "learning_rate": 4.5122807017543855e-05, + "loss": 0.0002, + "step": 8768 + }, + { + "epoch": 130.88, + "learning_rate": 4.508771929824562e-05, + "loss": 0.0002, + "step": 8769 + }, + { + "epoch": 130.89, + "learning_rate": 4.5052631578947366e-05, + "loss": 0.0002, + "step": 8770 + }, + { + "epoch": 130.91, + "learning_rate": 4.501754385964912e-05, + "loss": 0.0002, + "step": 8771 + }, + { + "epoch": 130.92, + "learning_rate": 4.498245614035088e-05, + "loss": 0.0002, + "step": 8772 + }, + { + "epoch": 130.94, + "learning_rate": 4.4947368421052626e-05, + "loss": 0.0002, + "step": 8773 + }, + { + "epoch": 130.95, + "learning_rate": 4.491228070175438e-05, + "loss": 0.0003, + "step": 8774 + }, + { + "epoch": 130.97, + "learning_rate": 4.487719298245614e-05, + "loss": 0.0003, + "step": 8775 + }, + { + "epoch": 130.98, + "learning_rate": 4.484210526315789e-05, + "loss": 0.0007, + "step": 8776 + }, + { + "epoch": 131.0, + "learning_rate": 4.480701754385964e-05, + "loss": 0.0003, + "step": 8777 + }, + { + "epoch": 131.01, + "learning_rate": 4.47719298245614e-05, + "loss": 0.0003, + "step": 8778 + }, + { + "epoch": 131.03, + "learning_rate": 4.473684210526315e-05, + "loss": 0.0042, + "step": 8779 + }, + { + "epoch": 131.04, + "learning_rate": 4.470175438596491e-05, + "loss": 0.0002, + "step": 8780 + }, + { + "epoch": 131.06, + "learning_rate": 4.466666666666666e-05, + "loss": 0.0003, + "step": 8781 + }, + { + "epoch": 131.07, + "learning_rate": 4.463157894736842e-05, + "loss": 0.0002, + "step": 8782 + }, + { + "epoch": 131.09, + "learning_rate": 4.4596491228070175e-05, + "loss": 0.0002, + "step": 8783 + }, + { + "epoch": 131.1, + "learning_rate": 4.456140350877193e-05, + "loss": 0.0003, + "step": 8784 + }, + { + "epoch": 131.12, + "learning_rate": 4.452631578947368e-05, + "loss": 0.0004, + "step": 8785 + }, + { + "epoch": 131.13, + "learning_rate": 4.4491228070175435e-05, + "loss": 0.0002, + "step": 8786 + }, + { + "epoch": 131.15, + "learning_rate": 4.445614035087719e-05, + "loss": 0.0003, + "step": 8787 + }, + { + "epoch": 131.16, + "learning_rate": 4.4421052631578946e-05, + "loss": 0.0003, + "step": 8788 + }, + { + "epoch": 131.18, + "learning_rate": 4.4385964912280695e-05, + "loss": 0.0003, + "step": 8789 + }, + { + "epoch": 131.19, + "learning_rate": 4.435087719298245e-05, + "loss": 0.0013, + "step": 8790 + }, + { + "epoch": 131.21, + "learning_rate": 4.4315789473684206e-05, + "loss": 0.0467, + "step": 8791 + }, + { + "epoch": 131.22, + "learning_rate": 4.428070175438596e-05, + "loss": 0.0002, + "step": 8792 + }, + { + "epoch": 131.24, + "learning_rate": 4.424561403508771e-05, + "loss": 0.0002, + "step": 8793 + }, + { + "epoch": 131.25, + "learning_rate": 4.4210526315789466e-05, + "loss": 0.0003, + "step": 8794 + }, + { + "epoch": 131.27, + "learning_rate": 4.417543859649123e-05, + "loss": 0.0003, + "step": 8795 + }, + { + "epoch": 131.28, + "learning_rate": 4.4140350877192983e-05, + "loss": 0.0009, + "step": 8796 + }, + { + "epoch": 131.3, + "learning_rate": 4.410526315789473e-05, + "loss": 0.0005, + "step": 8797 + }, + { + "epoch": 131.31, + "learning_rate": 4.407017543859649e-05, + "loss": 0.0003, + "step": 8798 + }, + { + "epoch": 131.33, + "learning_rate": 4.403508771929824e-05, + "loss": 0.0019, + "step": 8799 + }, + { + "epoch": 131.34, + "learning_rate": 4.4e-05, + "loss": 0.0003, + "step": 8800 + }, + { + "epoch": 131.34, + "eval_accuracy": 0.8881546744982868, + "eval_f1": 0.887829841036129, + "eval_loss": 0.5989698171615601, + "eval_runtime": 343.81, + "eval_samples_per_second": 11.884, + "eval_steps_per_second": 0.745, + "step": 8800 + }, + { + "epoch": 131.36, + "learning_rate": 4.396491228070175e-05, + "loss": 0.0003, + "step": 8801 + }, + { + "epoch": 131.37, + "learning_rate": 4.39298245614035e-05, + "loss": 0.0002, + "step": 8802 + }, + { + "epoch": 131.39, + "learning_rate": 4.389473684210526e-05, + "loss": 0.0002, + "step": 8803 + }, + { + "epoch": 131.4, + "learning_rate": 4.3859649122807014e-05, + "loss": 0.079, + "step": 8804 + }, + { + "epoch": 131.42, + "learning_rate": 4.382456140350876e-05, + "loss": 0.0002, + "step": 8805 + }, + { + "epoch": 131.43, + "learning_rate": 4.378947368421052e-05, + "loss": 0.0009, + "step": 8806 + }, + { + "epoch": 131.45, + "learning_rate": 4.375438596491228e-05, + "loss": 0.0003, + "step": 8807 + }, + { + "epoch": 131.46, + "learning_rate": 4.3719298245614037e-05, + "loss": 0.0005, + "step": 8808 + }, + { + "epoch": 131.48, + "learning_rate": 4.3684210526315785e-05, + "loss": 0.0002, + "step": 8809 + }, + { + "epoch": 131.49, + "learning_rate": 4.364912280701754e-05, + "loss": 0.0003, + "step": 8810 + }, + { + "epoch": 131.51, + "learning_rate": 4.3614035087719296e-05, + "loss": 0.003, + "step": 8811 + }, + { + "epoch": 131.52, + "learning_rate": 4.357894736842105e-05, + "loss": 0.0004, + "step": 8812 + }, + { + "epoch": 131.54, + "learning_rate": 4.35438596491228e-05, + "loss": 0.0002, + "step": 8813 + }, + { + "epoch": 131.55, + "learning_rate": 4.3508771929824556e-05, + "loss": 0.0002, + "step": 8814 + }, + { + "epoch": 131.57, + "learning_rate": 4.347368421052631e-05, + "loss": 0.0003, + "step": 8815 + }, + { + "epoch": 131.58, + "learning_rate": 4.343859649122807e-05, + "loss": 0.0003, + "step": 8816 + }, + { + "epoch": 131.59, + "learning_rate": 4.3403508771929816e-05, + "loss": 0.0004, + "step": 8817 + }, + { + "epoch": 131.61, + "learning_rate": 4.336842105263157e-05, + "loss": 0.0003, + "step": 8818 + }, + { + "epoch": 131.62, + "learning_rate": 4.333333333333333e-05, + "loss": 0.0003, + "step": 8819 + }, + { + "epoch": 131.64, + "learning_rate": 4.329824561403509e-05, + "loss": 0.0004, + "step": 8820 + }, + { + "epoch": 131.65, + "learning_rate": 4.326315789473684e-05, + "loss": 0.0003, + "step": 8821 + }, + { + "epoch": 131.67, + "learning_rate": 4.3228070175438594e-05, + "loss": 0.0002, + "step": 8822 + }, + { + "epoch": 131.68, + "learning_rate": 4.319298245614035e-05, + "loss": 0.009, + "step": 8823 + }, + { + "epoch": 131.7, + "learning_rate": 4.3157894736842105e-05, + "loss": 0.0358, + "step": 8824 + }, + { + "epoch": 131.71, + "learning_rate": 4.3122807017543854e-05, + "loss": 0.0002, + "step": 8825 + }, + { + "epoch": 131.73, + "learning_rate": 4.308771929824561e-05, + "loss": 0.0003, + "step": 8826 + }, + { + "epoch": 131.74, + "learning_rate": 4.3052631578947365e-05, + "loss": 0.1321, + "step": 8827 + }, + { + "epoch": 131.76, + "learning_rate": 4.301754385964912e-05, + "loss": 0.0002, + "step": 8828 + }, + { + "epoch": 131.77, + "learning_rate": 4.298245614035087e-05, + "loss": 0.0002, + "step": 8829 + }, + { + "epoch": 131.79, + "learning_rate": 4.2947368421052625e-05, + "loss": 0.0002, + "step": 8830 + }, + { + "epoch": 131.8, + "learning_rate": 4.291228070175438e-05, + "loss": 0.0002, + "step": 8831 + }, + { + "epoch": 131.82, + "learning_rate": 4.287719298245614e-05, + "loss": 0.0039, + "step": 8832 + }, + { + "epoch": 131.83, + "learning_rate": 4.284210526315789e-05, + "loss": 0.0006, + "step": 8833 + }, + { + "epoch": 131.85, + "learning_rate": 4.280701754385965e-05, + "loss": 0.0003, + "step": 8834 + }, + { + "epoch": 131.86, + "learning_rate": 4.27719298245614e-05, + "loss": 0.0007, + "step": 8835 + }, + { + "epoch": 131.88, + "learning_rate": 4.273684210526316e-05, + "loss": 0.0002, + "step": 8836 + }, + { + "epoch": 131.89, + "learning_rate": 4.270175438596491e-05, + "loss": 0.0002, + "step": 8837 + }, + { + "epoch": 131.91, + "learning_rate": 4.266666666666666e-05, + "loss": 0.0003, + "step": 8838 + }, + { + "epoch": 131.92, + "learning_rate": 4.263157894736842e-05, + "loss": 0.0003, + "step": 8839 + }, + { + "epoch": 131.94, + "learning_rate": 4.2596491228070174e-05, + "loss": 0.0007, + "step": 8840 + }, + { + "epoch": 131.95, + "learning_rate": 4.256140350877192e-05, + "loss": 0.0003, + "step": 8841 + }, + { + "epoch": 131.97, + "learning_rate": 4.252631578947368e-05, + "loss": 0.0002, + "step": 8842 + }, + { + "epoch": 131.98, + "learning_rate": 4.2491228070175433e-05, + "loss": 0.0002, + "step": 8843 + }, + { + "epoch": 132.0, + "learning_rate": 4.245614035087718e-05, + "loss": 0.0003, + "step": 8844 + }, + { + "epoch": 132.01, + "learning_rate": 4.2421052631578945e-05, + "loss": 0.0459, + "step": 8845 + }, + { + "epoch": 132.03, + "learning_rate": 4.23859649122807e-05, + "loss": 0.0036, + "step": 8846 + }, + { + "epoch": 132.04, + "learning_rate": 4.2350877192982456e-05, + "loss": 0.2391, + "step": 8847 + }, + { + "epoch": 132.06, + "learning_rate": 4.231578947368421e-05, + "loss": 0.0002, + "step": 8848 + }, + { + "epoch": 132.07, + "learning_rate": 4.228070175438596e-05, + "loss": 0.0002, + "step": 8849 + }, + { + "epoch": 132.09, + "learning_rate": 4.2245614035087716e-05, + "loss": 0.0002, + "step": 8850 + }, + { + "epoch": 132.1, + "learning_rate": 4.221052631578947e-05, + "loss": 0.0003, + "step": 8851 + }, + { + "epoch": 132.12, + "learning_rate": 4.217543859649123e-05, + "loss": 0.0002, + "step": 8852 + }, + { + "epoch": 132.13, + "learning_rate": 4.2140350877192975e-05, + "loss": 0.0003, + "step": 8853 + }, + { + "epoch": 132.15, + "learning_rate": 4.210526315789473e-05, + "loss": 0.0002, + "step": 8854 + }, + { + "epoch": 132.16, + "learning_rate": 4.2070175438596487e-05, + "loss": 0.0003, + "step": 8855 + }, + { + "epoch": 132.18, + "learning_rate": 4.2035087719298235e-05, + "loss": 0.0002, + "step": 8856 + }, + { + "epoch": 132.19, + "learning_rate": 4.2e-05, + "loss": 0.0002, + "step": 8857 + }, + { + "epoch": 132.21, + "learning_rate": 4.196491228070175e-05, + "loss": 0.0002, + "step": 8858 + }, + { + "epoch": 132.22, + "learning_rate": 4.192982456140351e-05, + "loss": 0.0742, + "step": 8859 + }, + { + "epoch": 132.24, + "learning_rate": 4.1894736842105264e-05, + "loss": 0.0009, + "step": 8860 + }, + { + "epoch": 132.25, + "learning_rate": 4.185964912280701e-05, + "loss": 0.0268, + "step": 8861 + }, + { + "epoch": 132.27, + "learning_rate": 4.182456140350877e-05, + "loss": 0.0003, + "step": 8862 + }, + { + "epoch": 132.28, + "learning_rate": 4.1789473684210524e-05, + "loss": 0.0003, + "step": 8863 + }, + { + "epoch": 132.3, + "learning_rate": 4.175438596491228e-05, + "loss": 0.0065, + "step": 8864 + }, + { + "epoch": 132.31, + "learning_rate": 4.171929824561403e-05, + "loss": 0.0002, + "step": 8865 + }, + { + "epoch": 132.33, + "learning_rate": 4.1684210526315784e-05, + "loss": 0.0004, + "step": 8866 + }, + { + "epoch": 132.34, + "learning_rate": 4.164912280701754e-05, + "loss": 0.0506, + "step": 8867 + }, + { + "epoch": 132.36, + "learning_rate": 4.161403508771929e-05, + "loss": 0.0004, + "step": 8868 + }, + { + "epoch": 132.37, + "learning_rate": 4.1578947368421044e-05, + "loss": 0.0002, + "step": 8869 + }, + { + "epoch": 132.39, + "learning_rate": 4.1543859649122806e-05, + "loss": 0.0002, + "step": 8870 + }, + { + "epoch": 132.4, + "learning_rate": 4.150877192982456e-05, + "loss": 0.0006, + "step": 8871 + }, + { + "epoch": 132.42, + "learning_rate": 4.147368421052632e-05, + "loss": 0.0002, + "step": 8872 + }, + { + "epoch": 132.43, + "learning_rate": 4.1438596491228066e-05, + "loss": 0.0002, + "step": 8873 + }, + { + "epoch": 132.45, + "learning_rate": 4.140350877192982e-05, + "loss": 0.0003, + "step": 8874 + }, + { + "epoch": 132.46, + "learning_rate": 4.136842105263158e-05, + "loss": 0.0003, + "step": 8875 + }, + { + "epoch": 132.48, + "learning_rate": 4.1333333333333326e-05, + "loss": 0.0238, + "step": 8876 + }, + { + "epoch": 132.49, + "learning_rate": 4.129824561403508e-05, + "loss": 0.0003, + "step": 8877 + }, + { + "epoch": 132.51, + "learning_rate": 4.126315789473684e-05, + "loss": 0.0002, + "step": 8878 + }, + { + "epoch": 132.52, + "learning_rate": 4.122807017543859e-05, + "loss": 0.0006, + "step": 8879 + }, + { + "epoch": 132.54, + "learning_rate": 4.119298245614034e-05, + "loss": 0.0003, + "step": 8880 + }, + { + "epoch": 132.55, + "learning_rate": 4.11578947368421e-05, + "loss": 0.0003, + "step": 8881 + }, + { + "epoch": 132.57, + "learning_rate": 4.112280701754386e-05, + "loss": 0.0003, + "step": 8882 + }, + { + "epoch": 132.58, + "learning_rate": 4.1087719298245615e-05, + "loss": 0.0002, + "step": 8883 + }, + { + "epoch": 132.59, + "learning_rate": 4.105263157894737e-05, + "loss": 0.0003, + "step": 8884 + }, + { + "epoch": 132.61, + "learning_rate": 4.101754385964912e-05, + "loss": 0.0002, + "step": 8885 + }, + { + "epoch": 132.62, + "learning_rate": 4.0982456140350875e-05, + "loss": 0.0002, + "step": 8886 + }, + { + "epoch": 132.64, + "learning_rate": 4.094736842105263e-05, + "loss": 0.0003, + "step": 8887 + }, + { + "epoch": 132.65, + "learning_rate": 4.091228070175438e-05, + "loss": 0.0002, + "step": 8888 + }, + { + "epoch": 132.67, + "learning_rate": 4.0877192982456135e-05, + "loss": 0.0002, + "step": 8889 + }, + { + "epoch": 132.68, + "learning_rate": 4.084210526315789e-05, + "loss": 0.0003, + "step": 8890 + }, + { + "epoch": 132.7, + "learning_rate": 4.0807017543859646e-05, + "loss": 0.0003, + "step": 8891 + }, + { + "epoch": 132.71, + "learning_rate": 4.0771929824561395e-05, + "loss": 0.0002, + "step": 8892 + }, + { + "epoch": 132.73, + "learning_rate": 4.073684210526315e-05, + "loss": 0.0002, + "step": 8893 + }, + { + "epoch": 132.74, + "learning_rate": 4.0701754385964906e-05, + "loss": 0.0003, + "step": 8894 + }, + { + "epoch": 132.76, + "learning_rate": 4.066666666666667e-05, + "loss": 0.0003, + "step": 8895 + }, + { + "epoch": 132.77, + "learning_rate": 4.0631578947368424e-05, + "loss": 0.0002, + "step": 8896 + }, + { + "epoch": 132.79, + "learning_rate": 4.059649122807017e-05, + "loss": 0.0002, + "step": 8897 + }, + { + "epoch": 132.8, + "learning_rate": 4.056140350877193e-05, + "loss": 0.0062, + "step": 8898 + }, + { + "epoch": 132.82, + "learning_rate": 4.0526315789473684e-05, + "loss": 0.0178, + "step": 8899 + }, + { + "epoch": 132.83, + "learning_rate": 4.049122807017543e-05, + "loss": 0.0005, + "step": 8900 + }, + { + "epoch": 132.85, + "learning_rate": 4.045614035087719e-05, + "loss": 0.0003, + "step": 8901 + }, + { + "epoch": 132.86, + "learning_rate": 4.0421052631578943e-05, + "loss": 0.0003, + "step": 8902 + }, + { + "epoch": 132.88, + "learning_rate": 4.03859649122807e-05, + "loss": 0.0002, + "step": 8903 + }, + { + "epoch": 132.89, + "learning_rate": 4.035087719298245e-05, + "loss": 0.0002, + "step": 8904 + }, + { + "epoch": 132.91, + "learning_rate": 4.03157894736842e-05, + "loss": 0.0003, + "step": 8905 + }, + { + "epoch": 132.92, + "learning_rate": 4.028070175438596e-05, + "loss": 0.0002, + "step": 8906 + }, + { + "epoch": 132.94, + "learning_rate": 4.024561403508772e-05, + "loss": 0.0039, + "step": 8907 + }, + { + "epoch": 132.95, + "learning_rate": 4.021052631578948e-05, + "loss": 0.0004, + "step": 8908 + }, + { + "epoch": 132.97, + "learning_rate": 4.0175438596491226e-05, + "loss": 0.0048, + "step": 8909 + }, + { + "epoch": 132.98, + "learning_rate": 4.014035087719298e-05, + "loss": 0.0053, + "step": 8910 + }, + { + "epoch": 133.0, + "learning_rate": 4.0105263157894737e-05, + "loss": 0.0002, + "step": 8911 + }, + { + "epoch": 133.01, + "learning_rate": 4.0070175438596485e-05, + "loss": 0.0002, + "step": 8912 + }, + { + "epoch": 133.03, + "learning_rate": 4.003508771929824e-05, + "loss": 0.0002, + "step": 8913 + }, + { + "epoch": 133.04, + "learning_rate": 3.9999999999999996e-05, + "loss": 0.0003, + "step": 8914 + }, + { + "epoch": 133.06, + "learning_rate": 3.996491228070175e-05, + "loss": 0.0011, + "step": 8915 + }, + { + "epoch": 133.07, + "learning_rate": 3.99298245614035e-05, + "loss": 0.0002, + "step": 8916 + }, + { + "epoch": 133.09, + "learning_rate": 3.9894736842105256e-05, + "loss": 0.0004, + "step": 8917 + }, + { + "epoch": 133.1, + "learning_rate": 3.985964912280701e-05, + "loss": 0.0003, + "step": 8918 + }, + { + "epoch": 133.12, + "learning_rate": 3.982456140350877e-05, + "loss": 0.0002, + "step": 8919 + }, + { + "epoch": 133.13, + "learning_rate": 3.978947368421053e-05, + "loss": 0.0002, + "step": 8920 + }, + { + "epoch": 133.15, + "learning_rate": 3.975438596491228e-05, + "loss": 0.0002, + "step": 8921 + }, + { + "epoch": 133.16, + "learning_rate": 3.9719298245614034e-05, + "loss": 0.0348, + "step": 8922 + }, + { + "epoch": 133.18, + "learning_rate": 3.968421052631579e-05, + "loss": 0.0002, + "step": 8923 + }, + { + "epoch": 133.19, + "learning_rate": 3.964912280701754e-05, + "loss": 0.0002, + "step": 8924 + }, + { + "epoch": 133.21, + "learning_rate": 3.9614035087719294e-05, + "loss": 0.0002, + "step": 8925 + }, + { + "epoch": 133.22, + "learning_rate": 3.957894736842105e-05, + "loss": 0.0017, + "step": 8926 + }, + { + "epoch": 133.24, + "learning_rate": 3.9543859649122805e-05, + "loss": 0.0003, + "step": 8927 + }, + { + "epoch": 133.25, + "learning_rate": 3.9508771929824554e-05, + "loss": 0.0003, + "step": 8928 + }, + { + "epoch": 133.27, + "learning_rate": 3.947368421052631e-05, + "loss": 0.0003, + "step": 8929 + }, + { + "epoch": 133.28, + "learning_rate": 3.9438596491228065e-05, + "loss": 0.0002, + "step": 8930 + }, + { + "epoch": 133.3, + "learning_rate": 3.940350877192982e-05, + "loss": 0.0002, + "step": 8931 + }, + { + "epoch": 133.31, + "learning_rate": 3.936842105263158e-05, + "loss": 0.0002, + "step": 8932 + }, + { + "epoch": 133.33, + "learning_rate": 3.933333333333333e-05, + "loss": 0.0003, + "step": 8933 + }, + { + "epoch": 133.34, + "learning_rate": 3.929824561403509e-05, + "loss": 0.0003, + "step": 8934 + }, + { + "epoch": 133.36, + "learning_rate": 3.926315789473684e-05, + "loss": 0.0003, + "step": 8935 + }, + { + "epoch": 133.37, + "learning_rate": 3.922807017543859e-05, + "loss": 0.0002, + "step": 8936 + }, + { + "epoch": 133.39, + "learning_rate": 3.919298245614035e-05, + "loss": 0.0002, + "step": 8937 + }, + { + "epoch": 133.4, + "learning_rate": 3.91578947368421e-05, + "loss": 0.0003, + "step": 8938 + }, + { + "epoch": 133.42, + "learning_rate": 3.912280701754386e-05, + "loss": 0.0002, + "step": 8939 + }, + { + "epoch": 133.43, + "learning_rate": 3.908771929824561e-05, + "loss": 0.0002, + "step": 8940 + }, + { + "epoch": 133.45, + "learning_rate": 3.905263157894736e-05, + "loss": 0.0374, + "step": 8941 + }, + { + "epoch": 133.46, + "learning_rate": 3.901754385964912e-05, + "loss": 0.0002, + "step": 8942 + }, + { + "epoch": 133.48, + "learning_rate": 3.8982456140350874e-05, + "loss": 0.0004, + "step": 8943 + }, + { + "epoch": 133.49, + "learning_rate": 3.894736842105262e-05, + "loss": 0.0002, + "step": 8944 + }, + { + "epoch": 133.51, + "learning_rate": 3.8912280701754385e-05, + "loss": 0.173, + "step": 8945 + }, + { + "epoch": 133.52, + "learning_rate": 3.887719298245614e-05, + "loss": 0.0002, + "step": 8946 + }, + { + "epoch": 133.54, + "learning_rate": 3.8842105263157896e-05, + "loss": 0.0002, + "step": 8947 + }, + { + "epoch": 133.55, + "learning_rate": 3.8807017543859645e-05, + "loss": 0.0006, + "step": 8948 + }, + { + "epoch": 133.57, + "learning_rate": 3.87719298245614e-05, + "loss": 0.0002, + "step": 8949 + }, + { + "epoch": 133.58, + "learning_rate": 3.8736842105263156e-05, + "loss": 0.0002, + "step": 8950 + }, + { + "epoch": 133.59, + "learning_rate": 3.870175438596491e-05, + "loss": 0.0002, + "step": 8951 + }, + { + "epoch": 133.61, + "learning_rate": 3.866666666666666e-05, + "loss": 0.0002, + "step": 8952 + }, + { + "epoch": 133.62, + "learning_rate": 3.8631578947368416e-05, + "loss": 0.0002, + "step": 8953 + }, + { + "epoch": 133.64, + "learning_rate": 3.859649122807017e-05, + "loss": 0.0003, + "step": 8954 + }, + { + "epoch": 133.65, + "learning_rate": 3.856140350877193e-05, + "loss": 0.0003, + "step": 8955 + }, + { + "epoch": 133.67, + "learning_rate": 3.8526315789473676e-05, + "loss": 0.0003, + "step": 8956 + }, + { + "epoch": 133.68, + "learning_rate": 3.849122807017543e-05, + "loss": 0.0003, + "step": 8957 + }, + { + "epoch": 133.7, + "learning_rate": 3.8456140350877193e-05, + "loss": 0.0002, + "step": 8958 + }, + { + "epoch": 133.71, + "learning_rate": 3.842105263157895e-05, + "loss": 0.0002, + "step": 8959 + }, + { + "epoch": 133.73, + "learning_rate": 3.83859649122807e-05, + "loss": 0.0002, + "step": 8960 + }, + { + "epoch": 133.74, + "learning_rate": 3.835087719298245e-05, + "loss": 0.0729, + "step": 8961 + }, + { + "epoch": 133.76, + "learning_rate": 3.831578947368421e-05, + "loss": 0.0002, + "step": 8962 + }, + { + "epoch": 133.77, + "learning_rate": 3.8280701754385964e-05, + "loss": 0.0002, + "step": 8963 + }, + { + "epoch": 133.79, + "learning_rate": 3.824561403508771e-05, + "loss": 0.0002, + "step": 8964 + }, + { + "epoch": 133.8, + "learning_rate": 3.821052631578947e-05, + "loss": 0.0002, + "step": 8965 + }, + { + "epoch": 133.82, + "learning_rate": 3.8175438596491224e-05, + "loss": 0.0399, + "step": 8966 + }, + { + "epoch": 133.83, + "learning_rate": 3.814035087719298e-05, + "loss": 0.0002, + "step": 8967 + }, + { + "epoch": 133.85, + "learning_rate": 3.810526315789473e-05, + "loss": 0.0005, + "step": 8968 + }, + { + "epoch": 133.86, + "learning_rate": 3.8070175438596484e-05, + "loss": 0.0002, + "step": 8969 + }, + { + "epoch": 133.88, + "learning_rate": 3.8035087719298247e-05, + "loss": 0.0002, + "step": 8970 + }, + { + "epoch": 133.89, + "learning_rate": 3.8e-05, + "loss": 0.0002, + "step": 8971 + }, + { + "epoch": 133.91, + "learning_rate": 3.796491228070175e-05, + "loss": 0.0002, + "step": 8972 + }, + { + "epoch": 133.92, + "learning_rate": 3.7929824561403506e-05, + "loss": 0.0004, + "step": 8973 + }, + { + "epoch": 133.94, + "learning_rate": 3.789473684210526e-05, + "loss": 0.0011, + "step": 8974 + }, + { + "epoch": 133.95, + "learning_rate": 3.785964912280702e-05, + "loss": 0.0014, + "step": 8975 + }, + { + "epoch": 133.97, + "learning_rate": 3.7824561403508766e-05, + "loss": 0.0002, + "step": 8976 + }, + { + "epoch": 133.98, + "learning_rate": 3.778947368421052e-05, + "loss": 0.0002, + "step": 8977 + }, + { + "epoch": 134.0, + "learning_rate": 3.775438596491228e-05, + "loss": 0.0002, + "step": 8978 + }, + { + "epoch": 134.01, + "learning_rate": 3.771929824561403e-05, + "loss": 0.0004, + "step": 8979 + }, + { + "epoch": 134.03, + "learning_rate": 3.768421052631578e-05, + "loss": 0.0003, + "step": 8980 + }, + { + "epoch": 134.04, + "learning_rate": 3.764912280701754e-05, + "loss": 0.0003, + "step": 8981 + }, + { + "epoch": 134.06, + "learning_rate": 3.761403508771929e-05, + "loss": 0.0002, + "step": 8982 + }, + { + "epoch": 134.07, + "learning_rate": 3.7578947368421055e-05, + "loss": 0.0002, + "step": 8983 + }, + { + "epoch": 134.09, + "learning_rate": 3.7543859649122804e-05, + "loss": 0.0002, + "step": 8984 + }, + { + "epoch": 134.1, + "learning_rate": 3.750877192982456e-05, + "loss": 0.0003, + "step": 8985 + }, + { + "epoch": 134.12, + "learning_rate": 3.7473684210526315e-05, + "loss": 0.0003, + "step": 8986 + }, + { + "epoch": 134.13, + "learning_rate": 3.743859649122807e-05, + "loss": 0.0002, + "step": 8987 + }, + { + "epoch": 134.15, + "learning_rate": 3.740350877192982e-05, + "loss": 0.0003, + "step": 8988 + }, + { + "epoch": 134.16, + "learning_rate": 3.7368421052631575e-05, + "loss": 0.0002, + "step": 8989 + }, + { + "epoch": 134.18, + "learning_rate": 3.733333333333333e-05, + "loss": 0.0002, + "step": 8990 + }, + { + "epoch": 134.19, + "learning_rate": 3.7298245614035086e-05, + "loss": 0.0002, + "step": 8991 + }, + { + "epoch": 134.21, + "learning_rate": 3.726315789473684e-05, + "loss": 0.0004, + "step": 8992 + }, + { + "epoch": 134.22, + "learning_rate": 3.72280701754386e-05, + "loss": 0.0002, + "step": 8993 + }, + { + "epoch": 134.24, + "learning_rate": 3.7192982456140346e-05, + "loss": 0.0002, + "step": 8994 + }, + { + "epoch": 134.25, + "learning_rate": 3.71578947368421e-05, + "loss": 0.0002, + "step": 8995 + }, + { + "epoch": 134.27, + "learning_rate": 3.712280701754386e-05, + "loss": 0.0002, + "step": 8996 + }, + { + "epoch": 134.28, + "learning_rate": 3.7087719298245606e-05, + "loss": 0.0002, + "step": 8997 + }, + { + "epoch": 134.3, + "learning_rate": 3.705263157894737e-05, + "loss": 0.001, + "step": 8998 + }, + { + "epoch": 134.31, + "learning_rate": 3.7017543859649124e-05, + "loss": 0.0002, + "step": 8999 + }, + { + "epoch": 134.33, + "learning_rate": 3.698245614035087e-05, + "loss": 0.0002, + "step": 9000 + }, + { + "epoch": 134.33, + "eval_accuracy": 0.8888888888888888, + "eval_f1": 0.8887363564634383, + "eval_loss": 0.6235546469688416, + "eval_runtime": 344.0974, + "eval_samples_per_second": 11.875, + "eval_steps_per_second": 0.744, + "step": 9000 + }, + { + "epoch": 134.34, + "learning_rate": 3.694736842105263e-05, + "loss": 0.0051, + "step": 9001 + }, + { + "epoch": 134.36, + "learning_rate": 3.6912280701754384e-05, + "loss": 0.0002, + "step": 9002 + }, + { + "epoch": 134.37, + "learning_rate": 3.687719298245613e-05, + "loss": 0.0003, + "step": 9003 + }, + { + "epoch": 134.39, + "learning_rate": 3.684210526315789e-05, + "loss": 0.0002, + "step": 9004 + }, + { + "epoch": 134.4, + "learning_rate": 3.680701754385965e-05, + "loss": 0.0002, + "step": 9005 + }, + { + "epoch": 134.42, + "learning_rate": 3.67719298245614e-05, + "loss": 0.0002, + "step": 9006 + }, + { + "epoch": 134.43, + "learning_rate": 3.6736842105263155e-05, + "loss": 0.0002, + "step": 9007 + }, + { + "epoch": 134.45, + "learning_rate": 3.670175438596491e-05, + "loss": 0.0003, + "step": 9008 + }, + { + "epoch": 134.46, + "learning_rate": 3.666666666666666e-05, + "loss": 0.0003, + "step": 9009 + }, + { + "epoch": 134.48, + "learning_rate": 3.6631578947368414e-05, + "loss": 0.139, + "step": 9010 + }, + { + "epoch": 134.49, + "learning_rate": 3.659649122807018e-05, + "loss": 0.0002, + "step": 9011 + }, + { + "epoch": 134.51, + "learning_rate": 3.6561403508771926e-05, + "loss": 0.0003, + "step": 9012 + }, + { + "epoch": 134.52, + "learning_rate": 3.652631578947368e-05, + "loss": 0.0002, + "step": 9013 + }, + { + "epoch": 134.54, + "learning_rate": 3.649122807017544e-05, + "loss": 0.0002, + "step": 9014 + }, + { + "epoch": 134.55, + "learning_rate": 3.6456140350877185e-05, + "loss": 0.0002, + "step": 9015 + }, + { + "epoch": 134.57, + "learning_rate": 3.642105263157894e-05, + "loss": 0.0002, + "step": 9016 + }, + { + "epoch": 134.58, + "learning_rate": 3.63859649122807e-05, + "loss": 0.0002, + "step": 9017 + }, + { + "epoch": 134.59, + "learning_rate": 3.635087719298245e-05, + "loss": 0.0004, + "step": 9018 + }, + { + "epoch": 134.61, + "learning_rate": 3.631578947368421e-05, + "loss": 0.0007, + "step": 9019 + }, + { + "epoch": 134.62, + "learning_rate": 3.628070175438596e-05, + "loss": 0.0002, + "step": 9020 + }, + { + "epoch": 134.64, + "learning_rate": 3.624561403508771e-05, + "loss": 0.0566, + "step": 9021 + }, + { + "epoch": 134.65, + "learning_rate": 3.621052631578947e-05, + "loss": 0.0002, + "step": 9022 + }, + { + "epoch": 134.67, + "learning_rate": 3.617543859649123e-05, + "loss": 0.0002, + "step": 9023 + }, + { + "epoch": 134.68, + "learning_rate": 3.614035087719298e-05, + "loss": 0.0002, + "step": 9024 + }, + { + "epoch": 134.7, + "learning_rate": 3.6105263157894734e-05, + "loss": 0.0005, + "step": 9025 + }, + { + "epoch": 134.71, + "learning_rate": 3.607017543859649e-05, + "loss": 0.0002, + "step": 9026 + }, + { + "epoch": 134.73, + "learning_rate": 3.603508771929824e-05, + "loss": 0.0002, + "step": 9027 + }, + { + "epoch": 134.74, + "learning_rate": 3.5999999999999994e-05, + "loss": 0.0002, + "step": 9028 + }, + { + "epoch": 134.76, + "learning_rate": 3.596491228070175e-05, + "loss": 0.0003, + "step": 9029 + }, + { + "epoch": 134.77, + "learning_rate": 3.5929824561403505e-05, + "loss": 0.0003, + "step": 9030 + }, + { + "epoch": 134.79, + "learning_rate": 3.589473684210526e-05, + "loss": 0.0002, + "step": 9031 + }, + { + "epoch": 134.8, + "learning_rate": 3.5859649122807016e-05, + "loss": 0.0005, + "step": 9032 + }, + { + "epoch": 134.82, + "learning_rate": 3.5824561403508765e-05, + "loss": 0.0002, + "step": 9033 + }, + { + "epoch": 134.83, + "learning_rate": 3.578947368421052e-05, + "loss": 0.0002, + "step": 9034 + }, + { + "epoch": 134.85, + "learning_rate": 3.5754385964912276e-05, + "loss": 0.0002, + "step": 9035 + }, + { + "epoch": 134.86, + "learning_rate": 3.571929824561403e-05, + "loss": 0.0002, + "step": 9036 + }, + { + "epoch": 134.88, + "learning_rate": 3.568421052631579e-05, + "loss": 0.0002, + "step": 9037 + }, + { + "epoch": 134.89, + "learning_rate": 3.564912280701754e-05, + "loss": 0.0002, + "step": 9038 + }, + { + "epoch": 134.91, + "learning_rate": 3.561403508771929e-05, + "loss": 0.0002, + "step": 9039 + }, + { + "epoch": 134.92, + "learning_rate": 3.557894736842105e-05, + "loss": 0.0002, + "step": 9040 + }, + { + "epoch": 134.94, + "learning_rate": 3.55438596491228e-05, + "loss": 0.0002, + "step": 9041 + }, + { + "epoch": 134.95, + "learning_rate": 3.550877192982456e-05, + "loss": 0.0003, + "step": 9042 + }, + { + "epoch": 134.97, + "learning_rate": 3.5473684210526314e-05, + "loss": 0.0002, + "step": 9043 + }, + { + "epoch": 134.98, + "learning_rate": 3.543859649122807e-05, + "loss": 0.0002, + "step": 9044 + }, + { + "epoch": 135.0, + "learning_rate": 3.540350877192982e-05, + "loss": 0.0002, + "step": 9045 + }, + { + "epoch": 135.01, + "learning_rate": 3.5368421052631574e-05, + "loss": 0.0003, + "step": 9046 + }, + { + "epoch": 135.03, + "learning_rate": 3.533333333333333e-05, + "loss": 0.0002, + "step": 9047 + }, + { + "epoch": 135.04, + "learning_rate": 3.5298245614035085e-05, + "loss": 0.0644, + "step": 9048 + }, + { + "epoch": 135.06, + "learning_rate": 3.526315789473684e-05, + "loss": 0.0002, + "step": 9049 + }, + { + "epoch": 135.07, + "learning_rate": 3.5228070175438596e-05, + "loss": 0.0002, + "step": 9050 + }, + { + "epoch": 135.09, + "learning_rate": 3.5192982456140345e-05, + "loss": 0.0006, + "step": 9051 + }, + { + "epoch": 135.1, + "learning_rate": 3.51578947368421e-05, + "loss": 0.0004, + "step": 9052 + }, + { + "epoch": 135.12, + "learning_rate": 3.5122807017543856e-05, + "loss": 0.0002, + "step": 9053 + }, + { + "epoch": 135.13, + "learning_rate": 3.508771929824561e-05, + "loss": 0.0004, + "step": 9054 + }, + { + "epoch": 135.15, + "learning_rate": 3.505263157894737e-05, + "loss": 0.0006, + "step": 9055 + }, + { + "epoch": 135.16, + "learning_rate": 3.501754385964912e-05, + "loss": 0.0002, + "step": 9056 + }, + { + "epoch": 135.18, + "learning_rate": 3.498245614035087e-05, + "loss": 0.0002, + "step": 9057 + }, + { + "epoch": 135.19, + "learning_rate": 3.494736842105263e-05, + "loss": 0.0002, + "step": 9058 + }, + { + "epoch": 135.21, + "learning_rate": 3.491228070175438e-05, + "loss": 0.0002, + "step": 9059 + }, + { + "epoch": 135.22, + "learning_rate": 3.487719298245614e-05, + "loss": 0.0829, + "step": 9060 + }, + { + "epoch": 135.24, + "learning_rate": 3.4842105263157894e-05, + "loss": 0.0002, + "step": 9061 + }, + { + "epoch": 135.25, + "learning_rate": 3.480701754385965e-05, + "loss": 0.0005, + "step": 9062 + }, + { + "epoch": 135.27, + "learning_rate": 3.47719298245614e-05, + "loss": 0.0003, + "step": 9063 + }, + { + "epoch": 135.28, + "learning_rate": 3.4736842105263153e-05, + "loss": 0.0002, + "step": 9064 + }, + { + "epoch": 135.3, + "learning_rate": 3.470175438596491e-05, + "loss": 0.0003, + "step": 9065 + }, + { + "epoch": 135.31, + "learning_rate": 3.4666666666666665e-05, + "loss": 0.0002, + "step": 9066 + }, + { + "epoch": 135.33, + "learning_rate": 3.463157894736842e-05, + "loss": 0.0004, + "step": 9067 + }, + { + "epoch": 135.34, + "learning_rate": 3.4596491228070176e-05, + "loss": 0.0002, + "step": 9068 + }, + { + "epoch": 135.36, + "learning_rate": 3.4561403508771924e-05, + "loss": 0.0002, + "step": 9069 + }, + { + "epoch": 135.37, + "learning_rate": 3.452631578947368e-05, + "loss": 0.001, + "step": 9070 + }, + { + "epoch": 135.39, + "learning_rate": 3.4491228070175436e-05, + "loss": 0.0002, + "step": 9071 + }, + { + "epoch": 135.4, + "learning_rate": 3.445614035087719e-05, + "loss": 0.0002, + "step": 9072 + }, + { + "epoch": 135.42, + "learning_rate": 3.442105263157894e-05, + "loss": 0.0002, + "step": 9073 + }, + { + "epoch": 135.43, + "learning_rate": 3.43859649122807e-05, + "loss": 0.0002, + "step": 9074 + }, + { + "epoch": 135.45, + "learning_rate": 3.435087719298245e-05, + "loss": 0.0002, + "step": 9075 + }, + { + "epoch": 135.46, + "learning_rate": 3.4315789473684207e-05, + "loss": 0.0002, + "step": 9076 + }, + { + "epoch": 135.48, + "learning_rate": 3.428070175438596e-05, + "loss": 0.0002, + "step": 9077 + }, + { + "epoch": 135.49, + "learning_rate": 3.424561403508772e-05, + "loss": 0.0002, + "step": 9078 + }, + { + "epoch": 135.51, + "learning_rate": 3.4210526315789466e-05, + "loss": 0.0002, + "step": 9079 + }, + { + "epoch": 135.52, + "learning_rate": 3.417543859649123e-05, + "loss": 0.0002, + "step": 9080 + }, + { + "epoch": 135.54, + "learning_rate": 3.414035087719298e-05, + "loss": 0.0002, + "step": 9081 + }, + { + "epoch": 135.55, + "learning_rate": 3.410526315789473e-05, + "loss": 0.0002, + "step": 9082 + }, + { + "epoch": 135.57, + "learning_rate": 3.407017543859649e-05, + "loss": 0.0002, + "step": 9083 + }, + { + "epoch": 135.58, + "learning_rate": 3.4035087719298244e-05, + "loss": 0.0002, + "step": 9084 + }, + { + "epoch": 135.59, + "learning_rate": 3.399999999999999e-05, + "loss": 0.0007, + "step": 9085 + }, + { + "epoch": 135.61, + "learning_rate": 3.3964912280701755e-05, + "loss": 0.0005, + "step": 9086 + }, + { + "epoch": 135.62, + "learning_rate": 3.3929824561403504e-05, + "loss": 0.0002, + "step": 9087 + }, + { + "epoch": 135.64, + "learning_rate": 3.389473684210526e-05, + "loss": 0.0002, + "step": 9088 + }, + { + "epoch": 135.65, + "learning_rate": 3.3859649122807015e-05, + "loss": 0.0002, + "step": 9089 + }, + { + "epoch": 135.67, + "learning_rate": 3.382456140350877e-05, + "loss": 0.0002, + "step": 9090 + }, + { + "epoch": 135.68, + "learning_rate": 3.378947368421052e-05, + "loss": 0.0006, + "step": 9091 + }, + { + "epoch": 135.7, + "learning_rate": 3.375438596491228e-05, + "loss": 0.0002, + "step": 9092 + }, + { + "epoch": 135.71, + "learning_rate": 3.371929824561403e-05, + "loss": 0.1007, + "step": 9093 + }, + { + "epoch": 135.73, + "learning_rate": 3.3684210526315786e-05, + "loss": 0.0003, + "step": 9094 + }, + { + "epoch": 135.74, + "learning_rate": 3.364912280701754e-05, + "loss": 0.0002, + "step": 9095 + }, + { + "epoch": 135.76, + "learning_rate": 3.36140350877193e-05, + "loss": 0.0002, + "step": 9096 + }, + { + "epoch": 135.77, + "learning_rate": 3.3578947368421046e-05, + "loss": 0.0002, + "step": 9097 + }, + { + "epoch": 135.79, + "learning_rate": 3.35438596491228e-05, + "loss": 0.0138, + "step": 9098 + }, + { + "epoch": 135.8, + "learning_rate": 3.350877192982456e-05, + "loss": 0.0002, + "step": 9099 + }, + { + "epoch": 135.82, + "learning_rate": 3.347368421052631e-05, + "loss": 0.0002, + "step": 9100 + }, + { + "epoch": 135.83, + "learning_rate": 3.343859649122807e-05, + "loss": 0.0618, + "step": 9101 + }, + { + "epoch": 135.85, + "learning_rate": 3.3403508771929824e-05, + "loss": 0.0002, + "step": 9102 + }, + { + "epoch": 135.86, + "learning_rate": 3.336842105263157e-05, + "loss": 0.0002, + "step": 9103 + }, + { + "epoch": 135.88, + "learning_rate": 3.333333333333333e-05, + "loss": 0.0002, + "step": 9104 + }, + { + "epoch": 135.89, + "learning_rate": 3.3298245614035084e-05, + "loss": 0.0002, + "step": 9105 + }, + { + "epoch": 135.91, + "learning_rate": 3.326315789473684e-05, + "loss": 0.0003, + "step": 9106 + }, + { + "epoch": 135.92, + "learning_rate": 3.3228070175438595e-05, + "loss": 0.0002, + "step": 9107 + }, + { + "epoch": 135.94, + "learning_rate": 3.319298245614035e-05, + "loss": 0.0002, + "step": 9108 + }, + { + "epoch": 135.95, + "learning_rate": 3.31578947368421e-05, + "loss": 0.0054, + "step": 9109 + }, + { + "epoch": 135.97, + "learning_rate": 3.3122807017543855e-05, + "loss": 0.0002, + "step": 9110 + }, + { + "epoch": 135.98, + "learning_rate": 3.308771929824561e-05, + "loss": 0.0002, + "step": 9111 + }, + { + "epoch": 136.0, + "learning_rate": 3.3052631578947366e-05, + "loss": 0.0002, + "step": 9112 + }, + { + "epoch": 136.01, + "learning_rate": 3.301754385964912e-05, + "loss": 0.0008, + "step": 9113 + }, + { + "epoch": 136.03, + "learning_rate": 3.298245614035088e-05, + "loss": 0.0002, + "step": 9114 + }, + { + "epoch": 136.04, + "learning_rate": 3.2947368421052626e-05, + "loss": 0.0002, + "step": 9115 + }, + { + "epoch": 136.06, + "learning_rate": 3.291228070175438e-05, + "loss": 0.0002, + "step": 9116 + }, + { + "epoch": 136.07, + "learning_rate": 3.287719298245614e-05, + "loss": 0.0002, + "step": 9117 + }, + { + "epoch": 136.09, + "learning_rate": 3.284210526315789e-05, + "loss": 0.0845, + "step": 9118 + }, + { + "epoch": 136.1, + "learning_rate": 3.280701754385965e-05, + "loss": 0.0002, + "step": 9119 + }, + { + "epoch": 136.12, + "learning_rate": 3.2771929824561403e-05, + "loss": 0.0002, + "step": 9120 + }, + { + "epoch": 136.13, + "learning_rate": 3.273684210526315e-05, + "loss": 0.0002, + "step": 9121 + }, + { + "epoch": 136.15, + "learning_rate": 3.270175438596491e-05, + "loss": 0.0019, + "step": 9122 + }, + { + "epoch": 136.16, + "learning_rate": 3.266666666666666e-05, + "loss": 0.0002, + "step": 9123 + }, + { + "epoch": 136.18, + "learning_rate": 3.263157894736842e-05, + "loss": 0.0002, + "step": 9124 + }, + { + "epoch": 136.19, + "learning_rate": 3.2596491228070174e-05, + "loss": 0.0002, + "step": 9125 + }, + { + "epoch": 136.21, + "learning_rate": 3.256140350877193e-05, + "loss": 0.1942, + "step": 9126 + }, + { + "epoch": 136.22, + "learning_rate": 3.252631578947368e-05, + "loss": 0.0002, + "step": 9127 + }, + { + "epoch": 136.24, + "learning_rate": 3.2491228070175434e-05, + "loss": 0.0002, + "step": 9128 + }, + { + "epoch": 136.25, + "learning_rate": 3.245614035087719e-05, + "loss": 0.0002, + "step": 9129 + }, + { + "epoch": 136.27, + "learning_rate": 3.2421052631578945e-05, + "loss": 0.0002, + "step": 9130 + }, + { + "epoch": 136.28, + "learning_rate": 3.23859649122807e-05, + "loss": 0.0007, + "step": 9131 + }, + { + "epoch": 136.3, + "learning_rate": 3.2350877192982457e-05, + "loss": 0.0002, + "step": 9132 + }, + { + "epoch": 136.31, + "learning_rate": 3.2315789473684205e-05, + "loss": 0.0002, + "step": 9133 + }, + { + "epoch": 136.33, + "learning_rate": 3.228070175438596e-05, + "loss": 0.0002, + "step": 9134 + }, + { + "epoch": 136.34, + "learning_rate": 3.2245614035087716e-05, + "loss": 0.0002, + "step": 9135 + }, + { + "epoch": 136.36, + "learning_rate": 3.221052631578947e-05, + "loss": 0.0002, + "step": 9136 + }, + { + "epoch": 136.37, + "learning_rate": 3.217543859649123e-05, + "loss": 0.0002, + "step": 9137 + }, + { + "epoch": 136.39, + "learning_rate": 3.214035087719298e-05, + "loss": 0.0002, + "step": 9138 + }, + { + "epoch": 136.4, + "learning_rate": 3.210526315789473e-05, + "loss": 0.0002, + "step": 9139 + }, + { + "epoch": 136.42, + "learning_rate": 3.207017543859649e-05, + "loss": 0.0002, + "step": 9140 + }, + { + "epoch": 136.43, + "learning_rate": 3.203508771929824e-05, + "loss": 0.0002, + "step": 9141 + }, + { + "epoch": 136.45, + "learning_rate": 3.2e-05, + "loss": 0.0004, + "step": 9142 + }, + { + "epoch": 136.46, + "learning_rate": 3.1964912280701754e-05, + "loss": 0.0007, + "step": 9143 + }, + { + "epoch": 136.48, + "learning_rate": 3.192982456140351e-05, + "loss": 0.0018, + "step": 9144 + }, + { + "epoch": 136.49, + "learning_rate": 3.189473684210526e-05, + "loss": 0.0002, + "step": 9145 + }, + { + "epoch": 136.51, + "learning_rate": 3.1859649122807014e-05, + "loss": 0.0003, + "step": 9146 + }, + { + "epoch": 136.52, + "learning_rate": 3.182456140350877e-05, + "loss": 0.0002, + "step": 9147 + }, + { + "epoch": 136.54, + "learning_rate": 3.178947368421052e-05, + "loss": 0.0002, + "step": 9148 + }, + { + "epoch": 136.55, + "learning_rate": 3.175438596491228e-05, + "loss": 0.0002, + "step": 9149 + }, + { + "epoch": 136.57, + "learning_rate": 3.1719298245614036e-05, + "loss": 0.19, + "step": 9150 + }, + { + "epoch": 136.58, + "learning_rate": 3.1684210526315785e-05, + "loss": 0.0011, + "step": 9151 + }, + { + "epoch": 136.59, + "learning_rate": 3.164912280701754e-05, + "loss": 0.0005, + "step": 9152 + }, + { + "epoch": 136.61, + "learning_rate": 3.1614035087719296e-05, + "loss": 0.0002, + "step": 9153 + }, + { + "epoch": 136.62, + "learning_rate": 3.1578947368421045e-05, + "loss": 0.0002, + "step": 9154 + }, + { + "epoch": 136.64, + "learning_rate": 3.154385964912281e-05, + "loss": 0.0002, + "step": 9155 + }, + { + "epoch": 136.65, + "learning_rate": 3.150877192982456e-05, + "loss": 0.0042, + "step": 9156 + }, + { + "epoch": 136.67, + "learning_rate": 3.147368421052631e-05, + "loss": 0.0002, + "step": 9157 + }, + { + "epoch": 136.68, + "learning_rate": 3.143859649122807e-05, + "loss": 0.0016, + "step": 9158 + }, + { + "epoch": 136.7, + "learning_rate": 3.140350877192982e-05, + "loss": 0.0003, + "step": 9159 + }, + { + "epoch": 136.71, + "learning_rate": 3.136842105263157e-05, + "loss": 0.0002, + "step": 9160 + }, + { + "epoch": 136.73, + "learning_rate": 3.1333333333333334e-05, + "loss": 0.0006, + "step": 9161 + }, + { + "epoch": 136.74, + "learning_rate": 3.129824561403509e-05, + "loss": 0.0002, + "step": 9162 + }, + { + "epoch": 136.76, + "learning_rate": 3.126315789473684e-05, + "loss": 0.0002, + "step": 9163 + }, + { + "epoch": 136.77, + "learning_rate": 3.1228070175438594e-05, + "loss": 0.0015, + "step": 9164 + }, + { + "epoch": 136.79, + "learning_rate": 3.119298245614035e-05, + "loss": 0.0002, + "step": 9165 + }, + { + "epoch": 136.8, + "learning_rate": 3.11578947368421e-05, + "loss": 0.0027, + "step": 9166 + }, + { + "epoch": 136.82, + "learning_rate": 3.1122807017543854e-05, + "loss": 0.0004, + "step": 9167 + }, + { + "epoch": 136.83, + "learning_rate": 3.1087719298245616e-05, + "loss": 0.0005, + "step": 9168 + }, + { + "epoch": 136.85, + "learning_rate": 3.1052631578947365e-05, + "loss": 0.0003, + "step": 9169 + }, + { + "epoch": 136.86, + "learning_rate": 3.101754385964912e-05, + "loss": 0.0002, + "step": 9170 + }, + { + "epoch": 136.88, + "learning_rate": 3.0982456140350876e-05, + "loss": 0.0002, + "step": 9171 + }, + { + "epoch": 136.89, + "learning_rate": 3.0947368421052625e-05, + "loss": 0.0002, + "step": 9172 + }, + { + "epoch": 136.91, + "learning_rate": 3.091228070175438e-05, + "loss": 0.0002, + "step": 9173 + }, + { + "epoch": 136.92, + "learning_rate": 3.087719298245614e-05, + "loss": 0.0053, + "step": 9174 + }, + { + "epoch": 136.94, + "learning_rate": 3.084210526315789e-05, + "loss": 0.0007, + "step": 9175 + }, + { + "epoch": 136.95, + "learning_rate": 3.080701754385965e-05, + "loss": 0.0002, + "step": 9176 + }, + { + "epoch": 136.97, + "learning_rate": 3.07719298245614e-05, + "loss": 0.0003, + "step": 9177 + }, + { + "epoch": 136.98, + "learning_rate": 3.073684210526315e-05, + "loss": 0.0002, + "step": 9178 + }, + { + "epoch": 137.0, + "learning_rate": 3.070175438596491e-05, + "loss": 0.0002, + "step": 9179 + }, + { + "epoch": 137.01, + "learning_rate": 3.066666666666667e-05, + "loss": 0.0855, + "step": 9180 + }, + { + "epoch": 137.03, + "learning_rate": 3.063157894736842e-05, + "loss": 0.0021, + "step": 9181 + }, + { + "epoch": 137.04, + "learning_rate": 3.059649122807017e-05, + "loss": 0.0239, + "step": 9182 + }, + { + "epoch": 137.06, + "learning_rate": 3.056140350877193e-05, + "loss": 0.0002, + "step": 9183 + }, + { + "epoch": 137.07, + "learning_rate": 3.052631578947368e-05, + "loss": 0.0002, + "step": 9184 + }, + { + "epoch": 137.09, + "learning_rate": 3.0491228070175433e-05, + "loss": 0.0031, + "step": 9185 + }, + { + "epoch": 137.1, + "learning_rate": 3.0456140350877192e-05, + "loss": 0.0011, + "step": 9186 + }, + { + "epoch": 137.12, + "learning_rate": 3.0421052631578948e-05, + "loss": 0.0002, + "step": 9187 + }, + { + "epoch": 137.13, + "learning_rate": 3.03859649122807e-05, + "loss": 0.0182, + "step": 9188 + }, + { + "epoch": 137.15, + "learning_rate": 3.0350877192982452e-05, + "loss": 0.0387, + "step": 9189 + }, + { + "epoch": 137.16, + "learning_rate": 3.0315789473684208e-05, + "loss": 0.0002, + "step": 9190 + }, + { + "epoch": 137.18, + "learning_rate": 3.028070175438596e-05, + "loss": 0.0002, + "step": 9191 + }, + { + "epoch": 137.19, + "learning_rate": 3.0245614035087715e-05, + "loss": 0.0002, + "step": 9192 + }, + { + "epoch": 137.21, + "learning_rate": 3.0210526315789474e-05, + "loss": 0.0002, + "step": 9193 + }, + { + "epoch": 137.22, + "learning_rate": 3.0175438596491226e-05, + "loss": 0.0003, + "step": 9194 + }, + { + "epoch": 137.24, + "learning_rate": 3.014035087719298e-05, + "loss": 0.0002, + "step": 9195 + }, + { + "epoch": 137.25, + "learning_rate": 3.0105263157894734e-05, + "loss": 0.0215, + "step": 9196 + }, + { + "epoch": 137.27, + "learning_rate": 3.0070175438596486e-05, + "loss": 0.0002, + "step": 9197 + }, + { + "epoch": 137.28, + "learning_rate": 3.0035087719298242e-05, + "loss": 0.0002, + "step": 9198 + }, + { + "epoch": 137.3, + "learning_rate": 2.9999999999999997e-05, + "loss": 0.0002, + "step": 9199 + }, + { + "epoch": 137.31, + "learning_rate": 2.9964912280701753e-05, + "loss": 0.002, + "step": 9200 + }, + { + "epoch": 137.31, + "eval_accuracy": 0.8844836025452766, + "eval_f1": 0.8846804745314188, + "eval_loss": 0.667094349861145, + "eval_runtime": 345.8776, + "eval_samples_per_second": 11.813, + "eval_steps_per_second": 0.74, + "step": 9200 + }, + { + "epoch": 137.33, + "learning_rate": 2.9929824561403505e-05, + "loss": 0.0002, + "step": 9201 + }, + { + "epoch": 137.34, + "learning_rate": 2.989473684210526e-05, + "loss": 0.0002, + "step": 9202 + }, + { + "epoch": 137.36, + "learning_rate": 2.9859649122807013e-05, + "loss": 0.0007, + "step": 9203 + }, + { + "epoch": 137.37, + "learning_rate": 2.982456140350877e-05, + "loss": 0.0002, + "step": 9204 + }, + { + "epoch": 137.39, + "learning_rate": 2.9789473684210524e-05, + "loss": 0.0002, + "step": 9205 + }, + { + "epoch": 137.4, + "learning_rate": 2.975438596491228e-05, + "loss": 0.0003, + "step": 9206 + }, + { + "epoch": 137.42, + "learning_rate": 2.971929824561403e-05, + "loss": 0.0002, + "step": 9207 + }, + { + "epoch": 137.43, + "learning_rate": 2.9684210526315787e-05, + "loss": 0.1348, + "step": 9208 + }, + { + "epoch": 137.45, + "learning_rate": 2.964912280701754e-05, + "loss": 0.0002, + "step": 9209 + }, + { + "epoch": 137.46, + "learning_rate": 2.9614035087719295e-05, + "loss": 0.0002, + "step": 9210 + }, + { + "epoch": 137.48, + "learning_rate": 2.957894736842105e-05, + "loss": 0.0002, + "step": 9211 + }, + { + "epoch": 137.49, + "learning_rate": 2.9543859649122806e-05, + "loss": 0.0002, + "step": 9212 + }, + { + "epoch": 137.51, + "learning_rate": 2.9508771929824558e-05, + "loss": 0.0002, + "step": 9213 + }, + { + "epoch": 137.52, + "learning_rate": 2.9473684210526314e-05, + "loss": 0.0002, + "step": 9214 + }, + { + "epoch": 137.54, + "learning_rate": 2.9438596491228066e-05, + "loss": 0.0002, + "step": 9215 + }, + { + "epoch": 137.55, + "learning_rate": 2.940350877192982e-05, + "loss": 0.0002, + "step": 9216 + }, + { + "epoch": 137.57, + "learning_rate": 2.9368421052631574e-05, + "loss": 0.067, + "step": 9217 + }, + { + "epoch": 137.58, + "learning_rate": 2.9333333333333333e-05, + "loss": 0.0002, + "step": 9218 + }, + { + "epoch": 137.59, + "learning_rate": 2.9298245614035085e-05, + "loss": 0.0002, + "step": 9219 + }, + { + "epoch": 137.61, + "learning_rate": 2.926315789473684e-05, + "loss": 0.0002, + "step": 9220 + }, + { + "epoch": 137.62, + "learning_rate": 2.9228070175438592e-05, + "loss": 0.0002, + "step": 9221 + }, + { + "epoch": 137.64, + "learning_rate": 2.9192982456140348e-05, + "loss": 0.0002, + "step": 9222 + }, + { + "epoch": 137.65, + "learning_rate": 2.91578947368421e-05, + "loss": 0.0002, + "step": 9223 + }, + { + "epoch": 137.67, + "learning_rate": 2.912280701754386e-05, + "loss": 0.0008, + "step": 9224 + }, + { + "epoch": 137.68, + "learning_rate": 2.908771929824561e-05, + "loss": 0.0002, + "step": 9225 + }, + { + "epoch": 137.7, + "learning_rate": 2.9052631578947367e-05, + "loss": 0.0771, + "step": 9226 + }, + { + "epoch": 137.71, + "learning_rate": 2.901754385964912e-05, + "loss": 0.0005, + "step": 9227 + }, + { + "epoch": 137.73, + "learning_rate": 2.8982456140350875e-05, + "loss": 0.0002, + "step": 9228 + }, + { + "epoch": 137.74, + "learning_rate": 2.8947368421052627e-05, + "loss": 0.0006, + "step": 9229 + }, + { + "epoch": 137.76, + "learning_rate": 2.8912280701754386e-05, + "loss": 0.0002, + "step": 9230 + }, + { + "epoch": 137.77, + "learning_rate": 2.8877192982456138e-05, + "loss": 0.0002, + "step": 9231 + }, + { + "epoch": 137.79, + "learning_rate": 2.8842105263157893e-05, + "loss": 0.0002, + "step": 9232 + }, + { + "epoch": 137.8, + "learning_rate": 2.8807017543859646e-05, + "loss": 0.0002, + "step": 9233 + }, + { + "epoch": 137.82, + "learning_rate": 2.87719298245614e-05, + "loss": 0.0002, + "step": 9234 + }, + { + "epoch": 137.83, + "learning_rate": 2.8736842105263153e-05, + "loss": 0.0002, + "step": 9235 + }, + { + "epoch": 137.85, + "learning_rate": 2.8701754385964912e-05, + "loss": 0.0208, + "step": 9236 + }, + { + "epoch": 137.86, + "learning_rate": 2.8666666666666664e-05, + "loss": 0.1129, + "step": 9237 + }, + { + "epoch": 137.88, + "learning_rate": 2.863157894736842e-05, + "loss": 0.0002, + "step": 9238 + }, + { + "epoch": 137.89, + "learning_rate": 2.8596491228070172e-05, + "loss": 0.0002, + "step": 9239 + }, + { + "epoch": 137.91, + "learning_rate": 2.8561403508771928e-05, + "loss": 0.0002, + "step": 9240 + }, + { + "epoch": 137.92, + "learning_rate": 2.852631578947368e-05, + "loss": 0.0003, + "step": 9241 + }, + { + "epoch": 137.94, + "learning_rate": 2.8491228070175435e-05, + "loss": 0.0004, + "step": 9242 + }, + { + "epoch": 137.95, + "learning_rate": 2.845614035087719e-05, + "loss": 0.0002, + "step": 9243 + }, + { + "epoch": 137.97, + "learning_rate": 2.8421052631578946e-05, + "loss": 0.0002, + "step": 9244 + }, + { + "epoch": 137.98, + "learning_rate": 2.83859649122807e-05, + "loss": 0.0002, + "step": 9245 + }, + { + "epoch": 138.0, + "learning_rate": 2.8350877192982454e-05, + "loss": 0.0002, + "step": 9246 + }, + { + "epoch": 138.01, + "learning_rate": 2.8315789473684206e-05, + "loss": 0.0003, + "step": 9247 + }, + { + "epoch": 138.03, + "learning_rate": 2.8280701754385962e-05, + "loss": 0.0003, + "step": 9248 + }, + { + "epoch": 138.04, + "learning_rate": 2.8245614035087717e-05, + "loss": 0.0002, + "step": 9249 + }, + { + "epoch": 138.06, + "learning_rate": 2.8210526315789473e-05, + "loss": 0.0004, + "step": 9250 + }, + { + "epoch": 138.07, + "learning_rate": 2.8175438596491225e-05, + "loss": 0.0002, + "step": 9251 + }, + { + "epoch": 138.09, + "learning_rate": 2.814035087719298e-05, + "loss": 0.0002, + "step": 9252 + }, + { + "epoch": 138.1, + "learning_rate": 2.8105263157894733e-05, + "loss": 0.0016, + "step": 9253 + }, + { + "epoch": 138.12, + "learning_rate": 2.807017543859649e-05, + "loss": 0.0002, + "step": 9254 + }, + { + "epoch": 138.13, + "learning_rate": 2.8035087719298244e-05, + "loss": 0.0002, + "step": 9255 + }, + { + "epoch": 138.15, + "learning_rate": 2.8e-05, + "loss": 0.0002, + "step": 9256 + }, + { + "epoch": 138.16, + "learning_rate": 2.7964912280701752e-05, + "loss": 0.0002, + "step": 9257 + }, + { + "epoch": 138.18, + "learning_rate": 2.7929824561403507e-05, + "loss": 0.0001, + "step": 9258 + }, + { + "epoch": 138.19, + "learning_rate": 2.789473684210526e-05, + "loss": 0.019, + "step": 9259 + }, + { + "epoch": 138.21, + "learning_rate": 2.7859649122807015e-05, + "loss": 0.0002, + "step": 9260 + }, + { + "epoch": 138.22, + "learning_rate": 2.7824561403508767e-05, + "loss": 0.0003, + "step": 9261 + }, + { + "epoch": 138.24, + "learning_rate": 2.7789473684210526e-05, + "loss": 0.0002, + "step": 9262 + }, + { + "epoch": 138.25, + "learning_rate": 2.7754385964912278e-05, + "loss": 0.0003, + "step": 9263 + }, + { + "epoch": 138.27, + "learning_rate": 2.7719298245614034e-05, + "loss": 0.0002, + "step": 9264 + }, + { + "epoch": 138.28, + "learning_rate": 2.7684210526315786e-05, + "loss": 0.1113, + "step": 9265 + }, + { + "epoch": 138.3, + "learning_rate": 2.764912280701754e-05, + "loss": 0.0003, + "step": 9266 + }, + { + "epoch": 138.31, + "learning_rate": 2.7614035087719294e-05, + "loss": 0.0002, + "step": 9267 + }, + { + "epoch": 138.33, + "learning_rate": 2.7578947368421053e-05, + "loss": 0.0002, + "step": 9268 + }, + { + "epoch": 138.34, + "learning_rate": 2.7543859649122805e-05, + "loss": 0.0002, + "step": 9269 + }, + { + "epoch": 138.36, + "learning_rate": 2.750877192982456e-05, + "loss": 0.0002, + "step": 9270 + }, + { + "epoch": 138.37, + "learning_rate": 2.7473684210526313e-05, + "loss": 0.0002, + "step": 9271 + }, + { + "epoch": 138.39, + "learning_rate": 2.7438596491228068e-05, + "loss": 0.0002, + "step": 9272 + }, + { + "epoch": 138.4, + "learning_rate": 2.740350877192982e-05, + "loss": 0.0736, + "step": 9273 + }, + { + "epoch": 138.42, + "learning_rate": 2.736842105263158e-05, + "loss": 0.0002, + "step": 9274 + }, + { + "epoch": 138.43, + "learning_rate": 2.733333333333333e-05, + "loss": 0.0002, + "step": 9275 + }, + { + "epoch": 138.45, + "learning_rate": 2.7298245614035087e-05, + "loss": 0.0002, + "step": 9276 + }, + { + "epoch": 138.46, + "learning_rate": 2.726315789473684e-05, + "loss": 0.0002, + "step": 9277 + }, + { + "epoch": 138.48, + "learning_rate": 2.7228070175438595e-05, + "loss": 0.0002, + "step": 9278 + }, + { + "epoch": 138.49, + "learning_rate": 2.7192982456140347e-05, + "loss": 0.0002, + "step": 9279 + }, + { + "epoch": 138.51, + "learning_rate": 2.7157894736842106e-05, + "loss": 0.0002, + "step": 9280 + }, + { + "epoch": 138.52, + "learning_rate": 2.7122807017543858e-05, + "loss": 0.0001, + "step": 9281 + }, + { + "epoch": 138.54, + "learning_rate": 2.7087719298245613e-05, + "loss": 0.0002, + "step": 9282 + }, + { + "epoch": 138.55, + "learning_rate": 2.7052631578947366e-05, + "loss": 0.0003, + "step": 9283 + }, + { + "epoch": 138.57, + "learning_rate": 2.701754385964912e-05, + "loss": 0.0003, + "step": 9284 + }, + { + "epoch": 138.58, + "learning_rate": 2.6982456140350873e-05, + "loss": 0.0005, + "step": 9285 + }, + { + "epoch": 138.59, + "learning_rate": 2.6947368421052626e-05, + "loss": 0.0002, + "step": 9286 + }, + { + "epoch": 138.61, + "learning_rate": 2.6912280701754384e-05, + "loss": 0.0002, + "step": 9287 + }, + { + "epoch": 138.62, + "learning_rate": 2.687719298245614e-05, + "loss": 0.0003, + "step": 9288 + }, + { + "epoch": 138.64, + "learning_rate": 2.6842105263157892e-05, + "loss": 0.0002, + "step": 9289 + }, + { + "epoch": 138.65, + "learning_rate": 2.6807017543859648e-05, + "loss": 0.0002, + "step": 9290 + }, + { + "epoch": 138.67, + "learning_rate": 2.67719298245614e-05, + "loss": 0.0002, + "step": 9291 + }, + { + "epoch": 138.68, + "learning_rate": 2.6736842105263152e-05, + "loss": 0.001, + "step": 9292 + }, + { + "epoch": 138.7, + "learning_rate": 2.670175438596491e-05, + "loss": 0.0002, + "step": 9293 + }, + { + "epoch": 138.71, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.0002, + "step": 9294 + }, + { + "epoch": 138.73, + "learning_rate": 2.663157894736842e-05, + "loss": 0.0006, + "step": 9295 + }, + { + "epoch": 138.74, + "learning_rate": 2.6596491228070174e-05, + "loss": 0.0002, + "step": 9296 + }, + { + "epoch": 138.76, + "learning_rate": 2.6561403508771926e-05, + "loss": 0.0002, + "step": 9297 + }, + { + "epoch": 138.77, + "learning_rate": 2.652631578947368e-05, + "loss": 0.0002, + "step": 9298 + }, + { + "epoch": 138.79, + "learning_rate": 2.6491228070175438e-05, + "loss": 0.0002, + "step": 9299 + }, + { + "epoch": 138.8, + "learning_rate": 2.6456140350877193e-05, + "loss": 0.0002, + "step": 9300 + }, + { + "epoch": 138.82, + "learning_rate": 2.6421052631578945e-05, + "loss": 0.0003, + "step": 9301 + }, + { + "epoch": 138.83, + "learning_rate": 2.63859649122807e-05, + "loss": 0.0002, + "step": 9302 + }, + { + "epoch": 138.85, + "learning_rate": 2.6350877192982453e-05, + "loss": 0.0097, + "step": 9303 + }, + { + "epoch": 138.86, + "learning_rate": 2.6315789473684205e-05, + "loss": 0.0002, + "step": 9304 + }, + { + "epoch": 138.88, + "learning_rate": 2.6280701754385964e-05, + "loss": 0.0002, + "step": 9305 + }, + { + "epoch": 138.89, + "learning_rate": 2.624561403508772e-05, + "loss": 0.0002, + "step": 9306 + }, + { + "epoch": 138.91, + "learning_rate": 2.6210526315789472e-05, + "loss": 0.0002, + "step": 9307 + }, + { + "epoch": 138.92, + "learning_rate": 2.6175438596491227e-05, + "loss": 0.0002, + "step": 9308 + }, + { + "epoch": 138.94, + "learning_rate": 2.614035087719298e-05, + "loss": 0.0002, + "step": 9309 + }, + { + "epoch": 138.95, + "learning_rate": 2.6105263157894732e-05, + "loss": 0.0002, + "step": 9310 + }, + { + "epoch": 138.97, + "learning_rate": 2.6070175438596487e-05, + "loss": 0.0002, + "step": 9311 + }, + { + "epoch": 138.98, + "learning_rate": 2.6035087719298246e-05, + "loss": 0.0002, + "step": 9312 + }, + { + "epoch": 139.0, + "learning_rate": 2.6e-05, + "loss": 0.0002, + "step": 9313 + }, + { + "epoch": 139.01, + "learning_rate": 2.5964912280701754e-05, + "loss": 0.0005, + "step": 9314 + }, + { + "epoch": 139.03, + "learning_rate": 2.5929824561403506e-05, + "loss": 0.0003, + "step": 9315 + }, + { + "epoch": 139.04, + "learning_rate": 2.5894736842105258e-05, + "loss": 0.0002, + "step": 9316 + }, + { + "epoch": 139.06, + "learning_rate": 2.5859649122807014e-05, + "loss": 0.0002, + "step": 9317 + }, + { + "epoch": 139.07, + "learning_rate": 2.5824561403508773e-05, + "loss": 0.0017, + "step": 9318 + }, + { + "epoch": 139.09, + "learning_rate": 2.5789473684210525e-05, + "loss": 0.0002, + "step": 9319 + }, + { + "epoch": 139.1, + "learning_rate": 2.575438596491228e-05, + "loss": 0.0002, + "step": 9320 + }, + { + "epoch": 139.12, + "learning_rate": 2.5719298245614033e-05, + "loss": 0.0002, + "step": 9321 + }, + { + "epoch": 139.13, + "learning_rate": 2.5684210526315785e-05, + "loss": 0.0002, + "step": 9322 + }, + { + "epoch": 139.15, + "learning_rate": 2.564912280701754e-05, + "loss": 0.0002, + "step": 9323 + }, + { + "epoch": 139.16, + "learning_rate": 2.56140350877193e-05, + "loss": 0.0001, + "step": 9324 + }, + { + "epoch": 139.18, + "learning_rate": 2.557894736842105e-05, + "loss": 0.0002, + "step": 9325 + }, + { + "epoch": 139.19, + "learning_rate": 2.5543859649122807e-05, + "loss": 0.0002, + "step": 9326 + }, + { + "epoch": 139.21, + "learning_rate": 2.550877192982456e-05, + "loss": 0.0002, + "step": 9327 + }, + { + "epoch": 139.22, + "learning_rate": 2.547368421052631e-05, + "loss": 0.0002, + "step": 9328 + }, + { + "epoch": 139.24, + "learning_rate": 2.5438596491228067e-05, + "loss": 0.0002, + "step": 9329 + }, + { + "epoch": 139.25, + "learning_rate": 2.540350877192982e-05, + "loss": 0.0002, + "step": 9330 + }, + { + "epoch": 139.27, + "learning_rate": 2.5368421052631578e-05, + "loss": 0.0002, + "step": 9331 + }, + { + "epoch": 139.28, + "learning_rate": 2.533333333333333e-05, + "loss": 0.0002, + "step": 9332 + }, + { + "epoch": 139.3, + "learning_rate": 2.5298245614035086e-05, + "loss": 0.0002, + "step": 9333 + }, + { + "epoch": 139.31, + "learning_rate": 2.5263157894736838e-05, + "loss": 0.0002, + "step": 9334 + }, + { + "epoch": 139.33, + "learning_rate": 2.5228070175438593e-05, + "loss": 0.0002, + "step": 9335 + }, + { + "epoch": 139.34, + "learning_rate": 2.5192982456140346e-05, + "loss": 0.0002, + "step": 9336 + }, + { + "epoch": 139.36, + "learning_rate": 2.5157894736842105e-05, + "loss": 0.0002, + "step": 9337 + }, + { + "epoch": 139.37, + "learning_rate": 2.5122807017543857e-05, + "loss": 0.0002, + "step": 9338 + }, + { + "epoch": 139.39, + "learning_rate": 2.5087719298245612e-05, + "loss": 0.0002, + "step": 9339 + }, + { + "epoch": 139.4, + "learning_rate": 2.5052631578947364e-05, + "loss": 0.0002, + "step": 9340 + }, + { + "epoch": 139.42, + "learning_rate": 2.501754385964912e-05, + "loss": 0.0002, + "step": 9341 + }, + { + "epoch": 139.43, + "learning_rate": 2.4982456140350872e-05, + "loss": 0.0002, + "step": 9342 + }, + { + "epoch": 139.45, + "learning_rate": 2.494736842105263e-05, + "loss": 0.0002, + "step": 9343 + }, + { + "epoch": 139.46, + "learning_rate": 2.4912280701754383e-05, + "loss": 0.0003, + "step": 9344 + }, + { + "epoch": 139.48, + "learning_rate": 2.487719298245614e-05, + "loss": 0.0002, + "step": 9345 + }, + { + "epoch": 139.49, + "learning_rate": 2.484210526315789e-05, + "loss": 0.0002, + "step": 9346 + }, + { + "epoch": 139.51, + "learning_rate": 2.4807017543859647e-05, + "loss": 0.0002, + "step": 9347 + }, + { + "epoch": 139.52, + "learning_rate": 2.47719298245614e-05, + "loss": 0.0002, + "step": 9348 + }, + { + "epoch": 139.54, + "learning_rate": 2.4736842105263158e-05, + "loss": 0.0002, + "step": 9349 + }, + { + "epoch": 139.55, + "learning_rate": 2.470175438596491e-05, + "loss": 0.0002, + "step": 9350 + }, + { + "epoch": 139.57, + "learning_rate": 2.4666666666666665e-05, + "loss": 0.0015, + "step": 9351 + }, + { + "epoch": 139.58, + "learning_rate": 2.4631578947368418e-05, + "loss": 0.0007, + "step": 9352 + }, + { + "epoch": 139.59, + "learning_rate": 2.4596491228070173e-05, + "loss": 0.0002, + "step": 9353 + }, + { + "epoch": 139.61, + "learning_rate": 2.4561403508771925e-05, + "loss": 0.0002, + "step": 9354 + }, + { + "epoch": 139.62, + "learning_rate": 2.452631578947368e-05, + "loss": 0.0002, + "step": 9355 + }, + { + "epoch": 139.64, + "learning_rate": 2.4491228070175436e-05, + "loss": 0.0002, + "step": 9356 + }, + { + "epoch": 139.65, + "learning_rate": 2.4456140350877192e-05, + "loss": 0.0002, + "step": 9357 + }, + { + "epoch": 139.67, + "learning_rate": 2.4421052631578944e-05, + "loss": 0.0002, + "step": 9358 + }, + { + "epoch": 139.68, + "learning_rate": 2.43859649122807e-05, + "loss": 0.0002, + "step": 9359 + }, + { + "epoch": 139.7, + "learning_rate": 2.4350877192982452e-05, + "loss": 0.0002, + "step": 9360 + }, + { + "epoch": 139.71, + "learning_rate": 2.4315789473684207e-05, + "loss": 0.0002, + "step": 9361 + }, + { + "epoch": 139.73, + "learning_rate": 2.4280701754385963e-05, + "loss": 0.0002, + "step": 9362 + }, + { + "epoch": 139.74, + "learning_rate": 2.424561403508772e-05, + "loss": 0.0002, + "step": 9363 + }, + { + "epoch": 139.76, + "learning_rate": 2.421052631578947e-05, + "loss": 0.0002, + "step": 9364 + }, + { + "epoch": 139.77, + "learning_rate": 2.4175438596491226e-05, + "loss": 0.0002, + "step": 9365 + }, + { + "epoch": 139.79, + "learning_rate": 2.414035087719298e-05, + "loss": 0.0251, + "step": 9366 + }, + { + "epoch": 139.8, + "learning_rate": 2.4105263157894734e-05, + "loss": 0.0002, + "step": 9367 + }, + { + "epoch": 139.82, + "learning_rate": 2.407017543859649e-05, + "loss": 0.0118, + "step": 9368 + }, + { + "epoch": 139.83, + "learning_rate": 2.4035087719298245e-05, + "loss": 0.2565, + "step": 9369 + }, + { + "epoch": 139.85, + "learning_rate": 2.3999999999999997e-05, + "loss": 0.0004, + "step": 9370 + }, + { + "epoch": 139.86, + "learning_rate": 2.3964912280701753e-05, + "loss": 0.0002, + "step": 9371 + }, + { + "epoch": 139.88, + "learning_rate": 2.3929824561403505e-05, + "loss": 0.0003, + "step": 9372 + }, + { + "epoch": 139.89, + "learning_rate": 2.389473684210526e-05, + "loss": 0.0002, + "step": 9373 + }, + { + "epoch": 139.91, + "learning_rate": 2.3859649122807016e-05, + "loss": 0.0033, + "step": 9374 + }, + { + "epoch": 139.92, + "learning_rate": 2.382456140350877e-05, + "loss": 0.0002, + "step": 9375 + }, + { + "epoch": 139.94, + "learning_rate": 2.3789473684210524e-05, + "loss": 0.0002, + "step": 9376 + }, + { + "epoch": 139.95, + "learning_rate": 2.375438596491228e-05, + "loss": 0.0002, + "step": 9377 + }, + { + "epoch": 139.97, + "learning_rate": 2.371929824561403e-05, + "loss": 0.0001, + "step": 9378 + }, + { + "epoch": 139.98, + "learning_rate": 2.3684210526315787e-05, + "loss": 0.0002, + "step": 9379 + }, + { + "epoch": 140.0, + "learning_rate": 2.364912280701754e-05, + "loss": 0.0002, + "step": 9380 + }, + { + "epoch": 140.01, + "learning_rate": 2.3614035087719298e-05, + "loss": 0.0002, + "step": 9381 + }, + { + "epoch": 140.03, + "learning_rate": 2.357894736842105e-05, + "loss": 0.0002, + "step": 9382 + }, + { + "epoch": 140.04, + "learning_rate": 2.3543859649122806e-05, + "loss": 0.0002, + "step": 9383 + }, + { + "epoch": 140.06, + "learning_rate": 2.3508771929824558e-05, + "loss": 0.0002, + "step": 9384 + }, + { + "epoch": 140.07, + "learning_rate": 2.3473684210526314e-05, + "loss": 0.0032, + "step": 9385 + }, + { + "epoch": 140.09, + "learning_rate": 2.3438596491228066e-05, + "loss": 0.0002, + "step": 9386 + }, + { + "epoch": 140.1, + "learning_rate": 2.3403508771929825e-05, + "loss": 0.0002, + "step": 9387 + }, + { + "epoch": 140.12, + "learning_rate": 2.3368421052631577e-05, + "loss": 0.0002, + "step": 9388 + }, + { + "epoch": 140.13, + "learning_rate": 2.3333333333333332e-05, + "loss": 0.0005, + "step": 9389 + }, + { + "epoch": 140.15, + "learning_rate": 2.3298245614035085e-05, + "loss": 0.0003, + "step": 9390 + }, + { + "epoch": 140.16, + "learning_rate": 2.326315789473684e-05, + "loss": 0.0002, + "step": 9391 + }, + { + "epoch": 140.18, + "learning_rate": 2.3228070175438592e-05, + "loss": 0.0007, + "step": 9392 + }, + { + "epoch": 140.19, + "learning_rate": 2.319298245614035e-05, + "loss": 0.0002, + "step": 9393 + }, + { + "epoch": 140.21, + "learning_rate": 2.3157894736842103e-05, + "loss": 0.0003, + "step": 9394 + }, + { + "epoch": 140.22, + "learning_rate": 2.312280701754386e-05, + "loss": 0.0002, + "step": 9395 + }, + { + "epoch": 140.24, + "learning_rate": 2.308771929824561e-05, + "loss": 0.0014, + "step": 9396 + }, + { + "epoch": 140.25, + "learning_rate": 2.3052631578947367e-05, + "loss": 0.0002, + "step": 9397 + }, + { + "epoch": 140.27, + "learning_rate": 2.301754385964912e-05, + "loss": 0.0002, + "step": 9398 + }, + { + "epoch": 140.28, + "learning_rate": 2.2982456140350878e-05, + "loss": 0.0002, + "step": 9399 + }, + { + "epoch": 140.3, + "learning_rate": 2.294736842105263e-05, + "loss": 0.0002, + "step": 9400 + }, + { + "epoch": 140.3, + "eval_accuracy": 0.8935389133627019, + "eval_f1": 0.8931026363559365, + "eval_loss": 0.5969720482826233, + "eval_runtime": 345.1416, + "eval_samples_per_second": 11.839, + "eval_steps_per_second": 0.742, + "step": 9400 + }, + { + "epoch": 140.31, + "learning_rate": 2.2912280701754386e-05, + "loss": 0.0002, + "step": 9401 + }, + { + "epoch": 140.33, + "learning_rate": 2.2877192982456138e-05, + "loss": 0.0002, + "step": 9402 + }, + { + "epoch": 140.34, + "learning_rate": 2.2842105263157893e-05, + "loss": 0.0182, + "step": 9403 + }, + { + "epoch": 140.36, + "learning_rate": 2.2807017543859645e-05, + "loss": 0.1147, + "step": 9404 + }, + { + "epoch": 140.37, + "learning_rate": 2.27719298245614e-05, + "loss": 0.0002, + "step": 9405 + }, + { + "epoch": 140.39, + "learning_rate": 2.2736842105263157e-05, + "loss": 0.0019, + "step": 9406 + }, + { + "epoch": 140.4, + "learning_rate": 2.2701754385964912e-05, + "loss": 0.0002, + "step": 9407 + }, + { + "epoch": 140.42, + "learning_rate": 2.2666666666666664e-05, + "loss": 0.0543, + "step": 9408 + }, + { + "epoch": 140.43, + "learning_rate": 2.263157894736842e-05, + "loss": 0.0002, + "step": 9409 + }, + { + "epoch": 140.45, + "learning_rate": 2.2596491228070172e-05, + "loss": 0.0002, + "step": 9410 + }, + { + "epoch": 140.46, + "learning_rate": 2.2561403508771928e-05, + "loss": 0.0002, + "step": 9411 + }, + { + "epoch": 140.48, + "learning_rate": 2.2526315789473683e-05, + "loss": 0.0002, + "step": 9412 + }, + { + "epoch": 140.49, + "learning_rate": 2.249122807017544e-05, + "loss": 0.0002, + "step": 9413 + }, + { + "epoch": 140.51, + "learning_rate": 2.245614035087719e-05, + "loss": 0.0002, + "step": 9414 + }, + { + "epoch": 140.52, + "learning_rate": 2.2421052631578946e-05, + "loss": 0.0002, + "step": 9415 + }, + { + "epoch": 140.54, + "learning_rate": 2.23859649122807e-05, + "loss": 0.0001, + "step": 9416 + }, + { + "epoch": 140.55, + "learning_rate": 2.2350877192982454e-05, + "loss": 0.0002, + "step": 9417 + }, + { + "epoch": 140.57, + "learning_rate": 2.231578947368421e-05, + "loss": 0.0002, + "step": 9418 + }, + { + "epoch": 140.58, + "learning_rate": 2.2280701754385965e-05, + "loss": 0.0002, + "step": 9419 + }, + { + "epoch": 140.59, + "learning_rate": 2.2245614035087717e-05, + "loss": 0.0002, + "step": 9420 + }, + { + "epoch": 140.61, + "learning_rate": 2.2210526315789473e-05, + "loss": 0.0002, + "step": 9421 + }, + { + "epoch": 140.62, + "learning_rate": 2.2175438596491225e-05, + "loss": 0.0002, + "step": 9422 + }, + { + "epoch": 140.64, + "learning_rate": 2.214035087719298e-05, + "loss": 0.0002, + "step": 9423 + }, + { + "epoch": 140.65, + "learning_rate": 2.2105263157894733e-05, + "loss": 0.0002, + "step": 9424 + }, + { + "epoch": 140.67, + "learning_rate": 2.2070175438596492e-05, + "loss": 0.0002, + "step": 9425 + }, + { + "epoch": 140.68, + "learning_rate": 2.2035087719298244e-05, + "loss": 0.0002, + "step": 9426 + }, + { + "epoch": 140.7, + "learning_rate": 2.2e-05, + "loss": 0.0003, + "step": 9427 + }, + { + "epoch": 140.71, + "learning_rate": 2.196491228070175e-05, + "loss": 0.0002, + "step": 9428 + }, + { + "epoch": 140.73, + "learning_rate": 2.1929824561403507e-05, + "loss": 0.0002, + "step": 9429 + }, + { + "epoch": 140.74, + "learning_rate": 2.189473684210526e-05, + "loss": 0.0002, + "step": 9430 + }, + { + "epoch": 140.76, + "learning_rate": 2.1859649122807018e-05, + "loss": 0.0003, + "step": 9431 + }, + { + "epoch": 140.77, + "learning_rate": 2.182456140350877e-05, + "loss": 0.0002, + "step": 9432 + }, + { + "epoch": 140.79, + "learning_rate": 2.1789473684210526e-05, + "loss": 0.0002, + "step": 9433 + }, + { + "epoch": 140.8, + "learning_rate": 2.1754385964912278e-05, + "loss": 0.0064, + "step": 9434 + }, + { + "epoch": 140.82, + "learning_rate": 2.1719298245614034e-05, + "loss": 0.0004, + "step": 9435 + }, + { + "epoch": 140.83, + "learning_rate": 2.1684210526315786e-05, + "loss": 0.0002, + "step": 9436 + }, + { + "epoch": 140.85, + "learning_rate": 2.1649122807017545e-05, + "loss": 0.0002, + "step": 9437 + }, + { + "epoch": 140.86, + "learning_rate": 2.1614035087719297e-05, + "loss": 0.0002, + "step": 9438 + }, + { + "epoch": 140.88, + "learning_rate": 2.1578947368421053e-05, + "loss": 0.0001, + "step": 9439 + }, + { + "epoch": 140.89, + "learning_rate": 2.1543859649122805e-05, + "loss": 0.0014, + "step": 9440 + }, + { + "epoch": 140.91, + "learning_rate": 2.150877192982456e-05, + "loss": 0.0654, + "step": 9441 + }, + { + "epoch": 140.92, + "learning_rate": 2.1473684210526312e-05, + "loss": 0.0002, + "step": 9442 + }, + { + "epoch": 140.94, + "learning_rate": 2.143859649122807e-05, + "loss": 0.0004, + "step": 9443 + }, + { + "epoch": 140.95, + "learning_rate": 2.1403508771929824e-05, + "loss": 0.0003, + "step": 9444 + }, + { + "epoch": 140.97, + "learning_rate": 2.136842105263158e-05, + "loss": 0.0002, + "step": 9445 + }, + { + "epoch": 140.98, + "learning_rate": 2.133333333333333e-05, + "loss": 0.0002, + "step": 9446 + }, + { + "epoch": 141.0, + "learning_rate": 2.1298245614035087e-05, + "loss": 0.0002, + "step": 9447 + }, + { + "epoch": 141.01, + "learning_rate": 2.126315789473684e-05, + "loss": 0.0003, + "step": 9448 + }, + { + "epoch": 141.03, + "learning_rate": 2.122807017543859e-05, + "loss": 0.0002, + "step": 9449 + }, + { + "epoch": 141.04, + "learning_rate": 2.119298245614035e-05, + "loss": 0.0002, + "step": 9450 + }, + { + "epoch": 141.06, + "learning_rate": 2.1157894736842106e-05, + "loss": 0.0002, + "step": 9451 + }, + { + "epoch": 141.07, + "learning_rate": 2.1122807017543858e-05, + "loss": 0.0002, + "step": 9452 + }, + { + "epoch": 141.09, + "learning_rate": 2.1087719298245613e-05, + "loss": 0.1253, + "step": 9453 + }, + { + "epoch": 141.1, + "learning_rate": 2.1052631578947366e-05, + "loss": 0.0002, + "step": 9454 + }, + { + "epoch": 141.12, + "learning_rate": 2.1017543859649118e-05, + "loss": 0.0002, + "step": 9455 + }, + { + "epoch": 141.13, + "learning_rate": 2.0982456140350877e-05, + "loss": 0.0002, + "step": 9456 + }, + { + "epoch": 141.15, + "learning_rate": 2.0947368421052632e-05, + "loss": 0.1106, + "step": 9457 + }, + { + "epoch": 141.16, + "learning_rate": 2.0912280701754384e-05, + "loss": 0.0002, + "step": 9458 + }, + { + "epoch": 141.18, + "learning_rate": 2.087719298245614e-05, + "loss": 0.0002, + "step": 9459 + }, + { + "epoch": 141.19, + "learning_rate": 2.0842105263157892e-05, + "loss": 0.0002, + "step": 9460 + }, + { + "epoch": 141.21, + "learning_rate": 2.0807017543859644e-05, + "loss": 0.0002, + "step": 9461 + }, + { + "epoch": 141.22, + "learning_rate": 2.0771929824561403e-05, + "loss": 0.0002, + "step": 9462 + }, + { + "epoch": 141.24, + "learning_rate": 2.073684210526316e-05, + "loss": 0.0007, + "step": 9463 + }, + { + "epoch": 141.25, + "learning_rate": 2.070175438596491e-05, + "loss": 0.0002, + "step": 9464 + }, + { + "epoch": 141.27, + "learning_rate": 2.0666666666666663e-05, + "loss": 0.0003, + "step": 9465 + }, + { + "epoch": 141.28, + "learning_rate": 2.063157894736842e-05, + "loss": 0.0561, + "step": 9466 + }, + { + "epoch": 141.3, + "learning_rate": 2.059649122807017e-05, + "loss": 0.0002, + "step": 9467 + }, + { + "epoch": 141.31, + "learning_rate": 2.056140350877193e-05, + "loss": 0.0003, + "step": 9468 + }, + { + "epoch": 141.33, + "learning_rate": 2.0526315789473685e-05, + "loss": 0.0002, + "step": 9469 + }, + { + "epoch": 141.34, + "learning_rate": 2.0491228070175437e-05, + "loss": 0.0002, + "step": 9470 + }, + { + "epoch": 141.36, + "learning_rate": 2.045614035087719e-05, + "loss": 0.0002, + "step": 9471 + }, + { + "epoch": 141.37, + "learning_rate": 2.0421052631578945e-05, + "loss": 0.0002, + "step": 9472 + }, + { + "epoch": 141.39, + "learning_rate": 2.0385964912280697e-05, + "loss": 0.0002, + "step": 9473 + }, + { + "epoch": 141.4, + "learning_rate": 2.0350877192982453e-05, + "loss": 0.0002, + "step": 9474 + }, + { + "epoch": 141.42, + "learning_rate": 2.0315789473684212e-05, + "loss": 0.0002, + "step": 9475 + }, + { + "epoch": 141.43, + "learning_rate": 2.0280701754385964e-05, + "loss": 0.0002, + "step": 9476 + }, + { + "epoch": 141.45, + "learning_rate": 2.0245614035087716e-05, + "loss": 0.0002, + "step": 9477 + }, + { + "epoch": 141.46, + "learning_rate": 2.0210526315789472e-05, + "loss": 0.0018, + "step": 9478 + }, + { + "epoch": 141.48, + "learning_rate": 2.0175438596491224e-05, + "loss": 0.0002, + "step": 9479 + }, + { + "epoch": 141.49, + "learning_rate": 2.014035087719298e-05, + "loss": 0.0002, + "step": 9480 + }, + { + "epoch": 141.51, + "learning_rate": 2.010526315789474e-05, + "loss": 0.0002, + "step": 9481 + }, + { + "epoch": 141.52, + "learning_rate": 2.007017543859649e-05, + "loss": 0.0002, + "step": 9482 + }, + { + "epoch": 141.54, + "learning_rate": 2.0035087719298243e-05, + "loss": 0.0002, + "step": 9483 + }, + { + "epoch": 141.55, + "learning_rate": 1.9999999999999998e-05, + "loss": 0.0002, + "step": 9484 + }, + { + "epoch": 141.57, + "learning_rate": 1.996491228070175e-05, + "loss": 0.0002, + "step": 9485 + }, + { + "epoch": 141.58, + "learning_rate": 1.9929824561403506e-05, + "loss": 0.0002, + "step": 9486 + }, + { + "epoch": 141.59, + "learning_rate": 1.9894736842105265e-05, + "loss": 0.0002, + "step": 9487 + }, + { + "epoch": 141.61, + "learning_rate": 1.9859649122807017e-05, + "loss": 0.0002, + "step": 9488 + }, + { + "epoch": 141.62, + "learning_rate": 1.982456140350877e-05, + "loss": 0.0002, + "step": 9489 + }, + { + "epoch": 141.64, + "learning_rate": 1.9789473684210525e-05, + "loss": 0.0002, + "step": 9490 + }, + { + "epoch": 141.65, + "learning_rate": 1.9754385964912277e-05, + "loss": 0.0002, + "step": 9491 + }, + { + "epoch": 141.67, + "learning_rate": 1.9719298245614033e-05, + "loss": 0.0002, + "step": 9492 + }, + { + "epoch": 141.68, + "learning_rate": 1.968421052631579e-05, + "loss": 0.0002, + "step": 9493 + }, + { + "epoch": 141.7, + "learning_rate": 1.9649122807017544e-05, + "loss": 0.0002, + "step": 9494 + }, + { + "epoch": 141.71, + "learning_rate": 1.9614035087719296e-05, + "loss": 0.0002, + "step": 9495 + }, + { + "epoch": 141.73, + "learning_rate": 1.957894736842105e-05, + "loss": 0.0002, + "step": 9496 + }, + { + "epoch": 141.74, + "learning_rate": 1.9543859649122804e-05, + "loss": 0.0002, + "step": 9497 + }, + { + "epoch": 141.76, + "learning_rate": 1.950877192982456e-05, + "loss": 0.0005, + "step": 9498 + }, + { + "epoch": 141.77, + "learning_rate": 1.947368421052631e-05, + "loss": 0.0002, + "step": 9499 + }, + { + "epoch": 141.79, + "learning_rate": 1.943859649122807e-05, + "loss": 0.0002, + "step": 9500 + }, + { + "epoch": 141.8, + "learning_rate": 1.9403508771929822e-05, + "loss": 0.0002, + "step": 9501 + }, + { + "epoch": 141.82, + "learning_rate": 1.9368421052631578e-05, + "loss": 0.001, + "step": 9502 + }, + { + "epoch": 141.83, + "learning_rate": 1.933333333333333e-05, + "loss": 0.0002, + "step": 9503 + }, + { + "epoch": 141.85, + "learning_rate": 1.9298245614035086e-05, + "loss": 0.0014, + "step": 9504 + }, + { + "epoch": 141.86, + "learning_rate": 1.9263157894736838e-05, + "loss": 0.0002, + "step": 9505 + }, + { + "epoch": 141.88, + "learning_rate": 1.9228070175438597e-05, + "loss": 0.0002, + "step": 9506 + }, + { + "epoch": 141.89, + "learning_rate": 1.919298245614035e-05, + "loss": 0.0002, + "step": 9507 + }, + { + "epoch": 141.91, + "learning_rate": 1.9157894736842104e-05, + "loss": 0.0002, + "step": 9508 + }, + { + "epoch": 141.92, + "learning_rate": 1.9122807017543857e-05, + "loss": 0.0001, + "step": 9509 + }, + { + "epoch": 141.94, + "learning_rate": 1.9087719298245612e-05, + "loss": 0.0002, + "step": 9510 + }, + { + "epoch": 141.95, + "learning_rate": 1.9052631578947364e-05, + "loss": 0.0002, + "step": 9511 + }, + { + "epoch": 141.97, + "learning_rate": 1.9017543859649123e-05, + "loss": 0.0002, + "step": 9512 + }, + { + "epoch": 141.98, + "learning_rate": 1.8982456140350875e-05, + "loss": 0.0002, + "step": 9513 + }, + { + "epoch": 142.0, + "learning_rate": 1.894736842105263e-05, + "loss": 0.0014, + "step": 9514 + }, + { + "epoch": 142.01, + "learning_rate": 1.8912280701754383e-05, + "loss": 0.0002, + "step": 9515 + }, + { + "epoch": 142.03, + "learning_rate": 1.887719298245614e-05, + "loss": 0.0002, + "step": 9516 + }, + { + "epoch": 142.04, + "learning_rate": 1.884210526315789e-05, + "loss": 0.0002, + "step": 9517 + }, + { + "epoch": 142.06, + "learning_rate": 1.8807017543859646e-05, + "loss": 0.0002, + "step": 9518 + }, + { + "epoch": 142.07, + "learning_rate": 1.8771929824561402e-05, + "loss": 0.0002, + "step": 9519 + }, + { + "epoch": 142.09, + "learning_rate": 1.8736842105263158e-05, + "loss": 0.0003, + "step": 9520 + }, + { + "epoch": 142.1, + "learning_rate": 1.870175438596491e-05, + "loss": 0.0041, + "step": 9521 + }, + { + "epoch": 142.12, + "learning_rate": 1.8666666666666665e-05, + "loss": 0.0002, + "step": 9522 + }, + { + "epoch": 142.13, + "learning_rate": 1.863157894736842e-05, + "loss": 0.0011, + "step": 9523 + }, + { + "epoch": 142.15, + "learning_rate": 1.8596491228070173e-05, + "loss": 0.0002, + "step": 9524 + }, + { + "epoch": 142.16, + "learning_rate": 1.856140350877193e-05, + "loss": 0.0002, + "step": 9525 + }, + { + "epoch": 142.18, + "learning_rate": 1.8526315789473684e-05, + "loss": 0.0002, + "step": 9526 + }, + { + "epoch": 142.19, + "learning_rate": 1.8491228070175436e-05, + "loss": 0.0002, + "step": 9527 + }, + { + "epoch": 142.21, + "learning_rate": 1.8456140350877192e-05, + "loss": 0.0002, + "step": 9528 + }, + { + "epoch": 142.22, + "learning_rate": 1.8421052631578944e-05, + "loss": 0.0002, + "step": 9529 + }, + { + "epoch": 142.24, + "learning_rate": 1.83859649122807e-05, + "loss": 0.0003, + "step": 9530 + }, + { + "epoch": 142.25, + "learning_rate": 1.8350877192982455e-05, + "loss": 0.008, + "step": 9531 + }, + { + "epoch": 142.27, + "learning_rate": 1.8315789473684207e-05, + "loss": 0.0002, + "step": 9532 + }, + { + "epoch": 142.28, + "learning_rate": 1.8280701754385963e-05, + "loss": 0.0002, + "step": 9533 + }, + { + "epoch": 142.3, + "learning_rate": 1.824561403508772e-05, + "loss": 0.0002, + "step": 9534 + }, + { + "epoch": 142.31, + "learning_rate": 1.821052631578947e-05, + "loss": 0.0002, + "step": 9535 + }, + { + "epoch": 142.33, + "learning_rate": 1.8175438596491226e-05, + "loss": 0.0004, + "step": 9536 + }, + { + "epoch": 142.34, + "learning_rate": 1.814035087719298e-05, + "loss": 0.0002, + "step": 9537 + }, + { + "epoch": 142.36, + "learning_rate": 1.8105263157894734e-05, + "loss": 0.0004, + "step": 9538 + }, + { + "epoch": 142.37, + "learning_rate": 1.807017543859649e-05, + "loss": 0.0002, + "step": 9539 + }, + { + "epoch": 142.39, + "learning_rate": 1.8035087719298245e-05, + "loss": 0.0004, + "step": 9540 + }, + { + "epoch": 142.4, + "learning_rate": 1.7999999999999997e-05, + "loss": 0.0002, + "step": 9541 + }, + { + "epoch": 142.42, + "learning_rate": 1.7964912280701753e-05, + "loss": 0.0002, + "step": 9542 + }, + { + "epoch": 142.43, + "learning_rate": 1.7929824561403508e-05, + "loss": 0.0002, + "step": 9543 + }, + { + "epoch": 142.45, + "learning_rate": 1.789473684210526e-05, + "loss": 0.0058, + "step": 9544 + }, + { + "epoch": 142.46, + "learning_rate": 1.7859649122807016e-05, + "loss": 0.0002, + "step": 9545 + }, + { + "epoch": 142.48, + "learning_rate": 1.782456140350877e-05, + "loss": 0.0003, + "step": 9546 + }, + { + "epoch": 142.49, + "learning_rate": 1.7789473684210524e-05, + "loss": 0.0002, + "step": 9547 + }, + { + "epoch": 142.51, + "learning_rate": 1.775438596491228e-05, + "loss": 0.0005, + "step": 9548 + }, + { + "epoch": 142.52, + "learning_rate": 1.7719298245614035e-05, + "loss": 0.0001, + "step": 9549 + }, + { + "epoch": 142.54, + "learning_rate": 1.7684210526315787e-05, + "loss": 0.0003, + "step": 9550 + }, + { + "epoch": 142.55, + "learning_rate": 1.7649122807017542e-05, + "loss": 0.0002, + "step": 9551 + }, + { + "epoch": 142.57, + "learning_rate": 1.7614035087719298e-05, + "loss": 0.0003, + "step": 9552 + }, + { + "epoch": 142.58, + "learning_rate": 1.757894736842105e-05, + "loss": 0.0002, + "step": 9553 + }, + { + "epoch": 142.59, + "learning_rate": 1.7543859649122806e-05, + "loss": 0.0002, + "step": 9554 + }, + { + "epoch": 142.61, + "learning_rate": 1.750877192982456e-05, + "loss": 0.0002, + "step": 9555 + }, + { + "epoch": 142.62, + "learning_rate": 1.7473684210526313e-05, + "loss": 0.0585, + "step": 9556 + }, + { + "epoch": 142.64, + "learning_rate": 1.743859649122807e-05, + "loss": 0.0004, + "step": 9557 + }, + { + "epoch": 142.65, + "learning_rate": 1.7403508771929825e-05, + "loss": 0.0002, + "step": 9558 + }, + { + "epoch": 142.67, + "learning_rate": 1.7368421052631577e-05, + "loss": 0.0002, + "step": 9559 + }, + { + "epoch": 142.68, + "learning_rate": 1.7333333333333332e-05, + "loss": 0.0002, + "step": 9560 + }, + { + "epoch": 142.7, + "learning_rate": 1.7298245614035088e-05, + "loss": 0.0002, + "step": 9561 + }, + { + "epoch": 142.71, + "learning_rate": 1.726315789473684e-05, + "loss": 0.0002, + "step": 9562 + }, + { + "epoch": 142.73, + "learning_rate": 1.7228070175438596e-05, + "loss": 0.0002, + "step": 9563 + }, + { + "epoch": 142.74, + "learning_rate": 1.719298245614035e-05, + "loss": 0.0002, + "step": 9564 + }, + { + "epoch": 142.76, + "learning_rate": 1.7157894736842103e-05, + "loss": 0.0002, + "step": 9565 + }, + { + "epoch": 142.77, + "learning_rate": 1.712280701754386e-05, + "loss": 0.0328, + "step": 9566 + }, + { + "epoch": 142.79, + "learning_rate": 1.7087719298245614e-05, + "loss": 0.0002, + "step": 9567 + }, + { + "epoch": 142.8, + "learning_rate": 1.7052631578947367e-05, + "loss": 0.0002, + "step": 9568 + }, + { + "epoch": 142.82, + "learning_rate": 1.7017543859649122e-05, + "loss": 0.0002, + "step": 9569 + }, + { + "epoch": 142.83, + "learning_rate": 1.6982456140350878e-05, + "loss": 0.0002, + "step": 9570 + }, + { + "epoch": 142.85, + "learning_rate": 1.694736842105263e-05, + "loss": 0.0002, + "step": 9571 + }, + { + "epoch": 142.86, + "learning_rate": 1.6912280701754385e-05, + "loss": 0.0002, + "step": 9572 + }, + { + "epoch": 142.88, + "learning_rate": 1.687719298245614e-05, + "loss": 0.0002, + "step": 9573 + }, + { + "epoch": 142.89, + "learning_rate": 1.6842105263157893e-05, + "loss": 0.0001, + "step": 9574 + }, + { + "epoch": 142.91, + "learning_rate": 1.680701754385965e-05, + "loss": 0.0001, + "step": 9575 + }, + { + "epoch": 142.92, + "learning_rate": 1.67719298245614e-05, + "loss": 0.0002, + "step": 9576 + }, + { + "epoch": 142.94, + "learning_rate": 1.6736842105263156e-05, + "loss": 0.0002, + "step": 9577 + }, + { + "epoch": 142.95, + "learning_rate": 1.6701754385964912e-05, + "loss": 0.0002, + "step": 9578 + }, + { + "epoch": 142.97, + "learning_rate": 1.6666666666666664e-05, + "loss": 0.0002, + "step": 9579 + }, + { + "epoch": 142.98, + "learning_rate": 1.663157894736842e-05, + "loss": 0.0002, + "step": 9580 + }, + { + "epoch": 143.0, + "learning_rate": 1.6596491228070175e-05, + "loss": 0.0002, + "step": 9581 + }, + { + "epoch": 143.01, + "learning_rate": 1.6561403508771927e-05, + "loss": 0.0002, + "step": 9582 + }, + { + "epoch": 143.03, + "learning_rate": 1.6526315789473683e-05, + "loss": 0.0002, + "step": 9583 + }, + { + "epoch": 143.04, + "learning_rate": 1.649122807017544e-05, + "loss": 0.0018, + "step": 9584 + }, + { + "epoch": 143.06, + "learning_rate": 1.645614035087719e-05, + "loss": 0.0002, + "step": 9585 + }, + { + "epoch": 143.07, + "learning_rate": 1.6421052631578946e-05, + "loss": 0.0169, + "step": 9586 + }, + { + "epoch": 143.09, + "learning_rate": 1.6385964912280702e-05, + "loss": 0.0004, + "step": 9587 + }, + { + "epoch": 143.1, + "learning_rate": 1.6350877192982454e-05, + "loss": 0.0002, + "step": 9588 + }, + { + "epoch": 143.12, + "learning_rate": 1.631578947368421e-05, + "loss": 0.0002, + "step": 9589 + }, + { + "epoch": 143.13, + "learning_rate": 1.6280701754385965e-05, + "loss": 0.0002, + "step": 9590 + }, + { + "epoch": 143.15, + "learning_rate": 1.6245614035087717e-05, + "loss": 0.0002, + "step": 9591 + }, + { + "epoch": 143.16, + "learning_rate": 1.6210526315789473e-05, + "loss": 0.0002, + "step": 9592 + }, + { + "epoch": 143.18, + "learning_rate": 1.6175438596491228e-05, + "loss": 0.0002, + "step": 9593 + }, + { + "epoch": 143.19, + "learning_rate": 1.614035087719298e-05, + "loss": 0.0002, + "step": 9594 + }, + { + "epoch": 143.21, + "learning_rate": 1.6105263157894736e-05, + "loss": 0.0002, + "step": 9595 + }, + { + "epoch": 143.22, + "learning_rate": 1.607017543859649e-05, + "loss": 0.0002, + "step": 9596 + }, + { + "epoch": 143.24, + "learning_rate": 1.6035087719298244e-05, + "loss": 0.0865, + "step": 9597 + }, + { + "epoch": 143.25, + "learning_rate": 1.6e-05, + "loss": 0.0002, + "step": 9598 + }, + { + "epoch": 143.27, + "learning_rate": 1.5964912280701755e-05, + "loss": 0.0003, + "step": 9599 + }, + { + "epoch": 143.28, + "learning_rate": 1.5929824561403507e-05, + "loss": 0.0002, + "step": 9600 + }, + { + "epoch": 143.28, + "eval_accuracy": 0.8913362701908958, + "eval_f1": 0.8906088969378417, + "eval_loss": 0.6095054745674133, + "eval_runtime": 344.9289, + "eval_samples_per_second": 11.846, + "eval_steps_per_second": 0.742, + "step": 9600 + }, + { + "epoch": 143.3, + "learning_rate": 1.589473684210526e-05, + "loss": 0.0002, + "step": 9601 + }, + { + "epoch": 143.31, + "learning_rate": 1.5859649122807018e-05, + "loss": 0.0002, + "step": 9602 + }, + { + "epoch": 143.33, + "learning_rate": 1.582456140350877e-05, + "loss": 0.0002, + "step": 9603 + }, + { + "epoch": 143.34, + "learning_rate": 1.5789473684210522e-05, + "loss": 0.0002, + "step": 9604 + }, + { + "epoch": 143.36, + "learning_rate": 1.575438596491228e-05, + "loss": 0.0002, + "step": 9605 + }, + { + "epoch": 143.37, + "learning_rate": 1.5719298245614034e-05, + "loss": 0.0002, + "step": 9606 + }, + { + "epoch": 143.39, + "learning_rate": 1.5684210526315786e-05, + "loss": 0.0002, + "step": 9607 + }, + { + "epoch": 143.4, + "learning_rate": 1.5649122807017545e-05, + "loss": 0.0006, + "step": 9608 + }, + { + "epoch": 143.42, + "learning_rate": 1.5614035087719297e-05, + "loss": 0.0002, + "step": 9609 + }, + { + "epoch": 143.43, + "learning_rate": 1.557894736842105e-05, + "loss": 0.0002, + "step": 9610 + }, + { + "epoch": 143.45, + "learning_rate": 1.5543859649122808e-05, + "loss": 0.1441, + "step": 9611 + }, + { + "epoch": 143.46, + "learning_rate": 1.550877192982456e-05, + "loss": 0.0002, + "step": 9612 + }, + { + "epoch": 143.48, + "learning_rate": 1.5473684210526312e-05, + "loss": 0.0002, + "step": 9613 + }, + { + "epoch": 143.49, + "learning_rate": 1.543859649122807e-05, + "loss": 0.0002, + "step": 9614 + }, + { + "epoch": 143.51, + "learning_rate": 1.5403508771929823e-05, + "loss": 0.0002, + "step": 9615 + }, + { + "epoch": 143.52, + "learning_rate": 1.5368421052631576e-05, + "loss": 0.0002, + "step": 9616 + }, + { + "epoch": 143.54, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.0002, + "step": 9617 + }, + { + "epoch": 143.55, + "learning_rate": 1.5298245614035087e-05, + "loss": 0.0002, + "step": 9618 + }, + { + "epoch": 143.57, + "learning_rate": 1.526315789473684e-05, + "loss": 0.0002, + "step": 9619 + }, + { + "epoch": 143.58, + "learning_rate": 1.5228070175438596e-05, + "loss": 0.0002, + "step": 9620 + }, + { + "epoch": 143.59, + "learning_rate": 1.519298245614035e-05, + "loss": 0.0003, + "step": 9621 + }, + { + "epoch": 143.61, + "learning_rate": 1.5157894736842104e-05, + "loss": 0.0002, + "step": 9622 + }, + { + "epoch": 143.62, + "learning_rate": 1.5122807017543858e-05, + "loss": 0.0003, + "step": 9623 + }, + { + "epoch": 143.64, + "learning_rate": 1.5087719298245613e-05, + "loss": 0.0002, + "step": 9624 + }, + { + "epoch": 143.65, + "learning_rate": 1.5052631578947367e-05, + "loss": 0.0002, + "step": 9625 + }, + { + "epoch": 143.67, + "learning_rate": 1.5017543859649121e-05, + "loss": 0.0002, + "step": 9626 + }, + { + "epoch": 143.68, + "learning_rate": 1.4982456140350876e-05, + "loss": 0.0007, + "step": 9627 + }, + { + "epoch": 143.7, + "learning_rate": 1.494736842105263e-05, + "loss": 0.0002, + "step": 9628 + }, + { + "epoch": 143.71, + "learning_rate": 1.4912280701754384e-05, + "loss": 0.0002, + "step": 9629 + }, + { + "epoch": 143.73, + "learning_rate": 1.487719298245614e-05, + "loss": 0.0002, + "step": 9630 + }, + { + "epoch": 143.74, + "learning_rate": 1.4842105263157894e-05, + "loss": 0.0002, + "step": 9631 + }, + { + "epoch": 143.76, + "learning_rate": 1.4807017543859647e-05, + "loss": 0.0002, + "step": 9632 + }, + { + "epoch": 143.77, + "learning_rate": 1.4771929824561403e-05, + "loss": 0.0002, + "step": 9633 + }, + { + "epoch": 143.79, + "learning_rate": 1.4736842105263157e-05, + "loss": 0.0002, + "step": 9634 + }, + { + "epoch": 143.8, + "learning_rate": 1.470175438596491e-05, + "loss": 0.0003, + "step": 9635 + }, + { + "epoch": 143.82, + "learning_rate": 1.4666666666666666e-05, + "loss": 0.1042, + "step": 9636 + }, + { + "epoch": 143.83, + "learning_rate": 1.463157894736842e-05, + "loss": 0.0003, + "step": 9637 + }, + { + "epoch": 143.85, + "learning_rate": 1.4596491228070174e-05, + "loss": 0.0002, + "step": 9638 + }, + { + "epoch": 143.86, + "learning_rate": 1.456140350877193e-05, + "loss": 0.0002, + "step": 9639 + }, + { + "epoch": 143.88, + "learning_rate": 1.4526315789473683e-05, + "loss": 0.0002, + "step": 9640 + }, + { + "epoch": 143.89, + "learning_rate": 1.4491228070175437e-05, + "loss": 0.0002, + "step": 9641 + }, + { + "epoch": 143.91, + "learning_rate": 1.4456140350877193e-05, + "loss": 0.0002, + "step": 9642 + }, + { + "epoch": 143.92, + "learning_rate": 1.4421052631578947e-05, + "loss": 0.0002, + "step": 9643 + }, + { + "epoch": 143.94, + "learning_rate": 1.43859649122807e-05, + "loss": 0.0001, + "step": 9644 + }, + { + "epoch": 143.95, + "learning_rate": 1.4350877192982456e-05, + "loss": 0.0002, + "step": 9645 + }, + { + "epoch": 143.97, + "learning_rate": 1.431578947368421e-05, + "loss": 0.0002, + "step": 9646 + }, + { + "epoch": 143.98, + "learning_rate": 1.4280701754385964e-05, + "loss": 0.0945, + "step": 9647 + }, + { + "epoch": 144.0, + "learning_rate": 1.4245614035087718e-05, + "loss": 0.0022, + "step": 9648 + }, + { + "epoch": 144.01, + "learning_rate": 1.4210526315789473e-05, + "loss": 0.0151, + "step": 9649 + }, + { + "epoch": 144.03, + "learning_rate": 1.4175438596491227e-05, + "loss": 0.0002, + "step": 9650 + }, + { + "epoch": 144.04, + "learning_rate": 1.4140350877192981e-05, + "loss": 0.0002, + "step": 9651 + }, + { + "epoch": 144.06, + "learning_rate": 1.4105263157894737e-05, + "loss": 0.0002, + "step": 9652 + }, + { + "epoch": 144.07, + "learning_rate": 1.407017543859649e-05, + "loss": 0.0001, + "step": 9653 + }, + { + "epoch": 144.09, + "learning_rate": 1.4035087719298244e-05, + "loss": 0.0002, + "step": 9654 + }, + { + "epoch": 144.1, + "learning_rate": 1.4e-05, + "loss": 0.0003, + "step": 9655 + }, + { + "epoch": 144.12, + "learning_rate": 1.3964912280701754e-05, + "loss": 0.0001, + "step": 9656 + }, + { + "epoch": 144.13, + "learning_rate": 1.3929824561403508e-05, + "loss": 0.0002, + "step": 9657 + }, + { + "epoch": 144.15, + "learning_rate": 1.3894736842105263e-05, + "loss": 0.0001, + "step": 9658 + }, + { + "epoch": 144.16, + "learning_rate": 1.3859649122807017e-05, + "loss": 0.0002, + "step": 9659 + }, + { + "epoch": 144.18, + "learning_rate": 1.382456140350877e-05, + "loss": 0.0002, + "step": 9660 + }, + { + "epoch": 144.19, + "learning_rate": 1.3789473684210526e-05, + "loss": 0.0002, + "step": 9661 + }, + { + "epoch": 144.21, + "learning_rate": 1.375438596491228e-05, + "loss": 0.0002, + "step": 9662 + }, + { + "epoch": 144.22, + "learning_rate": 1.3719298245614034e-05, + "loss": 0.0002, + "step": 9663 + }, + { + "epoch": 144.24, + "learning_rate": 1.368421052631579e-05, + "loss": 0.0002, + "step": 9664 + }, + { + "epoch": 144.25, + "learning_rate": 1.3649122807017543e-05, + "loss": 0.0002, + "step": 9665 + }, + { + "epoch": 144.27, + "learning_rate": 1.3614035087719297e-05, + "loss": 0.0002, + "step": 9666 + }, + { + "epoch": 144.28, + "learning_rate": 1.3578947368421053e-05, + "loss": 0.0002, + "step": 9667 + }, + { + "epoch": 144.3, + "learning_rate": 1.3543859649122807e-05, + "loss": 0.0003, + "step": 9668 + }, + { + "epoch": 144.31, + "learning_rate": 1.350877192982456e-05, + "loss": 0.0002, + "step": 9669 + }, + { + "epoch": 144.33, + "learning_rate": 1.3473684210526313e-05, + "loss": 0.0004, + "step": 9670 + }, + { + "epoch": 144.34, + "learning_rate": 1.343859649122807e-05, + "loss": 0.0002, + "step": 9671 + }, + { + "epoch": 144.36, + "learning_rate": 1.3403508771929824e-05, + "loss": 0.0098, + "step": 9672 + }, + { + "epoch": 144.37, + "learning_rate": 1.3368421052631576e-05, + "loss": 0.0002, + "step": 9673 + }, + { + "epoch": 144.39, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.0002, + "step": 9674 + }, + { + "epoch": 144.4, + "learning_rate": 1.3298245614035087e-05, + "loss": 0.0002, + "step": 9675 + }, + { + "epoch": 144.42, + "learning_rate": 1.326315789473684e-05, + "loss": 0.0001, + "step": 9676 + }, + { + "epoch": 144.43, + "learning_rate": 1.3228070175438597e-05, + "loss": 0.0001, + "step": 9677 + }, + { + "epoch": 144.45, + "learning_rate": 1.319298245614035e-05, + "loss": 0.0002, + "step": 9678 + }, + { + "epoch": 144.46, + "learning_rate": 1.3157894736842103e-05, + "loss": 0.0068, + "step": 9679 + }, + { + "epoch": 144.48, + "learning_rate": 1.312280701754386e-05, + "loss": 0.0002, + "step": 9680 + }, + { + "epoch": 144.49, + "learning_rate": 1.3087719298245614e-05, + "loss": 0.0005, + "step": 9681 + }, + { + "epoch": 144.51, + "learning_rate": 1.3052631578947366e-05, + "loss": 0.0002, + "step": 9682 + }, + { + "epoch": 144.52, + "learning_rate": 1.3017543859649123e-05, + "loss": 0.0002, + "step": 9683 + }, + { + "epoch": 144.54, + "learning_rate": 1.2982456140350877e-05, + "loss": 0.0002, + "step": 9684 + }, + { + "epoch": 144.55, + "learning_rate": 1.2947368421052629e-05, + "loss": 0.049, + "step": 9685 + }, + { + "epoch": 144.57, + "learning_rate": 1.2912280701754386e-05, + "loss": 0.0002, + "step": 9686 + }, + { + "epoch": 144.58, + "learning_rate": 1.287719298245614e-05, + "loss": 0.3249, + "step": 9687 + }, + { + "epoch": 144.59, + "learning_rate": 1.2842105263157892e-05, + "loss": 0.0002, + "step": 9688 + }, + { + "epoch": 144.61, + "learning_rate": 1.280701754385965e-05, + "loss": 0.0002, + "step": 9689 + }, + { + "epoch": 144.62, + "learning_rate": 1.2771929824561404e-05, + "loss": 0.0002, + "step": 9690 + }, + { + "epoch": 144.64, + "learning_rate": 1.2736842105263156e-05, + "loss": 0.0002, + "step": 9691 + }, + { + "epoch": 144.65, + "learning_rate": 1.270175438596491e-05, + "loss": 0.0001, + "step": 9692 + }, + { + "epoch": 144.67, + "learning_rate": 1.2666666666666665e-05, + "loss": 0.0002, + "step": 9693 + }, + { + "epoch": 144.68, + "learning_rate": 1.2631578947368419e-05, + "loss": 0.0002, + "step": 9694 + }, + { + "epoch": 144.7, + "learning_rate": 1.2596491228070173e-05, + "loss": 0.0002, + "step": 9695 + }, + { + "epoch": 144.71, + "learning_rate": 1.2561403508771928e-05, + "loss": 0.0002, + "step": 9696 + }, + { + "epoch": 144.73, + "learning_rate": 1.2526315789473682e-05, + "loss": 0.0002, + "step": 9697 + }, + { + "epoch": 144.74, + "learning_rate": 1.2491228070175436e-05, + "loss": 0.0001, + "step": 9698 + }, + { + "epoch": 144.76, + "learning_rate": 1.2456140350877192e-05, + "loss": 0.0012, + "step": 9699 + }, + { + "epoch": 144.77, + "learning_rate": 1.2421052631578946e-05, + "loss": 0.0013, + "step": 9700 + }, + { + "epoch": 144.79, + "learning_rate": 1.23859649122807e-05, + "loss": 0.0001, + "step": 9701 + }, + { + "epoch": 144.8, + "learning_rate": 1.2350877192982455e-05, + "loss": 0.0005, + "step": 9702 + }, + { + "epoch": 144.82, + "learning_rate": 1.2315789473684209e-05, + "loss": 0.0002, + "step": 9703 + }, + { + "epoch": 144.83, + "learning_rate": 1.2280701754385963e-05, + "loss": 0.0002, + "step": 9704 + }, + { + "epoch": 144.85, + "learning_rate": 1.2245614035087718e-05, + "loss": 0.0002, + "step": 9705 + }, + { + "epoch": 144.86, + "learning_rate": 1.2210526315789472e-05, + "loss": 0.0003, + "step": 9706 + }, + { + "epoch": 144.88, + "learning_rate": 1.2175438596491226e-05, + "loss": 0.0002, + "step": 9707 + }, + { + "epoch": 144.89, + "learning_rate": 1.2140350877192981e-05, + "loss": 0.0002, + "step": 9708 + }, + { + "epoch": 144.91, + "learning_rate": 1.2105263157894735e-05, + "loss": 0.0002, + "step": 9709 + }, + { + "epoch": 144.92, + "learning_rate": 1.207017543859649e-05, + "loss": 0.0002, + "step": 9710 + }, + { + "epoch": 144.94, + "learning_rate": 1.2035087719298245e-05, + "loss": 0.0002, + "step": 9711 + }, + { + "epoch": 144.95, + "learning_rate": 1.1999999999999999e-05, + "loss": 0.0002, + "step": 9712 + }, + { + "epoch": 144.97, + "learning_rate": 1.1964912280701752e-05, + "loss": 0.0002, + "step": 9713 + }, + { + "epoch": 144.98, + "learning_rate": 1.1929824561403508e-05, + "loss": 0.0002, + "step": 9714 + }, + { + "epoch": 145.0, + "learning_rate": 1.1894736842105262e-05, + "loss": 0.0002, + "step": 9715 + }, + { + "epoch": 145.01, + "learning_rate": 1.1859649122807016e-05, + "loss": 0.0002, + "step": 9716 + }, + { + "epoch": 145.03, + "learning_rate": 1.182456140350877e-05, + "loss": 0.0542, + "step": 9717 + }, + { + "epoch": 145.04, + "learning_rate": 1.1789473684210525e-05, + "loss": 0.0002, + "step": 9718 + }, + { + "epoch": 145.06, + "learning_rate": 1.1754385964912279e-05, + "loss": 0.0002, + "step": 9719 + }, + { + "epoch": 145.07, + "learning_rate": 1.1719298245614033e-05, + "loss": 0.0002, + "step": 9720 + }, + { + "epoch": 145.09, + "learning_rate": 1.1684210526315788e-05, + "loss": 0.0002, + "step": 9721 + }, + { + "epoch": 145.1, + "learning_rate": 1.1649122807017542e-05, + "loss": 0.0002, + "step": 9722 + }, + { + "epoch": 145.12, + "learning_rate": 1.1614035087719296e-05, + "loss": 0.0002, + "step": 9723 + }, + { + "epoch": 145.13, + "learning_rate": 1.1578947368421052e-05, + "loss": 0.0002, + "step": 9724 + }, + { + "epoch": 145.15, + "learning_rate": 1.1543859649122806e-05, + "loss": 0.0002, + "step": 9725 + }, + { + "epoch": 145.16, + "learning_rate": 1.150877192982456e-05, + "loss": 0.0002, + "step": 9726 + }, + { + "epoch": 145.18, + "learning_rate": 1.1473684210526315e-05, + "loss": 0.0002, + "step": 9727 + }, + { + "epoch": 145.19, + "learning_rate": 1.1438596491228069e-05, + "loss": 0.0002, + "step": 9728 + }, + { + "epoch": 145.21, + "learning_rate": 1.1403508771929823e-05, + "loss": 0.0002, + "step": 9729 + }, + { + "epoch": 145.22, + "learning_rate": 1.1368421052631578e-05, + "loss": 0.0002, + "step": 9730 + }, + { + "epoch": 145.24, + "learning_rate": 1.1333333333333332e-05, + "loss": 0.0002, + "step": 9731 + }, + { + "epoch": 145.25, + "learning_rate": 1.1298245614035086e-05, + "loss": 0.0002, + "step": 9732 + }, + { + "epoch": 145.27, + "learning_rate": 1.1263157894736842e-05, + "loss": 0.0004, + "step": 9733 + }, + { + "epoch": 145.28, + "learning_rate": 1.1228070175438595e-05, + "loss": 0.0002, + "step": 9734 + }, + { + "epoch": 145.3, + "learning_rate": 1.119298245614035e-05, + "loss": 0.0002, + "step": 9735 + }, + { + "epoch": 145.31, + "learning_rate": 1.1157894736842105e-05, + "loss": 0.0009, + "step": 9736 + }, + { + "epoch": 145.33, + "learning_rate": 1.1122807017543859e-05, + "loss": 0.0002, + "step": 9737 + }, + { + "epoch": 145.34, + "learning_rate": 1.1087719298245613e-05, + "loss": 0.0002, + "step": 9738 + }, + { + "epoch": 145.36, + "learning_rate": 1.1052631578947366e-05, + "loss": 0.0002, + "step": 9739 + }, + { + "epoch": 145.37, + "learning_rate": 1.1017543859649122e-05, + "loss": 0.0002, + "step": 9740 + }, + { + "epoch": 145.39, + "learning_rate": 1.0982456140350876e-05, + "loss": 0.0002, + "step": 9741 + }, + { + "epoch": 145.4, + "learning_rate": 1.094736842105263e-05, + "loss": 0.0002, + "step": 9742 + }, + { + "epoch": 145.42, + "learning_rate": 1.0912280701754385e-05, + "loss": 0.0002, + "step": 9743 + }, + { + "epoch": 145.43, + "learning_rate": 1.0877192982456139e-05, + "loss": 0.0002, + "step": 9744 + }, + { + "epoch": 145.45, + "learning_rate": 1.0842105263157893e-05, + "loss": 0.0002, + "step": 9745 + }, + { + "epoch": 145.46, + "learning_rate": 1.0807017543859648e-05, + "loss": 0.0002, + "step": 9746 + }, + { + "epoch": 145.48, + "learning_rate": 1.0771929824561402e-05, + "loss": 0.0002, + "step": 9747 + }, + { + "epoch": 145.49, + "learning_rate": 1.0736842105263156e-05, + "loss": 0.0002, + "step": 9748 + }, + { + "epoch": 145.51, + "learning_rate": 1.0701754385964912e-05, + "loss": 0.0002, + "step": 9749 + }, + { + "epoch": 145.52, + "learning_rate": 1.0666666666666666e-05, + "loss": 0.0006, + "step": 9750 + }, + { + "epoch": 145.54, + "learning_rate": 1.063157894736842e-05, + "loss": 0.0002, + "step": 9751 + }, + { + "epoch": 145.55, + "learning_rate": 1.0596491228070175e-05, + "loss": 0.0002, + "step": 9752 + }, + { + "epoch": 145.57, + "learning_rate": 1.0561403508771929e-05, + "loss": 0.0002, + "step": 9753 + }, + { + "epoch": 145.58, + "learning_rate": 1.0526315789473683e-05, + "loss": 0.0002, + "step": 9754 + }, + { + "epoch": 145.59, + "learning_rate": 1.0491228070175438e-05, + "loss": 0.0002, + "step": 9755 + }, + { + "epoch": 145.61, + "learning_rate": 1.0456140350877192e-05, + "loss": 0.0002, + "step": 9756 + }, + { + "epoch": 145.62, + "learning_rate": 1.0421052631578946e-05, + "loss": 0.0002, + "step": 9757 + }, + { + "epoch": 145.64, + "learning_rate": 1.0385964912280702e-05, + "loss": 0.0002, + "step": 9758 + }, + { + "epoch": 145.65, + "learning_rate": 1.0350877192982455e-05, + "loss": 0.0002, + "step": 9759 + }, + { + "epoch": 145.67, + "learning_rate": 1.031578947368421e-05, + "loss": 0.0001, + "step": 9760 + }, + { + "epoch": 145.68, + "learning_rate": 1.0280701754385965e-05, + "loss": 0.0002, + "step": 9761 + }, + { + "epoch": 145.7, + "learning_rate": 1.0245614035087719e-05, + "loss": 0.0004, + "step": 9762 + }, + { + "epoch": 145.71, + "learning_rate": 1.0210526315789473e-05, + "loss": 0.0005, + "step": 9763 + }, + { + "epoch": 145.73, + "learning_rate": 1.0175438596491226e-05, + "loss": 0.0002, + "step": 9764 + }, + { + "epoch": 145.74, + "learning_rate": 1.0140350877192982e-05, + "loss": 0.0002, + "step": 9765 + }, + { + "epoch": 145.76, + "learning_rate": 1.0105263157894736e-05, + "loss": 0.0002, + "step": 9766 + }, + { + "epoch": 145.77, + "learning_rate": 1.007017543859649e-05, + "loss": 0.0002, + "step": 9767 + }, + { + "epoch": 145.79, + "learning_rate": 1.0035087719298245e-05, + "loss": 0.0001, + "step": 9768 + }, + { + "epoch": 145.8, + "learning_rate": 9.999999999999999e-06, + "loss": 0.0002, + "step": 9769 + }, + { + "epoch": 145.82, + "learning_rate": 9.964912280701753e-06, + "loss": 0.0003, + "step": 9770 + }, + { + "epoch": 145.83, + "learning_rate": 9.929824561403509e-06, + "loss": 0.0002, + "step": 9771 + }, + { + "epoch": 145.85, + "learning_rate": 9.894736842105262e-06, + "loss": 0.0002, + "step": 9772 + }, + { + "epoch": 145.86, + "learning_rate": 9.859649122807016e-06, + "loss": 0.0009, + "step": 9773 + }, + { + "epoch": 145.88, + "learning_rate": 9.824561403508772e-06, + "loss": 0.0002, + "step": 9774 + }, + { + "epoch": 145.89, + "learning_rate": 9.789473684210526e-06, + "loss": 0.0002, + "step": 9775 + }, + { + "epoch": 145.91, + "learning_rate": 9.75438596491228e-06, + "loss": 0.0802, + "step": 9776 + }, + { + "epoch": 145.92, + "learning_rate": 9.719298245614035e-06, + "loss": 0.029, + "step": 9777 + }, + { + "epoch": 145.94, + "learning_rate": 9.684210526315789e-06, + "loss": 0.0002, + "step": 9778 + }, + { + "epoch": 145.95, + "learning_rate": 9.649122807017543e-06, + "loss": 0.0002, + "step": 9779 + }, + { + "epoch": 145.97, + "learning_rate": 9.614035087719298e-06, + "loss": 0.0002, + "step": 9780 + }, + { + "epoch": 145.98, + "learning_rate": 9.578947368421052e-06, + "loss": 0.0001, + "step": 9781 + }, + { + "epoch": 146.0, + "learning_rate": 9.543859649122806e-06, + "loss": 0.0002, + "step": 9782 + }, + { + "epoch": 146.01, + "learning_rate": 9.508771929824562e-06, + "loss": 0.0002, + "step": 9783 + }, + { + "epoch": 146.03, + "learning_rate": 9.473684210526315e-06, + "loss": 0.0002, + "step": 9784 + }, + { + "epoch": 146.04, + "learning_rate": 9.43859649122807e-06, + "loss": 0.0003, + "step": 9785 + }, + { + "epoch": 146.06, + "learning_rate": 9.403508771929823e-06, + "loss": 0.0002, + "step": 9786 + }, + { + "epoch": 146.07, + "learning_rate": 9.368421052631579e-06, + "loss": 0.0002, + "step": 9787 + }, + { + "epoch": 146.09, + "learning_rate": 9.333333333333333e-06, + "loss": 0.0002, + "step": 9788 + }, + { + "epoch": 146.1, + "learning_rate": 9.298245614035086e-06, + "loss": 0.0002, + "step": 9789 + }, + { + "epoch": 146.12, + "learning_rate": 9.263157894736842e-06, + "loss": 0.0002, + "step": 9790 + }, + { + "epoch": 146.13, + "learning_rate": 9.228070175438596e-06, + "loss": 0.0002, + "step": 9791 + }, + { + "epoch": 146.15, + "learning_rate": 9.19298245614035e-06, + "loss": 0.0002, + "step": 9792 + }, + { + "epoch": 146.16, + "learning_rate": 9.157894736842104e-06, + "loss": 0.0002, + "step": 9793 + }, + { + "epoch": 146.18, + "learning_rate": 9.12280701754386e-06, + "loss": 0.0002, + "step": 9794 + }, + { + "epoch": 146.19, + "learning_rate": 9.087719298245613e-06, + "loss": 0.0002, + "step": 9795 + }, + { + "epoch": 146.21, + "learning_rate": 9.052631578947367e-06, + "loss": 0.0002, + "step": 9796 + }, + { + "epoch": 146.22, + "learning_rate": 9.017543859649122e-06, + "loss": 0.0004, + "step": 9797 + }, + { + "epoch": 146.24, + "learning_rate": 8.982456140350876e-06, + "loss": 0.0002, + "step": 9798 + }, + { + "epoch": 146.25, + "learning_rate": 8.94736842105263e-06, + "loss": 0.0001, + "step": 9799 + }, + { + "epoch": 146.27, + "learning_rate": 8.912280701754386e-06, + "loss": 0.0002, + "step": 9800 + }, + { + "epoch": 146.27, + "eval_accuracy": 0.8913362701908958, + "eval_f1": 0.8909876245610147, + "eval_loss": 0.6056233644485474, + "eval_runtime": 345.3935, + "eval_samples_per_second": 11.83, + "eval_steps_per_second": 0.741, + "step": 9800 + }, + { + "epoch": 146.28, + "learning_rate": 8.87719298245614e-06, + "loss": 0.0002, + "step": 9801 + }, + { + "epoch": 146.3, + "learning_rate": 8.842105263157893e-06, + "loss": 0.0002, + "step": 9802 + }, + { + "epoch": 146.31, + "learning_rate": 8.807017543859649e-06, + "loss": 0.0002, + "step": 9803 + }, + { + "epoch": 146.33, + "learning_rate": 8.771929824561403e-06, + "loss": 0.0002, + "step": 9804 + }, + { + "epoch": 146.34, + "learning_rate": 8.736842105263157e-06, + "loss": 0.0002, + "step": 9805 + }, + { + "epoch": 146.36, + "learning_rate": 8.701754385964912e-06, + "loss": 0.0696, + "step": 9806 + }, + { + "epoch": 146.37, + "learning_rate": 8.666666666666666e-06, + "loss": 0.0002, + "step": 9807 + }, + { + "epoch": 146.39, + "learning_rate": 8.63157894736842e-06, + "loss": 0.0002, + "step": 9808 + }, + { + "epoch": 146.4, + "learning_rate": 8.596491228070176e-06, + "loss": 0.0002, + "step": 9809 + }, + { + "epoch": 146.42, + "learning_rate": 8.56140350877193e-06, + "loss": 0.0002, + "step": 9810 + }, + { + "epoch": 146.43, + "learning_rate": 8.526315789473683e-06, + "loss": 0.0002, + "step": 9811 + }, + { + "epoch": 146.45, + "learning_rate": 8.491228070175439e-06, + "loss": 0.0002, + "step": 9812 + }, + { + "epoch": 146.46, + "learning_rate": 8.456140350877193e-06, + "loss": 0.0002, + "step": 9813 + }, + { + "epoch": 146.48, + "learning_rate": 8.421052631578947e-06, + "loss": 0.0002, + "step": 9814 + }, + { + "epoch": 146.49, + "learning_rate": 8.3859649122807e-06, + "loss": 0.1118, + "step": 9815 + }, + { + "epoch": 146.51, + "learning_rate": 8.350877192982456e-06, + "loss": 0.0002, + "step": 9816 + }, + { + "epoch": 146.52, + "learning_rate": 8.31578947368421e-06, + "loss": 0.0014, + "step": 9817 + }, + { + "epoch": 146.54, + "learning_rate": 8.280701754385964e-06, + "loss": 0.0001, + "step": 9818 + }, + { + "epoch": 146.55, + "learning_rate": 8.24561403508772e-06, + "loss": 0.0002, + "step": 9819 + }, + { + "epoch": 146.57, + "learning_rate": 8.210526315789473e-06, + "loss": 0.0002, + "step": 9820 + }, + { + "epoch": 146.58, + "learning_rate": 8.175438596491227e-06, + "loss": 0.0005, + "step": 9821 + }, + { + "epoch": 146.59, + "learning_rate": 8.140350877192983e-06, + "loss": 0.0003, + "step": 9822 + }, + { + "epoch": 146.61, + "learning_rate": 8.105263157894736e-06, + "loss": 0.0002, + "step": 9823 + }, + { + "epoch": 146.62, + "learning_rate": 8.07017543859649e-06, + "loss": 0.0002, + "step": 9824 + }, + { + "epoch": 146.64, + "learning_rate": 8.035087719298246e-06, + "loss": 0.0002, + "step": 9825 + }, + { + "epoch": 146.65, + "learning_rate": 8e-06, + "loss": 0.0002, + "step": 9826 + }, + { + "epoch": 146.67, + "learning_rate": 7.964912280701753e-06, + "loss": 0.0012, + "step": 9827 + }, + { + "epoch": 146.68, + "learning_rate": 7.929824561403509e-06, + "loss": 0.0002, + "step": 9828 + }, + { + "epoch": 146.7, + "learning_rate": 7.894736842105261e-06, + "loss": 0.0003, + "step": 9829 + }, + { + "epoch": 146.71, + "learning_rate": 7.859649122807017e-06, + "loss": 0.0002, + "step": 9830 + }, + { + "epoch": 146.73, + "learning_rate": 7.824561403508772e-06, + "loss": 0.0002, + "step": 9831 + }, + { + "epoch": 146.74, + "learning_rate": 7.789473684210524e-06, + "loss": 0.0002, + "step": 9832 + }, + { + "epoch": 146.76, + "learning_rate": 7.75438596491228e-06, + "loss": 0.0002, + "step": 9833 + }, + { + "epoch": 146.77, + "learning_rate": 7.719298245614036e-06, + "loss": 0.0002, + "step": 9834 + }, + { + "epoch": 146.79, + "learning_rate": 7.684210526315788e-06, + "loss": 0.0002, + "step": 9835 + }, + { + "epoch": 146.8, + "learning_rate": 7.649122807017543e-06, + "loss": 0.0002, + "step": 9836 + }, + { + "epoch": 146.82, + "learning_rate": 7.614035087719298e-06, + "loss": 0.0002, + "step": 9837 + }, + { + "epoch": 146.83, + "learning_rate": 7.578947368421052e-06, + "loss": 0.0004, + "step": 9838 + }, + { + "epoch": 146.85, + "learning_rate": 7.543859649122807e-06, + "loss": 0.0002, + "step": 9839 + }, + { + "epoch": 146.86, + "learning_rate": 7.5087719298245605e-06, + "loss": 0.0002, + "step": 9840 + }, + { + "epoch": 146.88, + "learning_rate": 7.473684210526315e-06, + "loss": 0.0002, + "step": 9841 + }, + { + "epoch": 146.89, + "learning_rate": 7.43859649122807e-06, + "loss": 0.0002, + "step": 9842 + }, + { + "epoch": 146.91, + "learning_rate": 7.403508771929824e-06, + "loss": 0.0001, + "step": 9843 + }, + { + "epoch": 146.92, + "learning_rate": 7.3684210526315784e-06, + "loss": 0.0001, + "step": 9844 + }, + { + "epoch": 146.94, + "learning_rate": 7.333333333333333e-06, + "loss": 0.0002, + "step": 9845 + }, + { + "epoch": 146.95, + "learning_rate": 7.298245614035087e-06, + "loss": 0.0004, + "step": 9846 + }, + { + "epoch": 146.97, + "learning_rate": 7.263157894736842e-06, + "loss": 0.0003, + "step": 9847 + }, + { + "epoch": 146.98, + "learning_rate": 7.228070175438596e-06, + "loss": 0.0001, + "step": 9848 + }, + { + "epoch": 147.0, + "learning_rate": 7.19298245614035e-06, + "loss": 0.0002, + "step": 9849 + }, + { + "epoch": 147.01, + "learning_rate": 7.157894736842105e-06, + "loss": 0.0002, + "step": 9850 + }, + { + "epoch": 147.03, + "learning_rate": 7.122807017543859e-06, + "loss": 0.0002, + "step": 9851 + }, + { + "epoch": 147.04, + "learning_rate": 7.0877192982456136e-06, + "loss": 0.0002, + "step": 9852 + }, + { + "epoch": 147.06, + "learning_rate": 7.052631578947368e-06, + "loss": 0.0002, + "step": 9853 + }, + { + "epoch": 147.07, + "learning_rate": 7.017543859649122e-06, + "loss": 0.0002, + "step": 9854 + }, + { + "epoch": 147.09, + "learning_rate": 6.982456140350877e-06, + "loss": 0.0002, + "step": 9855 + }, + { + "epoch": 147.1, + "learning_rate": 6.9473684210526315e-06, + "loss": 0.0002, + "step": 9856 + }, + { + "epoch": 147.12, + "learning_rate": 6.912280701754385e-06, + "loss": 0.0002, + "step": 9857 + }, + { + "epoch": 147.13, + "learning_rate": 6.87719298245614e-06, + "loss": 0.0002, + "step": 9858 + }, + { + "epoch": 147.15, + "learning_rate": 6.842105263157895e-06, + "loss": 0.0002, + "step": 9859 + }, + { + "epoch": 147.16, + "learning_rate": 6.807017543859649e-06, + "loss": 0.0002, + "step": 9860 + }, + { + "epoch": 147.18, + "learning_rate": 6.771929824561403e-06, + "loss": 0.0002, + "step": 9861 + }, + { + "epoch": 147.19, + "learning_rate": 6.736842105263156e-06, + "loss": 0.0002, + "step": 9862 + }, + { + "epoch": 147.21, + "learning_rate": 6.701754385964912e-06, + "loss": 0.0002, + "step": 9863 + }, + { + "epoch": 147.22, + "learning_rate": 6.666666666666667e-06, + "loss": 0.0002, + "step": 9864 + }, + { + "epoch": 147.24, + "learning_rate": 6.63157894736842e-06, + "loss": 0.0004, + "step": 9865 + }, + { + "epoch": 147.25, + "learning_rate": 6.596491228070175e-06, + "loss": 0.0002, + "step": 9866 + }, + { + "epoch": 147.27, + "learning_rate": 6.56140350877193e-06, + "loss": 0.0002, + "step": 9867 + }, + { + "epoch": 147.28, + "learning_rate": 6.526315789473683e-06, + "loss": 0.0002, + "step": 9868 + }, + { + "epoch": 147.3, + "learning_rate": 6.4912280701754385e-06, + "loss": 0.0001, + "step": 9869 + }, + { + "epoch": 147.31, + "learning_rate": 6.456140350877193e-06, + "loss": 0.0002, + "step": 9870 + }, + { + "epoch": 147.33, + "learning_rate": 6.421052631578946e-06, + "loss": 0.0002, + "step": 9871 + }, + { + "epoch": 147.34, + "learning_rate": 6.385964912280702e-06, + "loss": 0.0004, + "step": 9872 + }, + { + "epoch": 147.36, + "learning_rate": 6.350877192982455e-06, + "loss": 0.0002, + "step": 9873 + }, + { + "epoch": 147.37, + "learning_rate": 6.3157894736842095e-06, + "loss": 0.0002, + "step": 9874 + }, + { + "epoch": 147.39, + "learning_rate": 6.280701754385964e-06, + "loss": 0.0002, + "step": 9875 + }, + { + "epoch": 147.4, + "learning_rate": 6.245614035087718e-06, + "loss": 0.0002, + "step": 9876 + }, + { + "epoch": 147.42, + "learning_rate": 6.210526315789473e-06, + "loss": 0.0002, + "step": 9877 + }, + { + "epoch": 147.43, + "learning_rate": 6.1754385964912275e-06, + "loss": 0.0002, + "step": 9878 + }, + { + "epoch": 147.45, + "learning_rate": 6.140350877192981e-06, + "loss": 0.0002, + "step": 9879 + }, + { + "epoch": 147.46, + "learning_rate": 6.105263157894736e-06, + "loss": 0.0002, + "step": 9880 + }, + { + "epoch": 147.48, + "learning_rate": 6.070175438596491e-06, + "loss": 0.0002, + "step": 9881 + }, + { + "epoch": 147.49, + "learning_rate": 6.035087719298245e-06, + "loss": 0.0002, + "step": 9882 + }, + { + "epoch": 147.51, + "learning_rate": 5.999999999999999e-06, + "loss": 0.0217, + "step": 9883 + }, + { + "epoch": 147.52, + "learning_rate": 5.964912280701754e-06, + "loss": 0.0003, + "step": 9884 + }, + { + "epoch": 147.54, + "learning_rate": 5.929824561403508e-06, + "loss": 0.0002, + "step": 9885 + }, + { + "epoch": 147.55, + "learning_rate": 5.894736842105263e-06, + "loss": 0.0002, + "step": 9886 + }, + { + "epoch": 147.57, + "learning_rate": 5.8596491228070164e-06, + "loss": 0.0001, + "step": 9887 + }, + { + "epoch": 147.58, + "learning_rate": 5.824561403508771e-06, + "loss": 0.0003, + "step": 9888 + }, + { + "epoch": 147.59, + "learning_rate": 5.789473684210526e-06, + "loss": 0.0002, + "step": 9889 + }, + { + "epoch": 147.61, + "learning_rate": 5.75438596491228e-06, + "loss": 0.0005, + "step": 9890 + }, + { + "epoch": 147.62, + "learning_rate": 5.719298245614034e-06, + "loss": 0.0002, + "step": 9891 + }, + { + "epoch": 147.64, + "learning_rate": 5.684210526315789e-06, + "loss": 0.0004, + "step": 9892 + }, + { + "epoch": 147.65, + "learning_rate": 5.649122807017543e-06, + "loss": 0.0002, + "step": 9893 + }, + { + "epoch": 147.67, + "learning_rate": 5.614035087719298e-06, + "loss": 0.0001, + "step": 9894 + }, + { + "epoch": 147.68, + "learning_rate": 5.578947368421052e-06, + "loss": 0.0002, + "step": 9895 + }, + { + "epoch": 147.7, + "learning_rate": 5.543859649122806e-06, + "loss": 0.0002, + "step": 9896 + }, + { + "epoch": 147.71, + "learning_rate": 5.508771929824561e-06, + "loss": 0.0002, + "step": 9897 + }, + { + "epoch": 147.73, + "learning_rate": 5.473684210526315e-06, + "loss": 0.0001, + "step": 9898 + }, + { + "epoch": 147.74, + "learning_rate": 5.4385964912280695e-06, + "loss": 0.0002, + "step": 9899 + }, + { + "epoch": 147.76, + "learning_rate": 5.403508771929824e-06, + "loss": 0.0002, + "step": 9900 + }, + { + "epoch": 147.77, + "learning_rate": 5.368421052631578e-06, + "loss": 0.0004, + "step": 9901 + }, + { + "epoch": 147.79, + "learning_rate": 5.333333333333333e-06, + "loss": 0.0039, + "step": 9902 + }, + { + "epoch": 147.8, + "learning_rate": 5.2982456140350875e-06, + "loss": 0.0002, + "step": 9903 + }, + { + "epoch": 147.82, + "learning_rate": 5.263157894736841e-06, + "loss": 0.0002, + "step": 9904 + }, + { + "epoch": 147.83, + "learning_rate": 5.228070175438596e-06, + "loss": 0.0002, + "step": 9905 + }, + { + "epoch": 147.85, + "learning_rate": 5.192982456140351e-06, + "loss": 0.0002, + "step": 9906 + }, + { + "epoch": 147.86, + "learning_rate": 5.157894736842105e-06, + "loss": 0.0001, + "step": 9907 + }, + { + "epoch": 147.88, + "learning_rate": 5.122807017543859e-06, + "loss": 0.0092, + "step": 9908 + }, + { + "epoch": 147.89, + "learning_rate": 5.087719298245613e-06, + "loss": 0.0002, + "step": 9909 + }, + { + "epoch": 147.91, + "learning_rate": 5.052631578947368e-06, + "loss": 0.0002, + "step": 9910 + }, + { + "epoch": 147.92, + "learning_rate": 5.017543859649123e-06, + "loss": 0.0002, + "step": 9911 + }, + { + "epoch": 147.94, + "learning_rate": 4.9824561403508765e-06, + "loss": 0.0002, + "step": 9912 + }, + { + "epoch": 147.95, + "learning_rate": 4.947368421052631e-06, + "loss": 0.0002, + "step": 9913 + }, + { + "epoch": 147.97, + "learning_rate": 4.912280701754386e-06, + "loss": 0.0002, + "step": 9914 + }, + { + "epoch": 147.98, + "learning_rate": 4.87719298245614e-06, + "loss": 0.0002, + "step": 9915 + }, + { + "epoch": 148.0, + "learning_rate": 4.8421052631578945e-06, + "loss": 0.0002, + "step": 9916 + }, + { + "epoch": 148.01, + "learning_rate": 4.807017543859649e-06, + "loss": 0.0004, + "step": 9917 + }, + { + "epoch": 148.03, + "learning_rate": 4.771929824561403e-06, + "loss": 0.0002, + "step": 9918 + }, + { + "epoch": 148.04, + "learning_rate": 4.736842105263158e-06, + "loss": 0.0002, + "step": 9919 + }, + { + "epoch": 148.06, + "learning_rate": 4.701754385964912e-06, + "loss": 0.0002, + "step": 9920 + }, + { + "epoch": 148.07, + "learning_rate": 4.666666666666666e-06, + "loss": 0.0002, + "step": 9921 + }, + { + "epoch": 148.09, + "learning_rate": 4.631578947368421e-06, + "loss": 0.0002, + "step": 9922 + }, + { + "epoch": 148.1, + "learning_rate": 4.596491228070175e-06, + "loss": 0.0002, + "step": 9923 + }, + { + "epoch": 148.12, + "learning_rate": 4.56140350877193e-06, + "loss": 0.0003, + "step": 9924 + }, + { + "epoch": 148.13, + "learning_rate": 4.5263157894736834e-06, + "loss": 0.0001, + "step": 9925 + }, + { + "epoch": 148.15, + "learning_rate": 4.491228070175438e-06, + "loss": 0.0003, + "step": 9926 + }, + { + "epoch": 148.16, + "learning_rate": 4.456140350877193e-06, + "loss": 0.0002, + "step": 9927 + }, + { + "epoch": 148.18, + "learning_rate": 4.421052631578947e-06, + "loss": 0.0002, + "step": 9928 + }, + { + "epoch": 148.19, + "learning_rate": 4.3859649122807014e-06, + "loss": 0.0002, + "step": 9929 + }, + { + "epoch": 148.21, + "learning_rate": 4.350877192982456e-06, + "loss": 0.0006, + "step": 9930 + }, + { + "epoch": 148.22, + "learning_rate": 4.31578947368421e-06, + "loss": 0.0001, + "step": 9931 + }, + { + "epoch": 148.24, + "learning_rate": 4.280701754385965e-06, + "loss": 0.0002, + "step": 9932 + }, + { + "epoch": 148.25, + "learning_rate": 4.280701754385965e-06, + "loss": 0.1554, + "step": 9933 + }, + { + "epoch": 148.27, + "learning_rate": 4.245614035087719e-06, + "loss": 0.0004, + "step": 9934 + }, + { + "epoch": 148.28, + "learning_rate": 4.210526315789473e-06, + "loss": 0.0002, + "step": 9935 + }, + { + "epoch": 148.3, + "learning_rate": 4.175438596491228e-06, + "loss": 0.0001, + "step": 9936 + }, + { + "epoch": 148.31, + "learning_rate": 4.140350877192982e-06, + "loss": 0.0002, + "step": 9937 + }, + { + "epoch": 148.33, + "learning_rate": 4.1052631578947365e-06, + "loss": 0.0002, + "step": 9938 + }, + { + "epoch": 148.34, + "learning_rate": 4.070175438596491e-06, + "loss": 0.0002, + "step": 9939 + }, + { + "epoch": 148.36, + "learning_rate": 4.035087719298245e-06, + "loss": 0.0002, + "step": 9940 + }, + { + "epoch": 148.37, + "learning_rate": 4e-06, + "loss": 0.0003, + "step": 9941 + }, + { + "epoch": 148.39, + "learning_rate": 3.9649122807017545e-06, + "loss": 0.0002, + "step": 9942 + }, + { + "epoch": 148.4, + "learning_rate": 3.929824561403508e-06, + "loss": 0.0002, + "step": 9943 + }, + { + "epoch": 148.42, + "learning_rate": 3.894736842105262e-06, + "loss": 0.0001, + "step": 9944 + }, + { + "epoch": 148.43, + "learning_rate": 3.859649122807018e-06, + "loss": 0.0014, + "step": 9945 + }, + { + "epoch": 148.45, + "learning_rate": 3.824561403508772e-06, + "loss": 0.0005, + "step": 9946 + }, + { + "epoch": 148.46, + "learning_rate": 3.789473684210526e-06, + "loss": 0.0002, + "step": 9947 + }, + { + "epoch": 148.48, + "learning_rate": 3.7543859649122802e-06, + "loss": 0.0002, + "step": 9948 + }, + { + "epoch": 148.49, + "learning_rate": 3.719298245614035e-06, + "loss": 0.0002, + "step": 9949 + }, + { + "epoch": 148.51, + "learning_rate": 3.6842105263157892e-06, + "loss": 0.0002, + "step": 9950 + }, + { + "epoch": 148.52, + "learning_rate": 3.6491228070175435e-06, + "loss": 0.0002, + "step": 9951 + }, + { + "epoch": 148.54, + "learning_rate": 3.614035087719298e-06, + "loss": 0.0001, + "step": 9952 + }, + { + "epoch": 148.55, + "learning_rate": 3.5789473684210525e-06, + "loss": 0.0002, + "step": 9953 + }, + { + "epoch": 148.57, + "learning_rate": 3.5438596491228068e-06, + "loss": 0.003, + "step": 9954 + }, + { + "epoch": 148.58, + "learning_rate": 3.508771929824561e-06, + "loss": 0.0002, + "step": 9955 + }, + { + "epoch": 148.59, + "learning_rate": 3.4736842105263158e-06, + "loss": 0.0002, + "step": 9956 + }, + { + "epoch": 148.61, + "learning_rate": 3.43859649122807e-06, + "loss": 0.0182, + "step": 9957 + }, + { + "epoch": 148.62, + "learning_rate": 3.4035087719298243e-06, + "loss": 0.0002, + "step": 9958 + }, + { + "epoch": 148.64, + "learning_rate": 3.368421052631578e-06, + "loss": 0.0002, + "step": 9959 + }, + { + "epoch": 148.65, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.0002, + "step": 9960 + }, + { + "epoch": 148.67, + "learning_rate": 3.2982456140350876e-06, + "loss": 0.0002, + "step": 9961 + }, + { + "epoch": 148.68, + "learning_rate": 3.2631578947368415e-06, + "loss": 0.0002, + "step": 9962 + }, + { + "epoch": 148.7, + "learning_rate": 3.2280701754385966e-06, + "loss": 0.0002, + "step": 9963 + }, + { + "epoch": 148.71, + "learning_rate": 3.192982456140351e-06, + "loss": 0.0002, + "step": 9964 + }, + { + "epoch": 148.73, + "learning_rate": 3.1578947368421047e-06, + "loss": 0.0002, + "step": 9965 + }, + { + "epoch": 148.74, + "learning_rate": 3.122807017543859e-06, + "loss": 0.0002, + "step": 9966 + }, + { + "epoch": 148.76, + "learning_rate": 3.0877192982456137e-06, + "loss": 0.0002, + "step": 9967 + }, + { + "epoch": 148.77, + "learning_rate": 3.052631578947368e-06, + "loss": 0.0002, + "step": 9968 + }, + { + "epoch": 148.79, + "learning_rate": 3.0175438596491223e-06, + "loss": 0.0002, + "step": 9969 + }, + { + "epoch": 148.8, + "learning_rate": 2.982456140350877e-06, + "loss": 0.0002, + "step": 9970 + }, + { + "epoch": 148.82, + "learning_rate": 2.9473684210526313e-06, + "loss": 0.0002, + "step": 9971 + }, + { + "epoch": 148.83, + "learning_rate": 2.9122807017543856e-06, + "loss": 0.0002, + "step": 9972 + }, + { + "epoch": 148.85, + "learning_rate": 2.87719298245614e-06, + "loss": 0.0002, + "step": 9973 + }, + { + "epoch": 148.86, + "learning_rate": 2.8421052631578946e-06, + "loss": 0.0002, + "step": 9974 + }, + { + "epoch": 148.88, + "learning_rate": 2.807017543859649e-06, + "loss": 0.0002, + "step": 9975 + }, + { + "epoch": 148.89, + "learning_rate": 2.771929824561403e-06, + "loss": 0.0002, + "step": 9976 + }, + { + "epoch": 148.91, + "learning_rate": 2.7368421052631574e-06, + "loss": 0.0001, + "step": 9977 + }, + { + "epoch": 148.92, + "learning_rate": 2.701754385964912e-06, + "loss": 0.0002, + "step": 9978 + }, + { + "epoch": 148.94, + "learning_rate": 2.6666666666666664e-06, + "loss": 0.0019, + "step": 9979 + }, + { + "epoch": 148.95, + "learning_rate": 2.6315789473684207e-06, + "loss": 0.0003, + "step": 9980 + }, + { + "epoch": 148.97, + "learning_rate": 2.5964912280701754e-06, + "loss": 0.0002, + "step": 9981 + }, + { + "epoch": 148.98, + "learning_rate": 2.5614035087719297e-06, + "loss": 0.0002, + "step": 9982 + }, + { + "epoch": 149.0, + "learning_rate": 2.526315789473684e-06, + "loss": 0.0001, + "step": 9983 + }, + { + "epoch": 149.01, + "learning_rate": 2.4912280701754382e-06, + "loss": 0.0002, + "step": 9984 + }, + { + "epoch": 149.03, + "learning_rate": 2.456140350877193e-06, + "loss": 0.0977, + "step": 9985 + }, + { + "epoch": 149.04, + "learning_rate": 2.4210526315789472e-06, + "loss": 0.0002, + "step": 9986 + }, + { + "epoch": 149.06, + "learning_rate": 2.3859649122807015e-06, + "loss": 0.0002, + "step": 9987 + }, + { + "epoch": 149.07, + "learning_rate": 2.350877192982456e-06, + "loss": 0.0002, + "step": 9988 + }, + { + "epoch": 149.09, + "learning_rate": 2.3157894736842105e-06, + "loss": 0.0002, + "step": 9989 + }, + { + "epoch": 149.1, + "learning_rate": 2.280701754385965e-06, + "loss": 0.0002, + "step": 9990 + }, + { + "epoch": 149.12, + "learning_rate": 2.245614035087719e-06, + "loss": 0.0002, + "step": 9991 + }, + { + "epoch": 149.13, + "learning_rate": 2.2105263157894734e-06, + "loss": 0.0002, + "step": 9992 + }, + { + "epoch": 149.15, + "learning_rate": 2.175438596491228e-06, + "loss": 0.0002, + "step": 9993 + }, + { + "epoch": 149.16, + "learning_rate": 2.1403508771929824e-06, + "loss": 0.1007, + "step": 9994 + }, + { + "epoch": 149.18, + "learning_rate": 2.1052631578947366e-06, + "loss": 0.0002, + "step": 9995 + }, + { + "epoch": 149.19, + "learning_rate": 2.070175438596491e-06, + "loss": 0.0002, + "step": 9996 + }, + { + "epoch": 149.21, + "learning_rate": 2.0350877192982456e-06, + "loss": 0.0002, + "step": 9997 + }, + { + "epoch": 149.22, + "learning_rate": 2e-06, + "loss": 0.0002, + "step": 9998 + }, + { + "epoch": 149.24, + "learning_rate": 1.964912280701754e-06, + "loss": 0.0002, + "step": 9999 + }, + { + "epoch": 149.25, + "learning_rate": 1.929824561403509e-06, + "loss": 0.0002, + "step": 10000 + }, + { + "epoch": 149.25, + "eval_accuracy": 0.8920704845814978, + "eval_f1": 0.8917851618940525, + "eval_loss": 0.5978561639785767, + "eval_runtime": 345.0023, + "eval_samples_per_second": 11.843, + "eval_steps_per_second": 0.742, + "step": 10000 + }, + { + "epoch": 149.27, + "learning_rate": 1.894736842105263e-06, + "loss": 0.0002, + "step": 10001 + }, + { + "epoch": 149.28, + "learning_rate": 1.8596491228070175e-06, + "loss": 0.0001, + "step": 10002 + }, + { + "epoch": 149.3, + "learning_rate": 1.8245614035087718e-06, + "loss": 0.0001, + "step": 10003 + }, + { + "epoch": 149.31, + "learning_rate": 1.7894736842105262e-06, + "loss": 0.0002, + "step": 10004 + }, + { + "epoch": 149.33, + "learning_rate": 1.7543859649122805e-06, + "loss": 0.0002, + "step": 10005 + }, + { + "epoch": 149.34, + "learning_rate": 1.719298245614035e-06, + "loss": 0.0002, + "step": 10006 + }, + { + "epoch": 149.36, + "learning_rate": 1.684210526315789e-06, + "loss": 0.0002, + "step": 10007 + }, + { + "epoch": 149.37, + "learning_rate": 1.6491228070175438e-06, + "loss": 0.0002, + "step": 10008 + }, + { + "epoch": 149.39, + "learning_rate": 1.6140350877192983e-06, + "loss": 0.0001, + "step": 10009 + }, + { + "epoch": 149.4, + "learning_rate": 1.5789473684210524e-06, + "loss": 0.0002, + "step": 10010 + }, + { + "epoch": 149.42, + "learning_rate": 1.5438596491228069e-06, + "loss": 0.0003, + "step": 10011 + }, + { + "epoch": 149.43, + "learning_rate": 1.5087719298245611e-06, + "loss": 0.0002, + "step": 10012 + }, + { + "epoch": 149.45, + "learning_rate": 1.4736842105263156e-06, + "loss": 0.0002, + "step": 10013 + }, + { + "epoch": 149.46, + "learning_rate": 1.43859649122807e-06, + "loss": 0.0027, + "step": 10014 + }, + { + "epoch": 149.48, + "learning_rate": 1.4035087719298244e-06, + "loss": 0.0002, + "step": 10015 + }, + { + "epoch": 149.49, + "learning_rate": 1.3684210526315787e-06, + "loss": 0.0001, + "step": 10016 + }, + { + "epoch": 149.51, + "learning_rate": 1.3333333333333332e-06, + "loss": 0.0002, + "step": 10017 + }, + { + "epoch": 149.52, + "learning_rate": 1.2982456140350877e-06, + "loss": 0.0002, + "step": 10018 + }, + { + "epoch": 149.54, + "learning_rate": 1.263157894736842e-06, + "loss": 0.0002, + "step": 10019 + }, + { + "epoch": 149.55, + "learning_rate": 1.2280701754385965e-06, + "loss": 0.0002, + "step": 10020 + }, + { + "epoch": 149.57, + "learning_rate": 1.1929824561403508e-06, + "loss": 0.0002, + "step": 10021 + }, + { + "epoch": 149.58, + "learning_rate": 1.1578947368421053e-06, + "loss": 0.0002, + "step": 10022 + }, + { + "epoch": 149.59, + "learning_rate": 1.1228070175438595e-06, + "loss": 0.0002, + "step": 10023 + }, + { + "epoch": 149.61, + "learning_rate": 1.087719298245614e-06, + "loss": 0.0002, + "step": 10024 + }, + { + "epoch": 149.62, + "learning_rate": 1.0526315789473683e-06, + "loss": 0.0002, + "step": 10025 + }, + { + "epoch": 149.64, + "learning_rate": 1.0175438596491228e-06, + "loss": 0.0002, + "step": 10026 + }, + { + "epoch": 149.65, + "learning_rate": 9.82456140350877e-07, + "loss": 0.0002, + "step": 10027 + }, + { + "epoch": 149.67, + "learning_rate": 9.473684210526315e-07, + "loss": 0.0002, + "step": 10028 + }, + { + "epoch": 149.68, + "learning_rate": 9.122807017543859e-07, + "loss": 0.0003, + "step": 10029 + }, + { + "epoch": 149.7, + "learning_rate": 8.771929824561403e-07, + "loss": 0.0025, + "step": 10030 + }, + { + "epoch": 149.71, + "learning_rate": 8.421052631578945e-07, + "loss": 0.0769, + "step": 10031 + }, + { + "epoch": 149.73, + "learning_rate": 8.070175438596491e-07, + "loss": 0.0002, + "step": 10032 + }, + { + "epoch": 149.74, + "learning_rate": 7.719298245614034e-07, + "loss": 0.0001, + "step": 10033 + }, + { + "epoch": 149.76, + "learning_rate": 7.368421052631578e-07, + "loss": 0.0003, + "step": 10034 + }, + { + "epoch": 149.77, + "learning_rate": 7.017543859649122e-07, + "loss": 0.0002, + "step": 10035 + }, + { + "epoch": 149.79, + "learning_rate": 6.666666666666666e-07, + "loss": 0.0001, + "step": 10036 + }, + { + "epoch": 149.8, + "learning_rate": 6.31578947368421e-07, + "loss": 0.0002, + "step": 10037 + }, + { + "epoch": 149.82, + "learning_rate": 5.964912280701754e-07, + "loss": 0.0002, + "step": 10038 + }, + { + "epoch": 149.83, + "learning_rate": 5.614035087719298e-07, + "loss": 0.0002, + "step": 10039 + }, + { + "epoch": 149.85, + "learning_rate": 5.263157894736842e-07, + "loss": 0.0002, + "step": 10040 + }, + { + "epoch": 149.86, + "learning_rate": 4.912280701754385e-07, + "loss": 0.0002, + "step": 10041 + }, + { + "epoch": 149.88, + "learning_rate": 4.5614035087719294e-07, + "loss": 0.0003, + "step": 10042 + }, + { + "epoch": 149.89, + "learning_rate": 4.210526315789473e-07, + "loss": 0.0002, + "step": 10043 + }, + { + "epoch": 149.91, + "learning_rate": 3.859649122807017e-07, + "loss": 0.0002, + "step": 10044 + }, + { + "epoch": 149.92, + "learning_rate": 3.508771929824561e-07, + "loss": 0.0145, + "step": 10045 + }, + { + "epoch": 149.94, + "learning_rate": 3.157894736842105e-07, + "loss": 0.0002, + "step": 10046 + }, + { + "epoch": 149.95, + "learning_rate": 2.807017543859649e-07, + "loss": 0.2392, + "step": 10047 + }, + { + "epoch": 149.97, + "learning_rate": 2.456140350877193e-07, + "loss": 0.0002, + "step": 10048 + }, + { + "epoch": 149.98, + "learning_rate": 2.1052631578947364e-07, + "loss": 0.0002, + "step": 10049 + }, + { + "epoch": 150.0, + "learning_rate": 1.7543859649122805e-07, + "loss": 0.0001, + "step": 10050 + }, + { + "epoch": 150.0, + "step": 10050, + "total_flos": 1.3732582771233128e+20, + "train_loss": 0.2665309856276014, + "train_runtime": 44149.9371, + "train_samples_per_second": 7.308, + "train_steps_per_second": 0.228 + } + ], + "max_steps": 10050, + "num_train_epochs": 150, + "total_flos": 1.3732582771233128e+20, + "trial_name": null, + "trial_params": null +}