{ "best_metric": 0.6538461538461539, "best_model_checkpoint": "wav2vec2-5Class-train-test-finetune-V7/checkpoint-552", "epoch": 230.76923076923077, "eval_steps": 500, "global_step": 750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.92, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6025639772415161, "eval_runtime": 25.1775, "eval_samples_per_second": 11.359, "eval_steps_per_second": 0.119, "step": 3 }, { "epoch": 1.85, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6023768186569214, "eval_runtime": 13.2244, "eval_samples_per_second": 21.627, "eval_steps_per_second": 0.227, "step": 6 }, { "epoch": 2.77, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6020615100860596, "eval_runtime": 19.1459, "eval_samples_per_second": 14.938, "eval_steps_per_second": 0.157, "step": 9 }, { "epoch": 4.0, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6014316082000732, "eval_runtime": 24.6871, "eval_samples_per_second": 11.585, "eval_steps_per_second": 0.122, "step": 13 }, { "epoch": 4.92, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6007977724075317, "eval_runtime": 14.5871, "eval_samples_per_second": 19.606, "eval_steps_per_second": 0.206, "step": 16 }, { "epoch": 5.85, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.600056529045105, "eval_runtime": 15.3427, "eval_samples_per_second": 18.641, "eval_steps_per_second": 0.196, "step": 19 }, { "epoch": 6.77, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.599173903465271, "eval_runtime": 23.1969, "eval_samples_per_second": 12.329, "eval_steps_per_second": 0.129, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.17482517482517482, "eval_loss": 1.5977518558502197, "eval_runtime": 24.249, "eval_samples_per_second": 11.794, "eval_steps_per_second": 0.124, "step": 26 }, { "epoch": 8.92, "eval_accuracy": 0.1888111888111888, "eval_loss": 1.5965358018875122, "eval_runtime": 14.7479, "eval_samples_per_second": 19.393, "eval_steps_per_second": 0.203, "step": 29 }, { "epoch": 9.85, "eval_accuracy": 0.2097902097902098, "eval_loss": 1.5952231884002686, "eval_runtime": 18.4251, "eval_samples_per_second": 15.522, "eval_steps_per_second": 0.163, "step": 32 }, { "epoch": 10.77, "eval_accuracy": 0.22727272727272727, "eval_loss": 1.593780517578125, "eval_runtime": 20.5455, "eval_samples_per_second": 13.92, "eval_steps_per_second": 0.146, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.23426573426573427, "eval_loss": 1.5916367769241333, "eval_runtime": 16.0385, "eval_samples_per_second": 17.832, "eval_steps_per_second": 0.187, "step": 39 }, { "epoch": 12.92, "eval_accuracy": 0.2692307692307692, "eval_loss": 1.5898847579956055, "eval_runtime": 21.1768, "eval_samples_per_second": 13.505, "eval_steps_per_second": 0.142, "step": 42 }, { "epoch": 13.85, "eval_accuracy": 0.2727272727272727, "eval_loss": 1.5879966020584106, "eval_runtime": 13.5248, "eval_samples_per_second": 21.146, "eval_steps_per_second": 0.222, "step": 45 }, { "epoch": 14.77, "eval_accuracy": 0.3076923076923077, "eval_loss": 1.5860111713409424, "eval_runtime": 12.9444, "eval_samples_per_second": 22.094, "eval_steps_per_second": 0.232, "step": 48 }, { "epoch": 16.0, "eval_accuracy": 0.35664335664335667, "eval_loss": 1.583256483078003, "eval_runtime": 25.0161, "eval_samples_per_second": 11.433, "eval_steps_per_second": 0.12, "step": 52 }, { "epoch": 16.92, "eval_accuracy": 0.3881118881118881, "eval_loss": 1.5810612440109253, "eval_runtime": 14.5666, "eval_samples_per_second": 19.634, "eval_steps_per_second": 0.206, "step": 55 }, { "epoch": 17.85, "eval_accuracy": 0.3811188811188811, "eval_loss": 1.578792929649353, "eval_runtime": 13.0625, "eval_samples_per_second": 21.895, "eval_steps_per_second": 0.23, "step": 58 }, { "epoch": 18.77, "eval_accuracy": 0.36713286713286714, "eval_loss": 1.576446294784546, "eval_runtime": 18.6648, "eval_samples_per_second": 15.323, "eval_steps_per_second": 0.161, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.34965034965034963, "eval_loss": 1.5730595588684082, "eval_runtime": 11.5386, "eval_samples_per_second": 24.786, "eval_steps_per_second": 0.26, "step": 65 }, { "epoch": 20.92, "eval_accuracy": 0.32867132867132864, "eval_loss": 1.5702308416366577, "eval_runtime": 15.7041, "eval_samples_per_second": 18.212, "eval_steps_per_second": 0.191, "step": 68 }, { "epoch": 21.85, "eval_accuracy": 0.32517482517482516, "eval_loss": 1.5671954154968262, "eval_runtime": 11.3961, "eval_samples_per_second": 25.096, "eval_steps_per_second": 0.263, "step": 71 }, { "epoch": 22.77, "eval_accuracy": 0.3146853146853147, "eval_loss": 1.5641109943389893, "eval_runtime": 15.1673, "eval_samples_per_second": 18.856, "eval_steps_per_second": 0.198, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.3111888111888112, "eval_loss": 1.5597317218780518, "eval_runtime": 24.7249, "eval_samples_per_second": 11.567, "eval_steps_per_second": 0.121, "step": 78 }, { "epoch": 24.92, "eval_accuracy": 0.3076923076923077, "eval_loss": 1.5563873052597046, "eval_runtime": 13.1713, "eval_samples_per_second": 21.714, "eval_steps_per_second": 0.228, "step": 81 }, { "epoch": 25.85, "eval_accuracy": 0.3041958041958042, "eval_loss": 1.553160548210144, "eval_runtime": 12.194, "eval_samples_per_second": 23.454, "eval_steps_per_second": 0.246, "step": 84 }, { "epoch": 26.77, "eval_accuracy": 0.2937062937062937, "eval_loss": 1.549930214881897, "eval_runtime": 25.3259, "eval_samples_per_second": 11.293, "eval_steps_per_second": 0.118, "step": 87 }, { "epoch": 28.0, "eval_accuracy": 0.2902097902097902, "eval_loss": 1.5454081296920776, "eval_runtime": 10.4422, "eval_samples_per_second": 27.389, "eval_steps_per_second": 0.287, "step": 91 }, { "epoch": 28.92, "eval_accuracy": 0.2867132867132867, "eval_loss": 1.5419245958328247, "eval_runtime": 16.2077, "eval_samples_per_second": 17.646, "eval_steps_per_second": 0.185, "step": 94 }, { "epoch": 29.85, "eval_accuracy": 0.28321678321678323, "eval_loss": 1.5383468866348267, "eval_runtime": 27.5909, "eval_samples_per_second": 10.366, "eval_steps_per_second": 0.109, "step": 97 }, { "epoch": 30.77, "grad_norm": 65491.41796875, "learning_rate": 2.8888888888888888e-05, "loss": 1.5563, "step": 100 }, { "epoch": 30.77, "eval_accuracy": 0.2762237762237762, "eval_loss": 1.5348902940750122, "eval_runtime": 14.2201, "eval_samples_per_second": 20.112, "eval_steps_per_second": 0.211, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.27972027972027974, "eval_loss": 1.5304443836212158, "eval_runtime": 11.5662, "eval_samples_per_second": 24.727, "eval_steps_per_second": 0.259, "step": 104 }, { "epoch": 32.92, "eval_accuracy": 0.2762237762237762, "eval_loss": 1.5273348093032837, "eval_runtime": 10.0608, "eval_samples_per_second": 28.427, "eval_steps_per_second": 0.298, "step": 107 }, { "epoch": 33.85, "eval_accuracy": 0.26573426573426573, "eval_loss": 1.524675965309143, "eval_runtime": 16.3502, "eval_samples_per_second": 17.492, "eval_steps_per_second": 0.183, "step": 110 }, { "epoch": 34.77, "eval_accuracy": 0.2517482517482518, "eval_loss": 1.5223478078842163, "eval_runtime": 11.4239, "eval_samples_per_second": 25.035, "eval_steps_per_second": 0.263, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.24825174825174826, "eval_loss": 1.5194127559661865, "eval_runtime": 12.5757, "eval_samples_per_second": 22.742, "eval_steps_per_second": 0.239, "step": 117 }, { "epoch": 36.92, "eval_accuracy": 0.24125874125874125, "eval_loss": 1.5177949666976929, "eval_runtime": 29.9608, "eval_samples_per_second": 9.546, "eval_steps_per_second": 0.1, "step": 120 }, { "epoch": 37.85, "eval_accuracy": 0.23776223776223776, "eval_loss": 1.5167657136917114, "eval_runtime": 11.3556, "eval_samples_per_second": 25.186, "eval_steps_per_second": 0.264, "step": 123 }, { "epoch": 38.77, "eval_accuracy": 0.24475524475524477, "eval_loss": 1.5161939859390259, "eval_runtime": 8.3481, "eval_samples_per_second": 34.259, "eval_steps_per_second": 0.359, "step": 126 }, { "epoch": 40.0, "eval_accuracy": 0.24475524475524477, "eval_loss": 1.5161994695663452, "eval_runtime": 11.5276, "eval_samples_per_second": 24.81, "eval_steps_per_second": 0.26, "step": 130 }, { "epoch": 40.92, "eval_accuracy": 0.24825174825174826, "eval_loss": 1.5166517496109009, "eval_runtime": 16.6347, "eval_samples_per_second": 17.193, "eval_steps_per_second": 0.18, "step": 133 }, { "epoch": 41.85, "eval_accuracy": 0.24825174825174826, "eval_loss": 1.518052101135254, "eval_runtime": 9.0049, "eval_samples_per_second": 31.76, "eval_steps_per_second": 0.333, "step": 136 }, { "epoch": 42.77, "eval_accuracy": 0.25874125874125875, "eval_loss": 1.520302414894104, "eval_runtime": 9.9294, "eval_samples_per_second": 28.803, "eval_steps_per_second": 0.302, "step": 139 }, { "epoch": 44.0, "eval_accuracy": 0.2692307692307692, "eval_loss": 1.5227454900741577, "eval_runtime": 11.2365, "eval_samples_per_second": 25.453, "eval_steps_per_second": 0.267, "step": 143 }, { "epoch": 44.92, "eval_accuracy": 0.28321678321678323, "eval_loss": 1.5243264436721802, "eval_runtime": 19.5213, "eval_samples_per_second": 14.651, "eval_steps_per_second": 0.154, "step": 146 }, { "epoch": 45.85, "eval_accuracy": 0.27972027972027974, "eval_loss": 1.5238893032073975, "eval_runtime": 10.1208, "eval_samples_per_second": 28.259, "eval_steps_per_second": 0.296, "step": 149 }, { "epoch": 46.77, "eval_accuracy": 0.3006993006993007, "eval_loss": 1.5224095582962036, "eval_runtime": 11.1479, "eval_samples_per_second": 25.655, "eval_steps_per_second": 0.269, "step": 152 }, { "epoch": 48.0, "eval_accuracy": 0.3076923076923077, "eval_loss": 1.5170344114303589, "eval_runtime": 23.385, "eval_samples_per_second": 12.23, "eval_steps_per_second": 0.128, "step": 156 }, { "epoch": 48.92, "eval_accuracy": 0.32867132867132864, "eval_loss": 1.5102859735488892, "eval_runtime": 13.363, "eval_samples_per_second": 21.402, "eval_steps_per_second": 0.225, "step": 159 }, { "epoch": 49.85, "eval_accuracy": 0.34965034965034963, "eval_loss": 1.5032000541687012, "eval_runtime": 10.5466, "eval_samples_per_second": 27.118, "eval_steps_per_second": 0.284, "step": 162 }, { "epoch": 50.77, "eval_accuracy": 0.36013986013986016, "eval_loss": 1.4958622455596924, "eval_runtime": 13.1319, "eval_samples_per_second": 21.779, "eval_steps_per_second": 0.228, "step": 165 }, { "epoch": 52.0, "eval_accuracy": 0.36363636363636365, "eval_loss": 1.4857254028320312, "eval_runtime": 11.3199, "eval_samples_per_second": 25.265, "eval_steps_per_second": 0.265, "step": 169 }, { "epoch": 52.92, "eval_accuracy": 0.36713286713286714, "eval_loss": 1.4788074493408203, "eval_runtime": 10.1044, "eval_samples_per_second": 28.304, "eval_steps_per_second": 0.297, "step": 172 }, { "epoch": 53.85, "eval_accuracy": 0.3741258741258741, "eval_loss": 1.4713162183761597, "eval_runtime": 21.314, "eval_samples_per_second": 13.418, "eval_steps_per_second": 0.141, "step": 175 }, { "epoch": 54.77, "eval_accuracy": 0.3811188811188811, "eval_loss": 1.4641575813293457, "eval_runtime": 11.6546, "eval_samples_per_second": 24.54, "eval_steps_per_second": 0.257, "step": 178 }, { "epoch": 56.0, "eval_accuracy": 0.3881118881118881, "eval_loss": 1.455277442932129, "eval_runtime": 11.9334, "eval_samples_per_second": 23.966, "eval_steps_per_second": 0.251, "step": 182 }, { "epoch": 56.92, "eval_accuracy": 0.3986013986013986, "eval_loss": 1.4481216669082642, "eval_runtime": 10.9879, "eval_samples_per_second": 26.029, "eval_steps_per_second": 0.273, "step": 185 }, { "epoch": 57.85, "eval_accuracy": 0.4020979020979021, "eval_loss": 1.4420974254608154, "eval_runtime": 8.8556, "eval_samples_per_second": 32.296, "eval_steps_per_second": 0.339, "step": 188 }, { "epoch": 58.77, "eval_accuracy": 0.4125874125874126, "eval_loss": 1.4357017278671265, "eval_runtime": 9.9507, "eval_samples_per_second": 28.742, "eval_steps_per_second": 0.301, "step": 191 }, { "epoch": 60.0, "eval_accuracy": 0.4195804195804196, "eval_loss": 1.4284459352493286, "eval_runtime": 10.9332, "eval_samples_per_second": 26.159, "eval_steps_per_second": 0.274, "step": 195 }, { "epoch": 60.92, "eval_accuracy": 0.4195804195804196, "eval_loss": 1.4218381643295288, "eval_runtime": 15.3761, "eval_samples_per_second": 18.6, "eval_steps_per_second": 0.195, "step": 198 }, { "epoch": 61.54, "grad_norm": 26744.056640625, "learning_rate": 2.4444444444444445e-05, "loss": 1.3138, "step": 200 }, { "epoch": 61.85, "eval_accuracy": 0.43006993006993005, "eval_loss": 1.4166817665100098, "eval_runtime": 13.1906, "eval_samples_per_second": 21.682, "eval_steps_per_second": 0.227, "step": 201 }, { "epoch": 62.77, "eval_accuracy": 0.43006993006993005, "eval_loss": 1.409144639968872, "eval_runtime": 21.121, "eval_samples_per_second": 13.541, "eval_steps_per_second": 0.142, "step": 204 }, { "epoch": 64.0, "eval_accuracy": 0.4370629370629371, "eval_loss": 1.3994969129562378, "eval_runtime": 11.3707, "eval_samples_per_second": 25.152, "eval_steps_per_second": 0.264, "step": 208 }, { "epoch": 64.92, "eval_accuracy": 0.4405594405594406, "eval_loss": 1.3911248445510864, "eval_runtime": 15.8386, "eval_samples_per_second": 18.057, "eval_steps_per_second": 0.189, "step": 211 }, { "epoch": 65.85, "eval_accuracy": 0.43356643356643354, "eval_loss": 1.382477045059204, "eval_runtime": 15.9417, "eval_samples_per_second": 17.94, "eval_steps_per_second": 0.188, "step": 214 }, { "epoch": 66.77, "eval_accuracy": 0.44405594405594406, "eval_loss": 1.373460054397583, "eval_runtime": 22.8178, "eval_samples_per_second": 12.534, "eval_steps_per_second": 0.131, "step": 217 }, { "epoch": 68.0, "eval_accuracy": 0.44755244755244755, "eval_loss": 1.3632004261016846, "eval_runtime": 8.8406, "eval_samples_per_second": 32.351, "eval_steps_per_second": 0.339, "step": 221 }, { "epoch": 68.92, "eval_accuracy": 0.45104895104895104, "eval_loss": 1.355635166168213, "eval_runtime": 22.4517, "eval_samples_per_second": 12.738, "eval_steps_per_second": 0.134, "step": 224 }, { "epoch": 69.85, "eval_accuracy": 0.45104895104895104, "eval_loss": 1.349207878112793, "eval_runtime": 19.129, "eval_samples_per_second": 14.951, "eval_steps_per_second": 0.157, "step": 227 }, { "epoch": 70.77, "eval_accuracy": 0.45104895104895104, "eval_loss": 1.3441168069839478, "eval_runtime": 10.9281, "eval_samples_per_second": 26.171, "eval_steps_per_second": 0.275, "step": 230 }, { "epoch": 72.0, "eval_accuracy": 0.458041958041958, "eval_loss": 1.3352017402648926, "eval_runtime": 9.8412, "eval_samples_per_second": 29.062, "eval_steps_per_second": 0.305, "step": 234 }, { "epoch": 72.92, "eval_accuracy": 0.46153846153846156, "eval_loss": 1.326931357383728, "eval_runtime": 9.1291, "eval_samples_per_second": 31.329, "eval_steps_per_second": 0.329, "step": 237 }, { "epoch": 73.85, "eval_accuracy": 0.4755244755244755, "eval_loss": 1.3185527324676514, "eval_runtime": 8.2766, "eval_samples_per_second": 34.555, "eval_steps_per_second": 0.362, "step": 240 }, { "epoch": 74.77, "eval_accuracy": 0.4755244755244755, "eval_loss": 1.3105268478393555, "eval_runtime": 11.5807, "eval_samples_per_second": 24.696, "eval_steps_per_second": 0.259, "step": 243 }, { "epoch": 76.0, "eval_accuracy": 0.479020979020979, "eval_loss": 1.299223780632019, "eval_runtime": 26.8999, "eval_samples_per_second": 10.632, "eval_steps_per_second": 0.112, "step": 247 }, { "epoch": 76.92, "eval_accuracy": 0.4825174825174825, "eval_loss": 1.2896299362182617, "eval_runtime": 22.9313, "eval_samples_per_second": 12.472, "eval_steps_per_second": 0.131, "step": 250 }, { "epoch": 77.85, "eval_accuracy": 0.4825174825174825, "eval_loss": 1.2797133922576904, "eval_runtime": 12.0479, "eval_samples_per_second": 23.739, "eval_steps_per_second": 0.249, "step": 253 }, { "epoch": 78.77, "eval_accuracy": 0.486013986013986, "eval_loss": 1.270691156387329, "eval_runtime": 21.8623, "eval_samples_per_second": 13.082, "eval_steps_per_second": 0.137, "step": 256 }, { "epoch": 80.0, "eval_accuracy": 0.493006993006993, "eval_loss": 1.2586532831192017, "eval_runtime": 21.4963, "eval_samples_per_second": 13.305, "eval_steps_per_second": 0.14, "step": 260 }, { "epoch": 80.92, "eval_accuracy": 0.493006993006993, "eval_loss": 1.2493596076965332, "eval_runtime": 16.0914, "eval_samples_per_second": 17.774, "eval_steps_per_second": 0.186, "step": 263 }, { "epoch": 81.85, "eval_accuracy": 0.493006993006993, "eval_loss": 1.2407375574111938, "eval_runtime": 16.0788, "eval_samples_per_second": 17.787, "eval_steps_per_second": 0.187, "step": 266 }, { "epoch": 82.77, "eval_accuracy": 0.5104895104895105, "eval_loss": 1.2313759326934814, "eval_runtime": 13.8028, "eval_samples_per_second": 20.72, "eval_steps_per_second": 0.217, "step": 269 }, { "epoch": 84.0, "eval_accuracy": 0.513986013986014, "eval_loss": 1.2205084562301636, "eval_runtime": 15.155, "eval_samples_per_second": 18.872, "eval_steps_per_second": 0.198, "step": 273 }, { "epoch": 84.92, "eval_accuracy": 0.5209790209790209, "eval_loss": 1.2124475240707397, "eval_runtime": 24.3109, "eval_samples_per_second": 11.764, "eval_steps_per_second": 0.123, "step": 276 }, { "epoch": 85.85, "eval_accuracy": 0.5314685314685315, "eval_loss": 1.2043241262435913, "eval_runtime": 12.0961, "eval_samples_per_second": 23.644, "eval_steps_per_second": 0.248, "step": 279 }, { "epoch": 86.77, "eval_accuracy": 0.534965034965035, "eval_loss": 1.1973276138305664, "eval_runtime": 18.6151, "eval_samples_per_second": 15.364, "eval_steps_per_second": 0.161, "step": 282 }, { "epoch": 88.0, "eval_accuracy": 0.5524475524475524, "eval_loss": 1.1870229244232178, "eval_runtime": 19.514, "eval_samples_per_second": 14.656, "eval_steps_per_second": 0.154, "step": 286 }, { "epoch": 88.92, "eval_accuracy": 0.5629370629370629, "eval_loss": 1.178816318511963, "eval_runtime": 17.8959, "eval_samples_per_second": 15.981, "eval_steps_per_second": 0.168, "step": 289 }, { "epoch": 89.85, "eval_accuracy": 0.5629370629370629, "eval_loss": 1.1699650287628174, "eval_runtime": 14.6752, "eval_samples_per_second": 19.489, "eval_steps_per_second": 0.204, "step": 292 }, { "epoch": 90.77, "eval_accuracy": 0.5699300699300699, "eval_loss": 1.1613417863845825, "eval_runtime": 23.2377, "eval_samples_per_second": 12.308, "eval_steps_per_second": 0.129, "step": 295 }, { "epoch": 92.0, "eval_accuracy": 0.583916083916084, "eval_loss": 1.1498184204101562, "eval_runtime": 16.8037, "eval_samples_per_second": 17.02, "eval_steps_per_second": 0.179, "step": 299 }, { "epoch": 92.31, "grad_norm": 27893.916015625, "learning_rate": 1.9999999999999998e-05, "loss": 1.047, "step": 300 }, { "epoch": 92.92, "eval_accuracy": 0.5874125874125874, "eval_loss": 1.1410824060440063, "eval_runtime": 17.7012, "eval_samples_per_second": 16.157, "eval_steps_per_second": 0.169, "step": 302 }, { "epoch": 93.85, "eval_accuracy": 0.5944055944055944, "eval_loss": 1.133009433746338, "eval_runtime": 21.3624, "eval_samples_per_second": 13.388, "eval_steps_per_second": 0.14, "step": 305 }, { "epoch": 94.77, "eval_accuracy": 0.5944055944055944, "eval_loss": 1.1261355876922607, "eval_runtime": 12.5743, "eval_samples_per_second": 22.745, "eval_steps_per_second": 0.239, "step": 308 }, { "epoch": 96.0, "eval_accuracy": 0.6013986013986014, "eval_loss": 1.1161199808120728, "eval_runtime": 10.714, "eval_samples_per_second": 26.694, "eval_steps_per_second": 0.28, "step": 312 }, { "epoch": 96.92, "eval_accuracy": 0.6013986013986014, "eval_loss": 1.1083568334579468, "eval_runtime": 23.7041, "eval_samples_per_second": 12.065, "eval_steps_per_second": 0.127, "step": 315 }, { "epoch": 97.85, "eval_accuracy": 0.6048951048951049, "eval_loss": 1.1002540588378906, "eval_runtime": 18.2723, "eval_samples_per_second": 15.652, "eval_steps_per_second": 0.164, "step": 318 }, { "epoch": 98.77, "eval_accuracy": 0.6048951048951049, "eval_loss": 1.0926333665847778, "eval_runtime": 15.4521, "eval_samples_per_second": 18.509, "eval_steps_per_second": 0.194, "step": 321 }, { "epoch": 100.0, "eval_accuracy": 0.6083916083916084, "eval_loss": 1.0821202993392944, "eval_runtime": 14.9648, "eval_samples_per_second": 19.112, "eval_steps_per_second": 0.2, "step": 325 }, { "epoch": 100.92, "eval_accuracy": 0.6083916083916084, "eval_loss": 1.075362205505371, "eval_runtime": 18.8741, "eval_samples_per_second": 15.153, "eval_steps_per_second": 0.159, "step": 328 }, { "epoch": 101.85, "eval_accuracy": 0.6083916083916084, "eval_loss": 1.06904935836792, "eval_runtime": 12.1488, "eval_samples_per_second": 23.541, "eval_steps_per_second": 0.247, "step": 331 }, { "epoch": 102.77, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.0637034177780151, "eval_runtime": 21.2582, "eval_samples_per_second": 13.454, "eval_steps_per_second": 0.141, "step": 334 }, { "epoch": 104.0, "eval_accuracy": 0.6188811188811189, "eval_loss": 1.0549243688583374, "eval_runtime": 19.5513, "eval_samples_per_second": 14.628, "eval_steps_per_second": 0.153, "step": 338 }, { "epoch": 104.92, "eval_accuracy": 0.6223776223776224, "eval_loss": 1.047833800315857, "eval_runtime": 17.2191, "eval_samples_per_second": 16.609, "eval_steps_per_second": 0.174, "step": 341 }, { "epoch": 105.85, "eval_accuracy": 0.6258741258741258, "eval_loss": 1.0420035123825073, "eval_runtime": 25.1625, "eval_samples_per_second": 11.366, "eval_steps_per_second": 0.119, "step": 344 }, { "epoch": 106.77, "eval_accuracy": 0.6293706293706294, "eval_loss": 1.0369622707366943, "eval_runtime": 14.0552, "eval_samples_per_second": 20.348, "eval_steps_per_second": 0.213, "step": 347 }, { "epoch": 108.0, "eval_accuracy": 0.6293706293706294, "eval_loss": 1.0308306217193604, "eval_runtime": 15.7213, "eval_samples_per_second": 18.192, "eval_steps_per_second": 0.191, "step": 351 }, { "epoch": 108.92, "eval_accuracy": 0.6258741258741258, "eval_loss": 1.0263006687164307, "eval_runtime": 19.1306, "eval_samples_per_second": 14.95, "eval_steps_per_second": 0.157, "step": 354 }, { "epoch": 109.85, "eval_accuracy": 0.6258741258741258, "eval_loss": 1.0230927467346191, "eval_runtime": 15.401, "eval_samples_per_second": 18.57, "eval_steps_per_second": 0.195, "step": 357 }, { "epoch": 110.77, "eval_accuracy": 0.6328671328671329, "eval_loss": 1.0204286575317383, "eval_runtime": 11.4397, "eval_samples_per_second": 25.001, "eval_steps_per_second": 0.262, "step": 360 }, { "epoch": 112.0, "eval_accuracy": 0.6293706293706294, "eval_loss": 1.0166879892349243, "eval_runtime": 14.7547, "eval_samples_per_second": 19.384, "eval_steps_per_second": 0.203, "step": 364 }, { "epoch": 112.92, "eval_accuracy": 0.6293706293706294, "eval_loss": 1.0145463943481445, "eval_runtime": 12.2752, "eval_samples_per_second": 23.299, "eval_steps_per_second": 0.244, "step": 367 }, { "epoch": 113.85, "eval_accuracy": 0.6328671328671329, "eval_loss": 1.0118980407714844, "eval_runtime": 29.1643, "eval_samples_per_second": 9.807, "eval_steps_per_second": 0.103, "step": 370 }, { "epoch": 114.77, "eval_accuracy": 0.6293706293706294, "eval_loss": 1.0077061653137207, "eval_runtime": 15.736, "eval_samples_per_second": 18.175, "eval_steps_per_second": 0.191, "step": 373 }, { "epoch": 116.0, "eval_accuracy": 0.6363636363636364, "eval_loss": 1.001206398010254, "eval_runtime": 13.5478, "eval_samples_per_second": 21.11, "eval_steps_per_second": 0.221, "step": 377 }, { "epoch": 116.92, "eval_accuracy": 0.6363636363636364, "eval_loss": 0.9975122809410095, "eval_runtime": 12.3425, "eval_samples_per_second": 23.172, "eval_steps_per_second": 0.243, "step": 380 }, { "epoch": 117.85, "eval_accuracy": 0.6363636363636364, "eval_loss": 0.993752121925354, "eval_runtime": 16.3699, "eval_samples_per_second": 17.471, "eval_steps_per_second": 0.183, "step": 383 }, { "epoch": 118.77, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9913222193717957, "eval_runtime": 15.9587, "eval_samples_per_second": 17.921, "eval_steps_per_second": 0.188, "step": 386 }, { "epoch": 120.0, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9886361360549927, "eval_runtime": 14.2728, "eval_samples_per_second": 20.038, "eval_steps_per_second": 0.21, "step": 390 }, { "epoch": 120.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.987022340297699, "eval_runtime": 28.4357, "eval_samples_per_second": 10.058, "eval_steps_per_second": 0.106, "step": 393 }, { "epoch": 121.85, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9861236810684204, "eval_runtime": 12.6739, "eval_samples_per_second": 22.566, "eval_steps_per_second": 0.237, "step": 396 }, { "epoch": 122.77, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9857375025749207, "eval_runtime": 14.4905, "eval_samples_per_second": 19.737, "eval_steps_per_second": 0.207, "step": 399 }, { "epoch": 123.08, "grad_norm": 28786.830078125, "learning_rate": 1.5555555555555555e-05, "loss": 0.8183, "step": 400 }, { "epoch": 124.0, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9855088591575623, "eval_runtime": 23.5291, "eval_samples_per_second": 12.155, "eval_steps_per_second": 0.128, "step": 403 }, { "epoch": 124.92, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.986443817615509, "eval_runtime": 11.267, "eval_samples_per_second": 25.384, "eval_steps_per_second": 0.266, "step": 406 }, { "epoch": 125.85, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9856669902801514, "eval_runtime": 15.102, "eval_samples_per_second": 18.938, "eval_steps_per_second": 0.199, "step": 409 }, { "epoch": 126.77, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9817618131637573, "eval_runtime": 13.7486, "eval_samples_per_second": 20.802, "eval_steps_per_second": 0.218, "step": 412 }, { "epoch": 128.0, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9765364527702332, "eval_runtime": 15.0977, "eval_samples_per_second": 18.943, "eval_steps_per_second": 0.199, "step": 416 }, { "epoch": 128.92, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9739550352096558, "eval_runtime": 9.9261, "eval_samples_per_second": 28.813, "eval_steps_per_second": 0.302, "step": 419 }, { "epoch": 129.85, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9736567139625549, "eval_runtime": 23.6705, "eval_samples_per_second": 12.083, "eval_steps_per_second": 0.127, "step": 422 }, { "epoch": 130.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9753543734550476, "eval_runtime": 13.1489, "eval_samples_per_second": 21.751, "eval_steps_per_second": 0.228, "step": 425 }, { "epoch": 132.0, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.975328803062439, "eval_runtime": 14.1291, "eval_samples_per_second": 20.242, "eval_steps_per_second": 0.212, "step": 429 }, { "epoch": 132.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9739886522293091, "eval_runtime": 23.3295, "eval_samples_per_second": 12.259, "eval_steps_per_second": 0.129, "step": 432 }, { "epoch": 133.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9709957242012024, "eval_runtime": 12.6523, "eval_samples_per_second": 22.605, "eval_steps_per_second": 0.237, "step": 435 }, { "epoch": 134.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9685706496238708, "eval_runtime": 12.6672, "eval_samples_per_second": 22.578, "eval_steps_per_second": 0.237, "step": 438 }, { "epoch": 136.0, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9670514464378357, "eval_runtime": 14.1037, "eval_samples_per_second": 20.278, "eval_steps_per_second": 0.213, "step": 442 }, { "epoch": 136.92, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.966910719871521, "eval_runtime": 17.8781, "eval_samples_per_second": 15.997, "eval_steps_per_second": 0.168, "step": 445 }, { "epoch": 137.85, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9659475088119507, "eval_runtime": 14.4986, "eval_samples_per_second": 19.726, "eval_steps_per_second": 0.207, "step": 448 }, { "epoch": 138.77, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9662479758262634, "eval_runtime": 9.0453, "eval_samples_per_second": 31.619, "eval_steps_per_second": 0.332, "step": 451 }, { "epoch": 140.0, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.967424750328064, "eval_runtime": 15.45, "eval_samples_per_second": 18.511, "eval_steps_per_second": 0.194, "step": 455 }, { "epoch": 140.92, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9694240689277649, "eval_runtime": 25.3846, "eval_samples_per_second": 11.267, "eval_steps_per_second": 0.118, "step": 458 }, { "epoch": 141.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9716474413871765, "eval_runtime": 13.4283, "eval_samples_per_second": 21.298, "eval_steps_per_second": 0.223, "step": 461 }, { "epoch": 142.77, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9738869667053223, "eval_runtime": 14.4683, "eval_samples_per_second": 19.767, "eval_steps_per_second": 0.207, "step": 464 }, { "epoch": 144.0, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9711639881134033, "eval_runtime": 16.7189, "eval_samples_per_second": 17.106, "eval_steps_per_second": 0.179, "step": 468 }, { "epoch": 144.92, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9670231938362122, "eval_runtime": 17.2531, "eval_samples_per_second": 16.577, "eval_steps_per_second": 0.174, "step": 471 }, { "epoch": 145.85, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9637404084205627, "eval_runtime": 7.2246, "eval_samples_per_second": 39.587, "eval_steps_per_second": 0.415, "step": 474 }, { "epoch": 146.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9625027775764465, "eval_runtime": 22.2959, "eval_samples_per_second": 12.827, "eval_steps_per_second": 0.135, "step": 477 }, { "epoch": 148.0, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9634200930595398, "eval_runtime": 13.0089, "eval_samples_per_second": 21.985, "eval_steps_per_second": 0.231, "step": 481 }, { "epoch": 148.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9658861756324768, "eval_runtime": 13.9249, "eval_samples_per_second": 20.539, "eval_steps_per_second": 0.215, "step": 484 }, { "epoch": 149.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9662710428237915, "eval_runtime": 26.5045, "eval_samples_per_second": 10.791, "eval_steps_per_second": 0.113, "step": 487 }, { "epoch": 150.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9648845195770264, "eval_runtime": 13.8454, "eval_samples_per_second": 20.657, "eval_steps_per_second": 0.217, "step": 490 }, { "epoch": 152.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9654809832572937, "eval_runtime": 11.8579, "eval_samples_per_second": 24.119, "eval_steps_per_second": 0.253, "step": 494 }, { "epoch": 152.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9648337364196777, "eval_runtime": 13.0859, "eval_samples_per_second": 21.856, "eval_steps_per_second": 0.229, "step": 497 }, { "epoch": 153.85, "grad_norm": 27909.80078125, "learning_rate": 1.111111111111111e-05, "loss": 0.7321, "step": 500 }, { "epoch": 153.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.963828980922699, "eval_runtime": 8.9676, "eval_samples_per_second": 31.892, "eval_steps_per_second": 0.335, "step": 500 }, { "epoch": 154.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9631070494651794, "eval_runtime": 32.863, "eval_samples_per_second": 8.703, "eval_steps_per_second": 0.091, "step": 503 }, { "epoch": 156.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.964668333530426, "eval_runtime": 11.8735, "eval_samples_per_second": 24.087, "eval_steps_per_second": 0.253, "step": 507 }, { "epoch": 156.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9652780294418335, "eval_runtime": 12.5571, "eval_samples_per_second": 22.776, "eval_steps_per_second": 0.239, "step": 510 }, { "epoch": 157.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9662439227104187, "eval_runtime": 22.8916, "eval_samples_per_second": 12.494, "eval_steps_per_second": 0.131, "step": 513 }, { "epoch": 158.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9679471254348755, "eval_runtime": 13.713, "eval_samples_per_second": 20.856, "eval_steps_per_second": 0.219, "step": 516 }, { "epoch": 160.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9674729704856873, "eval_runtime": 13.3059, "eval_samples_per_second": 21.494, "eval_steps_per_second": 0.225, "step": 520 }, { "epoch": 160.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9663581252098083, "eval_runtime": 14.0564, "eval_samples_per_second": 20.347, "eval_steps_per_second": 0.213, "step": 523 }, { "epoch": 161.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9654582738876343, "eval_runtime": 17.3488, "eval_samples_per_second": 16.485, "eval_steps_per_second": 0.173, "step": 526 }, { "epoch": 162.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9642163515090942, "eval_runtime": 28.2809, "eval_samples_per_second": 10.113, "eval_steps_per_second": 0.106, "step": 529 }, { "epoch": 164.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9635214805603027, "eval_runtime": 13.2297, "eval_samples_per_second": 21.618, "eval_steps_per_second": 0.227, "step": 533 }, { "epoch": 164.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9632530808448792, "eval_runtime": 14.959, "eval_samples_per_second": 19.119, "eval_steps_per_second": 0.201, "step": 536 }, { "epoch": 165.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9644965529441833, "eval_runtime": 27.3169, "eval_samples_per_second": 10.47, "eval_steps_per_second": 0.11, "step": 539 }, { "epoch": 166.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9649432897567749, "eval_runtime": 16.2467, "eval_samples_per_second": 17.604, "eval_steps_per_second": 0.185, "step": 542 }, { "epoch": 168.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9651181101799011, "eval_runtime": 14.8641, "eval_samples_per_second": 19.241, "eval_steps_per_second": 0.202, "step": 546 }, { "epoch": 168.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9657481908798218, "eval_runtime": 14.6637, "eval_samples_per_second": 19.504, "eval_steps_per_second": 0.205, "step": 549 }, { "epoch": 169.85, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9662555456161499, "eval_runtime": 16.1862, "eval_samples_per_second": 17.669, "eval_steps_per_second": 0.185, "step": 552 }, { "epoch": 170.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9652681946754456, "eval_runtime": 13.8601, "eval_samples_per_second": 20.635, "eval_steps_per_second": 0.216, "step": 555 }, { "epoch": 172.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9637671113014221, "eval_runtime": 14.0763, "eval_samples_per_second": 20.318, "eval_steps_per_second": 0.213, "step": 559 }, { "epoch": 172.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9616447687149048, "eval_runtime": 14.3281, "eval_samples_per_second": 19.961, "eval_steps_per_second": 0.209, "step": 562 }, { "epoch": 173.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9601487517356873, "eval_runtime": 14.8622, "eval_samples_per_second": 19.243, "eval_steps_per_second": 0.202, "step": 565 }, { "epoch": 174.77, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9610276818275452, "eval_runtime": 11.3604, "eval_samples_per_second": 25.175, "eval_steps_per_second": 0.264, "step": 568 }, { "epoch": 176.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.962989866733551, "eval_runtime": 33.9341, "eval_samples_per_second": 8.428, "eval_steps_per_second": 0.088, "step": 572 }, { "epoch": 176.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9633333683013916, "eval_runtime": 11.5929, "eval_samples_per_second": 24.67, "eval_steps_per_second": 0.259, "step": 575 }, { "epoch": 177.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9645780324935913, "eval_runtime": 13.4891, "eval_samples_per_second": 21.202, "eval_steps_per_second": 0.222, "step": 578 }, { "epoch": 178.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9654965996742249, "eval_runtime": 28.6917, "eval_samples_per_second": 9.968, "eval_steps_per_second": 0.105, "step": 581 }, { "epoch": 180.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9672998785972595, "eval_runtime": 14.4786, "eval_samples_per_second": 19.753, "eval_steps_per_second": 0.207, "step": 585 }, { "epoch": 180.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9679505825042725, "eval_runtime": 13.6779, "eval_samples_per_second": 20.91, "eval_steps_per_second": 0.219, "step": 588 }, { "epoch": 181.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9686614274978638, "eval_runtime": 21.421, "eval_samples_per_second": 13.351, "eval_steps_per_second": 0.14, "step": 591 }, { "epoch": 182.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9691935777664185, "eval_runtime": 12.6562, "eval_samples_per_second": 22.598, "eval_steps_per_second": 0.237, "step": 594 }, { "epoch": 184.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9683831334114075, "eval_runtime": 18.1378, "eval_samples_per_second": 15.768, "eval_steps_per_second": 0.165, "step": 598 }, { "epoch": 184.62, "grad_norm": 28079.01171875, "learning_rate": 6.666666666666667e-06, "loss": 0.6941, "step": 600 }, { "epoch": 184.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9676913022994995, "eval_runtime": 22.197, "eval_samples_per_second": 12.885, "eval_steps_per_second": 0.135, "step": 601 }, { "epoch": 185.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9673991203308105, "eval_runtime": 13.8487, "eval_samples_per_second": 20.652, "eval_steps_per_second": 0.217, "step": 604 }, { "epoch": 186.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9671489000320435, "eval_runtime": 16.6344, "eval_samples_per_second": 17.193, "eval_steps_per_second": 0.18, "step": 607 }, { "epoch": 188.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.96697998046875, "eval_runtime": 14.8813, "eval_samples_per_second": 19.219, "eval_steps_per_second": 0.202, "step": 611 }, { "epoch": 188.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9661853313446045, "eval_runtime": 15.8068, "eval_samples_per_second": 18.094, "eval_steps_per_second": 0.19, "step": 614 }, { "epoch": 189.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9652778506278992, "eval_runtime": 27.4384, "eval_samples_per_second": 10.423, "eval_steps_per_second": 0.109, "step": 617 }, { "epoch": 190.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9644954800605774, "eval_runtime": 12.0079, "eval_samples_per_second": 23.818, "eval_steps_per_second": 0.25, "step": 620 }, { "epoch": 192.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9648195505142212, "eval_runtime": 15.7494, "eval_samples_per_second": 18.159, "eval_steps_per_second": 0.19, "step": 624 }, { "epoch": 192.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9652000665664673, "eval_runtime": 14.0294, "eval_samples_per_second": 20.386, "eval_steps_per_second": 0.214, "step": 627 }, { "epoch": 193.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9662671089172363, "eval_runtime": 16.6283, "eval_samples_per_second": 17.2, "eval_steps_per_second": 0.18, "step": 630 }, { "epoch": 194.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9662363529205322, "eval_runtime": 12.434, "eval_samples_per_second": 23.001, "eval_steps_per_second": 0.241, "step": 633 }, { "epoch": 196.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.966452419757843, "eval_runtime": 27.5274, "eval_samples_per_second": 10.39, "eval_steps_per_second": 0.109, "step": 637 }, { "epoch": 196.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9667707085609436, "eval_runtime": 14.0792, "eval_samples_per_second": 20.314, "eval_steps_per_second": 0.213, "step": 640 }, { "epoch": 197.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9669297337532043, "eval_runtime": 11.4357, "eval_samples_per_second": 25.009, "eval_steps_per_second": 0.262, "step": 643 }, { "epoch": 198.77, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.967424213886261, "eval_runtime": 19.968, "eval_samples_per_second": 14.323, "eval_steps_per_second": 0.15, "step": 646 }, { "epoch": 200.0, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9668986797332764, "eval_runtime": 11.3124, "eval_samples_per_second": 25.282, "eval_steps_per_second": 0.265, "step": 650 }, { "epoch": 200.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9671657681465149, "eval_runtime": 15.4547, "eval_samples_per_second": 18.506, "eval_steps_per_second": 0.194, "step": 653 }, { "epoch": 201.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9671434760093689, "eval_runtime": 11.8747, "eval_samples_per_second": 24.085, "eval_steps_per_second": 0.253, "step": 656 }, { "epoch": 202.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.967323362827301, "eval_runtime": 13.4706, "eval_samples_per_second": 21.231, "eval_steps_per_second": 0.223, "step": 659 }, { "epoch": 204.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9666022658348083, "eval_runtime": 11.809, "eval_samples_per_second": 24.219, "eval_steps_per_second": 0.254, "step": 663 }, { "epoch": 204.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.966048002243042, "eval_runtime": 30.3868, "eval_samples_per_second": 9.412, "eval_steps_per_second": 0.099, "step": 666 }, { "epoch": 205.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9656386971473694, "eval_runtime": 10.0795, "eval_samples_per_second": 28.375, "eval_steps_per_second": 0.298, "step": 669 }, { "epoch": 206.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9650998711585999, "eval_runtime": 11.1232, "eval_samples_per_second": 25.712, "eval_steps_per_second": 0.27, "step": 672 }, { "epoch": 208.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9660706520080566, "eval_runtime": 14.3325, "eval_samples_per_second": 19.955, "eval_steps_per_second": 0.209, "step": 676 }, { "epoch": 208.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9666641354560852, "eval_runtime": 18.7486, "eval_samples_per_second": 15.254, "eval_steps_per_second": 0.16, "step": 679 }, { "epoch": 209.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9667859077453613, "eval_runtime": 11.9596, "eval_samples_per_second": 23.914, "eval_steps_per_second": 0.251, "step": 682 }, { "epoch": 210.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9668753147125244, "eval_runtime": 21.049, "eval_samples_per_second": 13.587, "eval_steps_per_second": 0.143, "step": 685 }, { "epoch": 212.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665384292602539, "eval_runtime": 12.916, "eval_samples_per_second": 22.143, "eval_steps_per_second": 0.232, "step": 689 }, { "epoch": 212.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665247797966003, "eval_runtime": 13.5797, "eval_samples_per_second": 21.061, "eval_steps_per_second": 0.221, "step": 692 }, { "epoch": 213.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9663905501365662, "eval_runtime": 11.9452, "eval_samples_per_second": 23.943, "eval_steps_per_second": 0.251, "step": 695 }, { "epoch": 214.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9662503600120544, "eval_runtime": 11.3523, "eval_samples_per_second": 25.193, "eval_steps_per_second": 0.264, "step": 698 }, { "epoch": 215.38, "grad_norm": 25925.685546875, "learning_rate": 2.222222222222222e-06, "loss": 0.6696, "step": 700 }, { "epoch": 216.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9666140079498291, "eval_runtime": 11.4254, "eval_samples_per_second": 25.032, "eval_steps_per_second": 0.263, "step": 702 }, { "epoch": 216.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9667044878005981, "eval_runtime": 9.4078, "eval_samples_per_second": 30.4, "eval_steps_per_second": 0.319, "step": 705 }, { "epoch": 217.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665290713310242, "eval_runtime": 8.9463, "eval_samples_per_second": 31.969, "eval_steps_per_second": 0.335, "step": 708 }, { "epoch": 218.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9663181900978088, "eval_runtime": 12.4146, "eval_samples_per_second": 23.037, "eval_steps_per_second": 0.242, "step": 711 }, { "epoch": 220.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9661046862602234, "eval_runtime": 12.2235, "eval_samples_per_second": 23.398, "eval_steps_per_second": 0.245, "step": 715 }, { "epoch": 220.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.966101348400116, "eval_runtime": 12.7423, "eval_samples_per_second": 22.445, "eval_steps_per_second": 0.235, "step": 718 }, { "epoch": 221.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9661813974380493, "eval_runtime": 11.077, "eval_samples_per_second": 25.819, "eval_steps_per_second": 0.271, "step": 721 }, { "epoch": 222.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9663574695587158, "eval_runtime": 13.5789, "eval_samples_per_second": 21.062, "eval_steps_per_second": 0.221, "step": 724 }, { "epoch": 224.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9663906693458557, "eval_runtime": 11.7803, "eval_samples_per_second": 24.278, "eval_steps_per_second": 0.255, "step": 728 }, { "epoch": 224.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9664490222930908, "eval_runtime": 13.0369, "eval_samples_per_second": 21.938, "eval_steps_per_second": 0.23, "step": 731 }, { "epoch": 225.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665610790252686, "eval_runtime": 10.8415, "eval_samples_per_second": 26.38, "eval_steps_per_second": 0.277, "step": 734 }, { "epoch": 226.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665590524673462, "eval_runtime": 14.6312, "eval_samples_per_second": 19.547, "eval_steps_per_second": 0.205, "step": 737 }, { "epoch": 228.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665370583534241, "eval_runtime": 11.8502, "eval_samples_per_second": 24.135, "eval_steps_per_second": 0.253, "step": 741 }, { "epoch": 228.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665577411651611, "eval_runtime": 12.7745, "eval_samples_per_second": 22.388, "eval_steps_per_second": 0.235, "step": 744 }, { "epoch": 229.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665866494178772, "eval_runtime": 13.6538, "eval_samples_per_second": 20.947, "eval_steps_per_second": 0.22, "step": 747 }, { "epoch": 230.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9665878415107727, "eval_runtime": 11.2039, "eval_samples_per_second": 25.527, "eval_steps_per_second": 0.268, "step": 750 }, { "epoch": 230.77, "step": 750, "total_flos": 3.12964745557632e+18, "train_loss": 0.9558865051269532, "train_runtime": 10052.9365, "train_samples_per_second": 38.496, "train_steps_per_second": 0.075 } ], "logging_steps": 100, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 250, "save_steps": 500, "total_flos": 3.12964745557632e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }