|
{ |
|
"best_metric": 0.6538461538461539, |
|
"best_model_checkpoint": "wav2vec2-5Class-train-test-finetune-V7/checkpoint-552", |
|
"epoch": 230.76923076923077, |
|
"eval_steps": 500, |
|
"global_step": 750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.16083916083916083, |
|
"eval_loss": 1.6025639772415161, |
|
"eval_runtime": 25.1775, |
|
"eval_samples_per_second": 11.359, |
|
"eval_steps_per_second": 0.119, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.16083916083916083, |
|
"eval_loss": 1.6023768186569214, |
|
"eval_runtime": 13.2244, |
|
"eval_samples_per_second": 21.627, |
|
"eval_steps_per_second": 0.227, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.16083916083916083, |
|
"eval_loss": 1.6020615100860596, |
|
"eval_runtime": 19.1459, |
|
"eval_samples_per_second": 14.938, |
|
"eval_steps_per_second": 0.157, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.16083916083916083, |
|
"eval_loss": 1.6014316082000732, |
|
"eval_runtime": 24.6871, |
|
"eval_samples_per_second": 11.585, |
|
"eval_steps_per_second": 0.122, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_accuracy": 0.16083916083916083, |
|
"eval_loss": 1.6007977724075317, |
|
"eval_runtime": 14.5871, |
|
"eval_samples_per_second": 19.606, |
|
"eval_steps_per_second": 0.206, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"eval_accuracy": 0.16083916083916083, |
|
"eval_loss": 1.600056529045105, |
|
"eval_runtime": 15.3427, |
|
"eval_samples_per_second": 18.641, |
|
"eval_steps_per_second": 0.196, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_accuracy": 0.16083916083916083, |
|
"eval_loss": 1.599173903465271, |
|
"eval_runtime": 23.1969, |
|
"eval_samples_per_second": 12.329, |
|
"eval_steps_per_second": 0.129, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.17482517482517482, |
|
"eval_loss": 1.5977518558502197, |
|
"eval_runtime": 24.249, |
|
"eval_samples_per_second": 11.794, |
|
"eval_steps_per_second": 0.124, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"eval_accuracy": 0.1888111888111888, |
|
"eval_loss": 1.5965358018875122, |
|
"eval_runtime": 14.7479, |
|
"eval_samples_per_second": 19.393, |
|
"eval_steps_per_second": 0.203, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_accuracy": 0.2097902097902098, |
|
"eval_loss": 1.5952231884002686, |
|
"eval_runtime": 18.4251, |
|
"eval_samples_per_second": 15.522, |
|
"eval_steps_per_second": 0.163, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.593780517578125, |
|
"eval_runtime": 20.5455, |
|
"eval_samples_per_second": 13.92, |
|
"eval_steps_per_second": 0.146, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.23426573426573427, |
|
"eval_loss": 1.5916367769241333, |
|
"eval_runtime": 16.0385, |
|
"eval_samples_per_second": 17.832, |
|
"eval_steps_per_second": 0.187, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_loss": 1.5898847579956055, |
|
"eval_runtime": 21.1768, |
|
"eval_samples_per_second": 13.505, |
|
"eval_steps_per_second": 0.142, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"eval_accuracy": 0.2727272727272727, |
|
"eval_loss": 1.5879966020584106, |
|
"eval_runtime": 13.5248, |
|
"eval_samples_per_second": 21.146, |
|
"eval_steps_per_second": 0.222, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_loss": 1.5860111713409424, |
|
"eval_runtime": 12.9444, |
|
"eval_samples_per_second": 22.094, |
|
"eval_steps_per_second": 0.232, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.35664335664335667, |
|
"eval_loss": 1.583256483078003, |
|
"eval_runtime": 25.0161, |
|
"eval_samples_per_second": 11.433, |
|
"eval_steps_per_second": 0.12, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"eval_accuracy": 0.3881118881118881, |
|
"eval_loss": 1.5810612440109253, |
|
"eval_runtime": 14.5666, |
|
"eval_samples_per_second": 19.634, |
|
"eval_steps_per_second": 0.206, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"eval_accuracy": 0.3811188811188811, |
|
"eval_loss": 1.578792929649353, |
|
"eval_runtime": 13.0625, |
|
"eval_samples_per_second": 21.895, |
|
"eval_steps_per_second": 0.23, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"eval_accuracy": 0.36713286713286714, |
|
"eval_loss": 1.576446294784546, |
|
"eval_runtime": 18.6648, |
|
"eval_samples_per_second": 15.323, |
|
"eval_steps_per_second": 0.161, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.34965034965034963, |
|
"eval_loss": 1.5730595588684082, |
|
"eval_runtime": 11.5386, |
|
"eval_samples_per_second": 24.786, |
|
"eval_steps_per_second": 0.26, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"eval_accuracy": 0.32867132867132864, |
|
"eval_loss": 1.5702308416366577, |
|
"eval_runtime": 15.7041, |
|
"eval_samples_per_second": 18.212, |
|
"eval_steps_per_second": 0.191, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.85, |
|
"eval_accuracy": 0.32517482517482516, |
|
"eval_loss": 1.5671954154968262, |
|
"eval_runtime": 11.3961, |
|
"eval_samples_per_second": 25.096, |
|
"eval_steps_per_second": 0.263, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.77, |
|
"eval_accuracy": 0.3146853146853147, |
|
"eval_loss": 1.5641109943389893, |
|
"eval_runtime": 15.1673, |
|
"eval_samples_per_second": 18.856, |
|
"eval_steps_per_second": 0.198, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.3111888111888112, |
|
"eval_loss": 1.5597317218780518, |
|
"eval_runtime": 24.7249, |
|
"eval_samples_per_second": 11.567, |
|
"eval_steps_per_second": 0.121, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.92, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_loss": 1.5563873052597046, |
|
"eval_runtime": 13.1713, |
|
"eval_samples_per_second": 21.714, |
|
"eval_steps_per_second": 0.228, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"eval_accuracy": 0.3041958041958042, |
|
"eval_loss": 1.553160548210144, |
|
"eval_runtime": 12.194, |
|
"eval_samples_per_second": 23.454, |
|
"eval_steps_per_second": 0.246, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.77, |
|
"eval_accuracy": 0.2937062937062937, |
|
"eval_loss": 1.549930214881897, |
|
"eval_runtime": 25.3259, |
|
"eval_samples_per_second": 11.293, |
|
"eval_steps_per_second": 0.118, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.2902097902097902, |
|
"eval_loss": 1.5454081296920776, |
|
"eval_runtime": 10.4422, |
|
"eval_samples_per_second": 27.389, |
|
"eval_steps_per_second": 0.287, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"eval_accuracy": 0.2867132867132867, |
|
"eval_loss": 1.5419245958328247, |
|
"eval_runtime": 16.2077, |
|
"eval_samples_per_second": 17.646, |
|
"eval_steps_per_second": 0.185, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"eval_accuracy": 0.28321678321678323, |
|
"eval_loss": 1.5383468866348267, |
|
"eval_runtime": 27.5909, |
|
"eval_samples_per_second": 10.366, |
|
"eval_steps_per_second": 0.109, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"grad_norm": 65491.41796875, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 1.5563, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"eval_accuracy": 0.2762237762237762, |
|
"eval_loss": 1.5348902940750122, |
|
"eval_runtime": 14.2201, |
|
"eval_samples_per_second": 20.112, |
|
"eval_steps_per_second": 0.211, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.27972027972027974, |
|
"eval_loss": 1.5304443836212158, |
|
"eval_runtime": 11.5662, |
|
"eval_samples_per_second": 24.727, |
|
"eval_steps_per_second": 0.259, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92, |
|
"eval_accuracy": 0.2762237762237762, |
|
"eval_loss": 1.5273348093032837, |
|
"eval_runtime": 10.0608, |
|
"eval_samples_per_second": 28.427, |
|
"eval_steps_per_second": 0.298, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"eval_accuracy": 0.26573426573426573, |
|
"eval_loss": 1.524675965309143, |
|
"eval_runtime": 16.3502, |
|
"eval_samples_per_second": 17.492, |
|
"eval_steps_per_second": 0.183, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.77, |
|
"eval_accuracy": 0.2517482517482518, |
|
"eval_loss": 1.5223478078842163, |
|
"eval_runtime": 11.4239, |
|
"eval_samples_per_second": 25.035, |
|
"eval_steps_per_second": 0.263, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.24825174825174826, |
|
"eval_loss": 1.5194127559661865, |
|
"eval_runtime": 12.5757, |
|
"eval_samples_per_second": 22.742, |
|
"eval_steps_per_second": 0.239, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"eval_accuracy": 0.24125874125874125, |
|
"eval_loss": 1.5177949666976929, |
|
"eval_runtime": 29.9608, |
|
"eval_samples_per_second": 9.546, |
|
"eval_steps_per_second": 0.1, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 37.85, |
|
"eval_accuracy": 0.23776223776223776, |
|
"eval_loss": 1.5167657136917114, |
|
"eval_runtime": 11.3556, |
|
"eval_samples_per_second": 25.186, |
|
"eval_steps_per_second": 0.264, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 38.77, |
|
"eval_accuracy": 0.24475524475524477, |
|
"eval_loss": 1.5161939859390259, |
|
"eval_runtime": 8.3481, |
|
"eval_samples_per_second": 34.259, |
|
"eval_steps_per_second": 0.359, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.24475524475524477, |
|
"eval_loss": 1.5161994695663452, |
|
"eval_runtime": 11.5276, |
|
"eval_samples_per_second": 24.81, |
|
"eval_steps_per_second": 0.26, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.92, |
|
"eval_accuracy": 0.24825174825174826, |
|
"eval_loss": 1.5166517496109009, |
|
"eval_runtime": 16.6347, |
|
"eval_samples_per_second": 17.193, |
|
"eval_steps_per_second": 0.18, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 41.85, |
|
"eval_accuracy": 0.24825174825174826, |
|
"eval_loss": 1.518052101135254, |
|
"eval_runtime": 9.0049, |
|
"eval_samples_per_second": 31.76, |
|
"eval_steps_per_second": 0.333, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 42.77, |
|
"eval_accuracy": 0.25874125874125875, |
|
"eval_loss": 1.520302414894104, |
|
"eval_runtime": 9.9294, |
|
"eval_samples_per_second": 28.803, |
|
"eval_steps_per_second": 0.302, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_loss": 1.5227454900741577, |
|
"eval_runtime": 11.2365, |
|
"eval_samples_per_second": 25.453, |
|
"eval_steps_per_second": 0.267, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 44.92, |
|
"eval_accuracy": 0.28321678321678323, |
|
"eval_loss": 1.5243264436721802, |
|
"eval_runtime": 19.5213, |
|
"eval_samples_per_second": 14.651, |
|
"eval_steps_per_second": 0.154, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 45.85, |
|
"eval_accuracy": 0.27972027972027974, |
|
"eval_loss": 1.5238893032073975, |
|
"eval_runtime": 10.1208, |
|
"eval_samples_per_second": 28.259, |
|
"eval_steps_per_second": 0.296, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 46.77, |
|
"eval_accuracy": 0.3006993006993007, |
|
"eval_loss": 1.5224095582962036, |
|
"eval_runtime": 11.1479, |
|
"eval_samples_per_second": 25.655, |
|
"eval_steps_per_second": 0.269, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_loss": 1.5170344114303589, |
|
"eval_runtime": 23.385, |
|
"eval_samples_per_second": 12.23, |
|
"eval_steps_per_second": 0.128, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 48.92, |
|
"eval_accuracy": 0.32867132867132864, |
|
"eval_loss": 1.5102859735488892, |
|
"eval_runtime": 13.363, |
|
"eval_samples_per_second": 21.402, |
|
"eval_steps_per_second": 0.225, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"eval_accuracy": 0.34965034965034963, |
|
"eval_loss": 1.5032000541687012, |
|
"eval_runtime": 10.5466, |
|
"eval_samples_per_second": 27.118, |
|
"eval_steps_per_second": 0.284, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 50.77, |
|
"eval_accuracy": 0.36013986013986016, |
|
"eval_loss": 1.4958622455596924, |
|
"eval_runtime": 13.1319, |
|
"eval_samples_per_second": 21.779, |
|
"eval_steps_per_second": 0.228, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 1.4857254028320312, |
|
"eval_runtime": 11.3199, |
|
"eval_samples_per_second": 25.265, |
|
"eval_steps_per_second": 0.265, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 52.92, |
|
"eval_accuracy": 0.36713286713286714, |
|
"eval_loss": 1.4788074493408203, |
|
"eval_runtime": 10.1044, |
|
"eval_samples_per_second": 28.304, |
|
"eval_steps_per_second": 0.297, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 53.85, |
|
"eval_accuracy": 0.3741258741258741, |
|
"eval_loss": 1.4713162183761597, |
|
"eval_runtime": 21.314, |
|
"eval_samples_per_second": 13.418, |
|
"eval_steps_per_second": 0.141, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 54.77, |
|
"eval_accuracy": 0.3811188811188811, |
|
"eval_loss": 1.4641575813293457, |
|
"eval_runtime": 11.6546, |
|
"eval_samples_per_second": 24.54, |
|
"eval_steps_per_second": 0.257, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.3881118881118881, |
|
"eval_loss": 1.455277442932129, |
|
"eval_runtime": 11.9334, |
|
"eval_samples_per_second": 23.966, |
|
"eval_steps_per_second": 0.251, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 56.92, |
|
"eval_accuracy": 0.3986013986013986, |
|
"eval_loss": 1.4481216669082642, |
|
"eval_runtime": 10.9879, |
|
"eval_samples_per_second": 26.029, |
|
"eval_steps_per_second": 0.273, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 57.85, |
|
"eval_accuracy": 0.4020979020979021, |
|
"eval_loss": 1.4420974254608154, |
|
"eval_runtime": 8.8556, |
|
"eval_samples_per_second": 32.296, |
|
"eval_steps_per_second": 0.339, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 58.77, |
|
"eval_accuracy": 0.4125874125874126, |
|
"eval_loss": 1.4357017278671265, |
|
"eval_runtime": 9.9507, |
|
"eval_samples_per_second": 28.742, |
|
"eval_steps_per_second": 0.301, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.4195804195804196, |
|
"eval_loss": 1.4284459352493286, |
|
"eval_runtime": 10.9332, |
|
"eval_samples_per_second": 26.159, |
|
"eval_steps_per_second": 0.274, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 60.92, |
|
"eval_accuracy": 0.4195804195804196, |
|
"eval_loss": 1.4218381643295288, |
|
"eval_runtime": 15.3761, |
|
"eval_samples_per_second": 18.6, |
|
"eval_steps_per_second": 0.195, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"grad_norm": 26744.056640625, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 1.3138, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 61.85, |
|
"eval_accuracy": 0.43006993006993005, |
|
"eval_loss": 1.4166817665100098, |
|
"eval_runtime": 13.1906, |
|
"eval_samples_per_second": 21.682, |
|
"eval_steps_per_second": 0.227, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 62.77, |
|
"eval_accuracy": 0.43006993006993005, |
|
"eval_loss": 1.409144639968872, |
|
"eval_runtime": 21.121, |
|
"eval_samples_per_second": 13.541, |
|
"eval_steps_per_second": 0.142, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.4370629370629371, |
|
"eval_loss": 1.3994969129562378, |
|
"eval_runtime": 11.3707, |
|
"eval_samples_per_second": 25.152, |
|
"eval_steps_per_second": 0.264, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 64.92, |
|
"eval_accuracy": 0.4405594405594406, |
|
"eval_loss": 1.3911248445510864, |
|
"eval_runtime": 15.8386, |
|
"eval_samples_per_second": 18.057, |
|
"eval_steps_per_second": 0.189, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 65.85, |
|
"eval_accuracy": 0.43356643356643354, |
|
"eval_loss": 1.382477045059204, |
|
"eval_runtime": 15.9417, |
|
"eval_samples_per_second": 17.94, |
|
"eval_steps_per_second": 0.188, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 66.77, |
|
"eval_accuracy": 0.44405594405594406, |
|
"eval_loss": 1.373460054397583, |
|
"eval_runtime": 22.8178, |
|
"eval_samples_per_second": 12.534, |
|
"eval_steps_per_second": 0.131, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.44755244755244755, |
|
"eval_loss": 1.3632004261016846, |
|
"eval_runtime": 8.8406, |
|
"eval_samples_per_second": 32.351, |
|
"eval_steps_per_second": 0.339, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 68.92, |
|
"eval_accuracy": 0.45104895104895104, |
|
"eval_loss": 1.355635166168213, |
|
"eval_runtime": 22.4517, |
|
"eval_samples_per_second": 12.738, |
|
"eval_steps_per_second": 0.134, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 69.85, |
|
"eval_accuracy": 0.45104895104895104, |
|
"eval_loss": 1.349207878112793, |
|
"eval_runtime": 19.129, |
|
"eval_samples_per_second": 14.951, |
|
"eval_steps_per_second": 0.157, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 70.77, |
|
"eval_accuracy": 0.45104895104895104, |
|
"eval_loss": 1.3441168069839478, |
|
"eval_runtime": 10.9281, |
|
"eval_samples_per_second": 26.171, |
|
"eval_steps_per_second": 0.275, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.458041958041958, |
|
"eval_loss": 1.3352017402648926, |
|
"eval_runtime": 9.8412, |
|
"eval_samples_per_second": 29.062, |
|
"eval_steps_per_second": 0.305, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 72.92, |
|
"eval_accuracy": 0.46153846153846156, |
|
"eval_loss": 1.326931357383728, |
|
"eval_runtime": 9.1291, |
|
"eval_samples_per_second": 31.329, |
|
"eval_steps_per_second": 0.329, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 73.85, |
|
"eval_accuracy": 0.4755244755244755, |
|
"eval_loss": 1.3185527324676514, |
|
"eval_runtime": 8.2766, |
|
"eval_samples_per_second": 34.555, |
|
"eval_steps_per_second": 0.362, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 74.77, |
|
"eval_accuracy": 0.4755244755244755, |
|
"eval_loss": 1.3105268478393555, |
|
"eval_runtime": 11.5807, |
|
"eval_samples_per_second": 24.696, |
|
"eval_steps_per_second": 0.259, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.479020979020979, |
|
"eval_loss": 1.299223780632019, |
|
"eval_runtime": 26.8999, |
|
"eval_samples_per_second": 10.632, |
|
"eval_steps_per_second": 0.112, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_accuracy": 0.4825174825174825, |
|
"eval_loss": 1.2896299362182617, |
|
"eval_runtime": 22.9313, |
|
"eval_samples_per_second": 12.472, |
|
"eval_steps_per_second": 0.131, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 77.85, |
|
"eval_accuracy": 0.4825174825174825, |
|
"eval_loss": 1.2797133922576904, |
|
"eval_runtime": 12.0479, |
|
"eval_samples_per_second": 23.739, |
|
"eval_steps_per_second": 0.249, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 78.77, |
|
"eval_accuracy": 0.486013986013986, |
|
"eval_loss": 1.270691156387329, |
|
"eval_runtime": 21.8623, |
|
"eval_samples_per_second": 13.082, |
|
"eval_steps_per_second": 0.137, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.493006993006993, |
|
"eval_loss": 1.2586532831192017, |
|
"eval_runtime": 21.4963, |
|
"eval_samples_per_second": 13.305, |
|
"eval_steps_per_second": 0.14, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.92, |
|
"eval_accuracy": 0.493006993006993, |
|
"eval_loss": 1.2493596076965332, |
|
"eval_runtime": 16.0914, |
|
"eval_samples_per_second": 17.774, |
|
"eval_steps_per_second": 0.186, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 81.85, |
|
"eval_accuracy": 0.493006993006993, |
|
"eval_loss": 1.2407375574111938, |
|
"eval_runtime": 16.0788, |
|
"eval_samples_per_second": 17.787, |
|
"eval_steps_per_second": 0.187, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 82.77, |
|
"eval_accuracy": 0.5104895104895105, |
|
"eval_loss": 1.2313759326934814, |
|
"eval_runtime": 13.8028, |
|
"eval_samples_per_second": 20.72, |
|
"eval_steps_per_second": 0.217, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.513986013986014, |
|
"eval_loss": 1.2205084562301636, |
|
"eval_runtime": 15.155, |
|
"eval_samples_per_second": 18.872, |
|
"eval_steps_per_second": 0.198, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 84.92, |
|
"eval_accuracy": 0.5209790209790209, |
|
"eval_loss": 1.2124475240707397, |
|
"eval_runtime": 24.3109, |
|
"eval_samples_per_second": 11.764, |
|
"eval_steps_per_second": 0.123, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"eval_accuracy": 0.5314685314685315, |
|
"eval_loss": 1.2043241262435913, |
|
"eval_runtime": 12.0961, |
|
"eval_samples_per_second": 23.644, |
|
"eval_steps_per_second": 0.248, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 86.77, |
|
"eval_accuracy": 0.534965034965035, |
|
"eval_loss": 1.1973276138305664, |
|
"eval_runtime": 18.6151, |
|
"eval_samples_per_second": 15.364, |
|
"eval_steps_per_second": 0.161, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.5524475524475524, |
|
"eval_loss": 1.1870229244232178, |
|
"eval_runtime": 19.514, |
|
"eval_samples_per_second": 14.656, |
|
"eval_steps_per_second": 0.154, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 88.92, |
|
"eval_accuracy": 0.5629370629370629, |
|
"eval_loss": 1.178816318511963, |
|
"eval_runtime": 17.8959, |
|
"eval_samples_per_second": 15.981, |
|
"eval_steps_per_second": 0.168, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 89.85, |
|
"eval_accuracy": 0.5629370629370629, |
|
"eval_loss": 1.1699650287628174, |
|
"eval_runtime": 14.6752, |
|
"eval_samples_per_second": 19.489, |
|
"eval_steps_per_second": 0.204, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 90.77, |
|
"eval_accuracy": 0.5699300699300699, |
|
"eval_loss": 1.1613417863845825, |
|
"eval_runtime": 23.2377, |
|
"eval_samples_per_second": 12.308, |
|
"eval_steps_per_second": 0.129, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.583916083916084, |
|
"eval_loss": 1.1498184204101562, |
|
"eval_runtime": 16.8037, |
|
"eval_samples_per_second": 17.02, |
|
"eval_steps_per_second": 0.179, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"grad_norm": 27893.916015625, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 1.047, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.92, |
|
"eval_accuracy": 0.5874125874125874, |
|
"eval_loss": 1.1410824060440063, |
|
"eval_runtime": 17.7012, |
|
"eval_samples_per_second": 16.157, |
|
"eval_steps_per_second": 0.169, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 93.85, |
|
"eval_accuracy": 0.5944055944055944, |
|
"eval_loss": 1.133009433746338, |
|
"eval_runtime": 21.3624, |
|
"eval_samples_per_second": 13.388, |
|
"eval_steps_per_second": 0.14, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 94.77, |
|
"eval_accuracy": 0.5944055944055944, |
|
"eval_loss": 1.1261355876922607, |
|
"eval_runtime": 12.5743, |
|
"eval_samples_per_second": 22.745, |
|
"eval_steps_per_second": 0.239, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.6013986013986014, |
|
"eval_loss": 1.1161199808120728, |
|
"eval_runtime": 10.714, |
|
"eval_samples_per_second": 26.694, |
|
"eval_steps_per_second": 0.28, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 96.92, |
|
"eval_accuracy": 0.6013986013986014, |
|
"eval_loss": 1.1083568334579468, |
|
"eval_runtime": 23.7041, |
|
"eval_samples_per_second": 12.065, |
|
"eval_steps_per_second": 0.127, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 97.85, |
|
"eval_accuracy": 0.6048951048951049, |
|
"eval_loss": 1.1002540588378906, |
|
"eval_runtime": 18.2723, |
|
"eval_samples_per_second": 15.652, |
|
"eval_steps_per_second": 0.164, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 98.77, |
|
"eval_accuracy": 0.6048951048951049, |
|
"eval_loss": 1.0926333665847778, |
|
"eval_runtime": 15.4521, |
|
"eval_samples_per_second": 18.509, |
|
"eval_steps_per_second": 0.194, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.6083916083916084, |
|
"eval_loss": 1.0821202993392944, |
|
"eval_runtime": 14.9648, |
|
"eval_samples_per_second": 19.112, |
|
"eval_steps_per_second": 0.2, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 100.92, |
|
"eval_accuracy": 0.6083916083916084, |
|
"eval_loss": 1.075362205505371, |
|
"eval_runtime": 18.8741, |
|
"eval_samples_per_second": 15.153, |
|
"eval_steps_per_second": 0.159, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 101.85, |
|
"eval_accuracy": 0.6083916083916084, |
|
"eval_loss": 1.06904935836792, |
|
"eval_runtime": 12.1488, |
|
"eval_samples_per_second": 23.541, |
|
"eval_steps_per_second": 0.247, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 102.77, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 1.0637034177780151, |
|
"eval_runtime": 21.2582, |
|
"eval_samples_per_second": 13.454, |
|
"eval_steps_per_second": 0.141, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.6188811188811189, |
|
"eval_loss": 1.0549243688583374, |
|
"eval_runtime": 19.5513, |
|
"eval_samples_per_second": 14.628, |
|
"eval_steps_per_second": 0.153, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 104.92, |
|
"eval_accuracy": 0.6223776223776224, |
|
"eval_loss": 1.047833800315857, |
|
"eval_runtime": 17.2191, |
|
"eval_samples_per_second": 16.609, |
|
"eval_steps_per_second": 0.174, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 105.85, |
|
"eval_accuracy": 0.6258741258741258, |
|
"eval_loss": 1.0420035123825073, |
|
"eval_runtime": 25.1625, |
|
"eval_samples_per_second": 11.366, |
|
"eval_steps_per_second": 0.119, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 106.77, |
|
"eval_accuracy": 0.6293706293706294, |
|
"eval_loss": 1.0369622707366943, |
|
"eval_runtime": 14.0552, |
|
"eval_samples_per_second": 20.348, |
|
"eval_steps_per_second": 0.213, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.6293706293706294, |
|
"eval_loss": 1.0308306217193604, |
|
"eval_runtime": 15.7213, |
|
"eval_samples_per_second": 18.192, |
|
"eval_steps_per_second": 0.191, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 108.92, |
|
"eval_accuracy": 0.6258741258741258, |
|
"eval_loss": 1.0263006687164307, |
|
"eval_runtime": 19.1306, |
|
"eval_samples_per_second": 14.95, |
|
"eval_steps_per_second": 0.157, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 109.85, |
|
"eval_accuracy": 0.6258741258741258, |
|
"eval_loss": 1.0230927467346191, |
|
"eval_runtime": 15.401, |
|
"eval_samples_per_second": 18.57, |
|
"eval_steps_per_second": 0.195, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 110.77, |
|
"eval_accuracy": 0.6328671328671329, |
|
"eval_loss": 1.0204286575317383, |
|
"eval_runtime": 11.4397, |
|
"eval_samples_per_second": 25.001, |
|
"eval_steps_per_second": 0.262, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.6293706293706294, |
|
"eval_loss": 1.0166879892349243, |
|
"eval_runtime": 14.7547, |
|
"eval_samples_per_second": 19.384, |
|
"eval_steps_per_second": 0.203, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 112.92, |
|
"eval_accuracy": 0.6293706293706294, |
|
"eval_loss": 1.0145463943481445, |
|
"eval_runtime": 12.2752, |
|
"eval_samples_per_second": 23.299, |
|
"eval_steps_per_second": 0.244, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 113.85, |
|
"eval_accuracy": 0.6328671328671329, |
|
"eval_loss": 1.0118980407714844, |
|
"eval_runtime": 29.1643, |
|
"eval_samples_per_second": 9.807, |
|
"eval_steps_per_second": 0.103, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 114.77, |
|
"eval_accuracy": 0.6293706293706294, |
|
"eval_loss": 1.0077061653137207, |
|
"eval_runtime": 15.736, |
|
"eval_samples_per_second": 18.175, |
|
"eval_steps_per_second": 0.191, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 1.001206398010254, |
|
"eval_runtime": 13.5478, |
|
"eval_samples_per_second": 21.11, |
|
"eval_steps_per_second": 0.221, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 116.92, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 0.9975122809410095, |
|
"eval_runtime": 12.3425, |
|
"eval_samples_per_second": 23.172, |
|
"eval_steps_per_second": 0.243, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 117.85, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 0.993752121925354, |
|
"eval_runtime": 16.3699, |
|
"eval_samples_per_second": 17.471, |
|
"eval_steps_per_second": 0.183, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 118.77, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9913222193717957, |
|
"eval_runtime": 15.9587, |
|
"eval_samples_per_second": 17.921, |
|
"eval_steps_per_second": 0.188, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9886361360549927, |
|
"eval_runtime": 14.2728, |
|
"eval_samples_per_second": 20.038, |
|
"eval_steps_per_second": 0.21, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 120.92, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.987022340297699, |
|
"eval_runtime": 28.4357, |
|
"eval_samples_per_second": 10.058, |
|
"eval_steps_per_second": 0.106, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 121.85, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9861236810684204, |
|
"eval_runtime": 12.6739, |
|
"eval_samples_per_second": 22.566, |
|
"eval_steps_per_second": 0.237, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 122.77, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9857375025749207, |
|
"eval_runtime": 14.4905, |
|
"eval_samples_per_second": 19.737, |
|
"eval_steps_per_second": 0.207, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 123.08, |
|
"grad_norm": 28786.830078125, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.8183, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9855088591575623, |
|
"eval_runtime": 23.5291, |
|
"eval_samples_per_second": 12.155, |
|
"eval_steps_per_second": 0.128, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 124.92, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.986443817615509, |
|
"eval_runtime": 11.267, |
|
"eval_samples_per_second": 25.384, |
|
"eval_steps_per_second": 0.266, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 125.85, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9856669902801514, |
|
"eval_runtime": 15.102, |
|
"eval_samples_per_second": 18.938, |
|
"eval_steps_per_second": 0.199, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 126.77, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9817618131637573, |
|
"eval_runtime": 13.7486, |
|
"eval_samples_per_second": 20.802, |
|
"eval_steps_per_second": 0.218, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9765364527702332, |
|
"eval_runtime": 15.0977, |
|
"eval_samples_per_second": 18.943, |
|
"eval_steps_per_second": 0.199, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 128.92, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9739550352096558, |
|
"eval_runtime": 9.9261, |
|
"eval_samples_per_second": 28.813, |
|
"eval_steps_per_second": 0.302, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 129.85, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9736567139625549, |
|
"eval_runtime": 23.6705, |
|
"eval_samples_per_second": 12.083, |
|
"eval_steps_per_second": 0.127, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 130.77, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9753543734550476, |
|
"eval_runtime": 13.1489, |
|
"eval_samples_per_second": 21.751, |
|
"eval_steps_per_second": 0.228, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.975328803062439, |
|
"eval_runtime": 14.1291, |
|
"eval_samples_per_second": 20.242, |
|
"eval_steps_per_second": 0.212, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 132.92, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9739886522293091, |
|
"eval_runtime": 23.3295, |
|
"eval_samples_per_second": 12.259, |
|
"eval_steps_per_second": 0.129, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 133.85, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9709957242012024, |
|
"eval_runtime": 12.6523, |
|
"eval_samples_per_second": 22.605, |
|
"eval_steps_per_second": 0.237, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 134.77, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9685706496238708, |
|
"eval_runtime": 12.6672, |
|
"eval_samples_per_second": 22.578, |
|
"eval_steps_per_second": 0.237, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9670514464378357, |
|
"eval_runtime": 14.1037, |
|
"eval_samples_per_second": 20.278, |
|
"eval_steps_per_second": 0.213, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 136.92, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.966910719871521, |
|
"eval_runtime": 17.8781, |
|
"eval_samples_per_second": 15.997, |
|
"eval_steps_per_second": 0.168, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 137.85, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9659475088119507, |
|
"eval_runtime": 14.4986, |
|
"eval_samples_per_second": 19.726, |
|
"eval_steps_per_second": 0.207, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 138.77, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9662479758262634, |
|
"eval_runtime": 9.0453, |
|
"eval_samples_per_second": 31.619, |
|
"eval_steps_per_second": 0.332, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.967424750328064, |
|
"eval_runtime": 15.45, |
|
"eval_samples_per_second": 18.511, |
|
"eval_steps_per_second": 0.194, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 140.92, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9694240689277649, |
|
"eval_runtime": 25.3846, |
|
"eval_samples_per_second": 11.267, |
|
"eval_steps_per_second": 0.118, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 141.85, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9716474413871765, |
|
"eval_runtime": 13.4283, |
|
"eval_samples_per_second": 21.298, |
|
"eval_steps_per_second": 0.223, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 142.77, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9738869667053223, |
|
"eval_runtime": 14.4683, |
|
"eval_samples_per_second": 19.767, |
|
"eval_steps_per_second": 0.207, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9711639881134033, |
|
"eval_runtime": 16.7189, |
|
"eval_samples_per_second": 17.106, |
|
"eval_steps_per_second": 0.179, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 144.92, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9670231938362122, |
|
"eval_runtime": 17.2531, |
|
"eval_samples_per_second": 16.577, |
|
"eval_steps_per_second": 0.174, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 145.85, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9637404084205627, |
|
"eval_runtime": 7.2246, |
|
"eval_samples_per_second": 39.587, |
|
"eval_steps_per_second": 0.415, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 146.77, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9625027775764465, |
|
"eval_runtime": 22.2959, |
|
"eval_samples_per_second": 12.827, |
|
"eval_steps_per_second": 0.135, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9634200930595398, |
|
"eval_runtime": 13.0089, |
|
"eval_samples_per_second": 21.985, |
|
"eval_steps_per_second": 0.231, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 148.92, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9658861756324768, |
|
"eval_runtime": 13.9249, |
|
"eval_samples_per_second": 20.539, |
|
"eval_steps_per_second": 0.215, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 149.85, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9662710428237915, |
|
"eval_runtime": 26.5045, |
|
"eval_samples_per_second": 10.791, |
|
"eval_steps_per_second": 0.113, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 150.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9648845195770264, |
|
"eval_runtime": 13.8454, |
|
"eval_samples_per_second": 20.657, |
|
"eval_steps_per_second": 0.217, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9654809832572937, |
|
"eval_runtime": 11.8579, |
|
"eval_samples_per_second": 24.119, |
|
"eval_steps_per_second": 0.253, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 152.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9648337364196777, |
|
"eval_runtime": 13.0859, |
|
"eval_samples_per_second": 21.856, |
|
"eval_steps_per_second": 0.229, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"grad_norm": 27909.80078125, |
|
"learning_rate": 1.111111111111111e-05, |
|
"loss": 0.7321, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.963828980922699, |
|
"eval_runtime": 8.9676, |
|
"eval_samples_per_second": 31.892, |
|
"eval_steps_per_second": 0.335, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 154.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9631070494651794, |
|
"eval_runtime": 32.863, |
|
"eval_samples_per_second": 8.703, |
|
"eval_steps_per_second": 0.091, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.964668333530426, |
|
"eval_runtime": 11.8735, |
|
"eval_samples_per_second": 24.087, |
|
"eval_steps_per_second": 0.253, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 156.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9652780294418335, |
|
"eval_runtime": 12.5571, |
|
"eval_samples_per_second": 22.776, |
|
"eval_steps_per_second": 0.239, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 157.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9662439227104187, |
|
"eval_runtime": 22.8916, |
|
"eval_samples_per_second": 12.494, |
|
"eval_steps_per_second": 0.131, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 158.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9679471254348755, |
|
"eval_runtime": 13.713, |
|
"eval_samples_per_second": 20.856, |
|
"eval_steps_per_second": 0.219, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9674729704856873, |
|
"eval_runtime": 13.3059, |
|
"eval_samples_per_second": 21.494, |
|
"eval_steps_per_second": 0.225, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 160.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9663581252098083, |
|
"eval_runtime": 14.0564, |
|
"eval_samples_per_second": 20.347, |
|
"eval_steps_per_second": 0.213, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 161.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9654582738876343, |
|
"eval_runtime": 17.3488, |
|
"eval_samples_per_second": 16.485, |
|
"eval_steps_per_second": 0.173, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 162.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9642163515090942, |
|
"eval_runtime": 28.2809, |
|
"eval_samples_per_second": 10.113, |
|
"eval_steps_per_second": 0.106, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9635214805603027, |
|
"eval_runtime": 13.2297, |
|
"eval_samples_per_second": 21.618, |
|
"eval_steps_per_second": 0.227, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 164.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9632530808448792, |
|
"eval_runtime": 14.959, |
|
"eval_samples_per_second": 19.119, |
|
"eval_steps_per_second": 0.201, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 165.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9644965529441833, |
|
"eval_runtime": 27.3169, |
|
"eval_samples_per_second": 10.47, |
|
"eval_steps_per_second": 0.11, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 166.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9649432897567749, |
|
"eval_runtime": 16.2467, |
|
"eval_samples_per_second": 17.604, |
|
"eval_steps_per_second": 0.185, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9651181101799011, |
|
"eval_runtime": 14.8641, |
|
"eval_samples_per_second": 19.241, |
|
"eval_steps_per_second": 0.202, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 168.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9657481908798218, |
|
"eval_runtime": 14.6637, |
|
"eval_samples_per_second": 19.504, |
|
"eval_steps_per_second": 0.205, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 169.85, |
|
"eval_accuracy": 0.6538461538461539, |
|
"eval_loss": 0.9662555456161499, |
|
"eval_runtime": 16.1862, |
|
"eval_samples_per_second": 17.669, |
|
"eval_steps_per_second": 0.185, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 170.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9652681946754456, |
|
"eval_runtime": 13.8601, |
|
"eval_samples_per_second": 20.635, |
|
"eval_steps_per_second": 0.216, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9637671113014221, |
|
"eval_runtime": 14.0763, |
|
"eval_samples_per_second": 20.318, |
|
"eval_steps_per_second": 0.213, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 172.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9616447687149048, |
|
"eval_runtime": 14.3281, |
|
"eval_samples_per_second": 19.961, |
|
"eval_steps_per_second": 0.209, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 173.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9601487517356873, |
|
"eval_runtime": 14.8622, |
|
"eval_samples_per_second": 19.243, |
|
"eval_steps_per_second": 0.202, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 174.77, |
|
"eval_accuracy": 0.6538461538461539, |
|
"eval_loss": 0.9610276818275452, |
|
"eval_runtime": 11.3604, |
|
"eval_samples_per_second": 25.175, |
|
"eval_steps_per_second": 0.264, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.962989866733551, |
|
"eval_runtime": 33.9341, |
|
"eval_samples_per_second": 8.428, |
|
"eval_steps_per_second": 0.088, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 176.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9633333683013916, |
|
"eval_runtime": 11.5929, |
|
"eval_samples_per_second": 24.67, |
|
"eval_steps_per_second": 0.259, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 177.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9645780324935913, |
|
"eval_runtime": 13.4891, |
|
"eval_samples_per_second": 21.202, |
|
"eval_steps_per_second": 0.222, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 178.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9654965996742249, |
|
"eval_runtime": 28.6917, |
|
"eval_samples_per_second": 9.968, |
|
"eval_steps_per_second": 0.105, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9672998785972595, |
|
"eval_runtime": 14.4786, |
|
"eval_samples_per_second": 19.753, |
|
"eval_steps_per_second": 0.207, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 180.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9679505825042725, |
|
"eval_runtime": 13.6779, |
|
"eval_samples_per_second": 20.91, |
|
"eval_steps_per_second": 0.219, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 181.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9686614274978638, |
|
"eval_runtime": 21.421, |
|
"eval_samples_per_second": 13.351, |
|
"eval_steps_per_second": 0.14, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 182.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9691935777664185, |
|
"eval_runtime": 12.6562, |
|
"eval_samples_per_second": 22.598, |
|
"eval_steps_per_second": 0.237, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9683831334114075, |
|
"eval_runtime": 18.1378, |
|
"eval_samples_per_second": 15.768, |
|
"eval_steps_per_second": 0.165, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 184.62, |
|
"grad_norm": 28079.01171875, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.6941, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 184.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9676913022994995, |
|
"eval_runtime": 22.197, |
|
"eval_samples_per_second": 12.885, |
|
"eval_steps_per_second": 0.135, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 185.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9673991203308105, |
|
"eval_runtime": 13.8487, |
|
"eval_samples_per_second": 20.652, |
|
"eval_steps_per_second": 0.217, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 186.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9671489000320435, |
|
"eval_runtime": 16.6344, |
|
"eval_samples_per_second": 17.193, |
|
"eval_steps_per_second": 0.18, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.96697998046875, |
|
"eval_runtime": 14.8813, |
|
"eval_samples_per_second": 19.219, |
|
"eval_steps_per_second": 0.202, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 188.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9661853313446045, |
|
"eval_runtime": 15.8068, |
|
"eval_samples_per_second": 18.094, |
|
"eval_steps_per_second": 0.19, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 189.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9652778506278992, |
|
"eval_runtime": 27.4384, |
|
"eval_samples_per_second": 10.423, |
|
"eval_steps_per_second": 0.109, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 190.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9644954800605774, |
|
"eval_runtime": 12.0079, |
|
"eval_samples_per_second": 23.818, |
|
"eval_steps_per_second": 0.25, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9648195505142212, |
|
"eval_runtime": 15.7494, |
|
"eval_samples_per_second": 18.159, |
|
"eval_steps_per_second": 0.19, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 192.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9652000665664673, |
|
"eval_runtime": 14.0294, |
|
"eval_samples_per_second": 20.386, |
|
"eval_steps_per_second": 0.214, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 193.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9662671089172363, |
|
"eval_runtime": 16.6283, |
|
"eval_samples_per_second": 17.2, |
|
"eval_steps_per_second": 0.18, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 194.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9662363529205322, |
|
"eval_runtime": 12.434, |
|
"eval_samples_per_second": 23.001, |
|
"eval_steps_per_second": 0.241, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.966452419757843, |
|
"eval_runtime": 27.5274, |
|
"eval_samples_per_second": 10.39, |
|
"eval_steps_per_second": 0.109, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 196.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9667707085609436, |
|
"eval_runtime": 14.0792, |
|
"eval_samples_per_second": 20.314, |
|
"eval_steps_per_second": 0.213, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 197.85, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9669297337532043, |
|
"eval_runtime": 11.4357, |
|
"eval_samples_per_second": 25.009, |
|
"eval_steps_per_second": 0.262, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 198.77, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.967424213886261, |
|
"eval_runtime": 19.968, |
|
"eval_samples_per_second": 14.323, |
|
"eval_steps_per_second": 0.15, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9668986797332764, |
|
"eval_runtime": 11.3124, |
|
"eval_samples_per_second": 25.282, |
|
"eval_steps_per_second": 0.265, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 200.92, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9671657681465149, |
|
"eval_runtime": 15.4547, |
|
"eval_samples_per_second": 18.506, |
|
"eval_steps_per_second": 0.194, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 201.85, |
|
"eval_accuracy": 0.6468531468531469, |
|
"eval_loss": 0.9671434760093689, |
|
"eval_runtime": 11.8747, |
|
"eval_samples_per_second": 24.085, |
|
"eval_steps_per_second": 0.253, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 202.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.967323362827301, |
|
"eval_runtime": 13.4706, |
|
"eval_samples_per_second": 21.231, |
|
"eval_steps_per_second": 0.223, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9666022658348083, |
|
"eval_runtime": 11.809, |
|
"eval_samples_per_second": 24.219, |
|
"eval_steps_per_second": 0.254, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 204.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.966048002243042, |
|
"eval_runtime": 30.3868, |
|
"eval_samples_per_second": 9.412, |
|
"eval_steps_per_second": 0.099, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 205.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9656386971473694, |
|
"eval_runtime": 10.0795, |
|
"eval_samples_per_second": 28.375, |
|
"eval_steps_per_second": 0.298, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 206.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9650998711585999, |
|
"eval_runtime": 11.1232, |
|
"eval_samples_per_second": 25.712, |
|
"eval_steps_per_second": 0.27, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9660706520080566, |
|
"eval_runtime": 14.3325, |
|
"eval_samples_per_second": 19.955, |
|
"eval_steps_per_second": 0.209, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 208.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9666641354560852, |
|
"eval_runtime": 18.7486, |
|
"eval_samples_per_second": 15.254, |
|
"eval_steps_per_second": 0.16, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 209.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9667859077453613, |
|
"eval_runtime": 11.9596, |
|
"eval_samples_per_second": 23.914, |
|
"eval_steps_per_second": 0.251, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 210.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9668753147125244, |
|
"eval_runtime": 21.049, |
|
"eval_samples_per_second": 13.587, |
|
"eval_steps_per_second": 0.143, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665384292602539, |
|
"eval_runtime": 12.916, |
|
"eval_samples_per_second": 22.143, |
|
"eval_steps_per_second": 0.232, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 212.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665247797966003, |
|
"eval_runtime": 13.5797, |
|
"eval_samples_per_second": 21.061, |
|
"eval_steps_per_second": 0.221, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 213.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9663905501365662, |
|
"eval_runtime": 11.9452, |
|
"eval_samples_per_second": 23.943, |
|
"eval_steps_per_second": 0.251, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 214.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9662503600120544, |
|
"eval_runtime": 11.3523, |
|
"eval_samples_per_second": 25.193, |
|
"eval_steps_per_second": 0.264, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 215.38, |
|
"grad_norm": 25925.685546875, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.6696, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9666140079498291, |
|
"eval_runtime": 11.4254, |
|
"eval_samples_per_second": 25.032, |
|
"eval_steps_per_second": 0.263, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 216.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9667044878005981, |
|
"eval_runtime": 9.4078, |
|
"eval_samples_per_second": 30.4, |
|
"eval_steps_per_second": 0.319, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 217.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665290713310242, |
|
"eval_runtime": 8.9463, |
|
"eval_samples_per_second": 31.969, |
|
"eval_steps_per_second": 0.335, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 218.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9663181900978088, |
|
"eval_runtime": 12.4146, |
|
"eval_samples_per_second": 23.037, |
|
"eval_steps_per_second": 0.242, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9661046862602234, |
|
"eval_runtime": 12.2235, |
|
"eval_samples_per_second": 23.398, |
|
"eval_steps_per_second": 0.245, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 220.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.966101348400116, |
|
"eval_runtime": 12.7423, |
|
"eval_samples_per_second": 22.445, |
|
"eval_steps_per_second": 0.235, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 221.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9661813974380493, |
|
"eval_runtime": 11.077, |
|
"eval_samples_per_second": 25.819, |
|
"eval_steps_per_second": 0.271, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 222.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9663574695587158, |
|
"eval_runtime": 13.5789, |
|
"eval_samples_per_second": 21.062, |
|
"eval_steps_per_second": 0.221, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9663906693458557, |
|
"eval_runtime": 11.7803, |
|
"eval_samples_per_second": 24.278, |
|
"eval_steps_per_second": 0.255, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 224.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9664490222930908, |
|
"eval_runtime": 13.0369, |
|
"eval_samples_per_second": 21.938, |
|
"eval_steps_per_second": 0.23, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 225.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665610790252686, |
|
"eval_runtime": 10.8415, |
|
"eval_samples_per_second": 26.38, |
|
"eval_steps_per_second": 0.277, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 226.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665590524673462, |
|
"eval_runtime": 14.6312, |
|
"eval_samples_per_second": 19.547, |
|
"eval_steps_per_second": 0.205, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665370583534241, |
|
"eval_runtime": 11.8502, |
|
"eval_samples_per_second": 24.135, |
|
"eval_steps_per_second": 0.253, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 228.92, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665577411651611, |
|
"eval_runtime": 12.7745, |
|
"eval_samples_per_second": 22.388, |
|
"eval_steps_per_second": 0.235, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 229.85, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665866494178772, |
|
"eval_runtime": 13.6538, |
|
"eval_samples_per_second": 20.947, |
|
"eval_steps_per_second": 0.22, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"eval_accuracy": 0.6503496503496503, |
|
"eval_loss": 0.9665878415107727, |
|
"eval_runtime": 11.2039, |
|
"eval_samples_per_second": 25.527, |
|
"eval_steps_per_second": 0.268, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"step": 750, |
|
"total_flos": 3.12964745557632e+18, |
|
"train_loss": 0.9558865051269532, |
|
"train_runtime": 10052.9365, |
|
"train_samples_per_second": 38.496, |
|
"train_steps_per_second": 0.075 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 250, |
|
"save_steps": 500, |
|
"total_flos": 3.12964745557632e+18, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|