|
{ |
|
"best_metric": 0.7027972027972028, |
|
"best_model_checkpoint": "wav2vec2-5Class-train-test-finetune/checkpoint-721", |
|
"epoch": 323.0769230769231, |
|
"eval_steps": 500, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.59893798828125, |
|
"eval_runtime": 4.2802, |
|
"eval_samples_per_second": 66.819, |
|
"eval_steps_per_second": 0.701, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.5987956523895264, |
|
"eval_runtime": 4.8166, |
|
"eval_samples_per_second": 59.378, |
|
"eval_steps_per_second": 0.623, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.598555326461792, |
|
"eval_runtime": 3.989, |
|
"eval_samples_per_second": 71.697, |
|
"eval_steps_per_second": 0.752, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.598075270652771, |
|
"eval_runtime": 4.3871, |
|
"eval_samples_per_second": 65.191, |
|
"eval_steps_per_second": 0.684, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_accuracy": 0.3356643356643357, |
|
"eval_loss": 1.5975924730300903, |
|
"eval_runtime": 4.7955, |
|
"eval_samples_per_second": 59.639, |
|
"eval_steps_per_second": 0.626, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.5970256328582764, |
|
"eval_runtime": 4.4665, |
|
"eval_samples_per_second": 64.032, |
|
"eval_steps_per_second": 0.672, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_accuracy": 0.33916083916083917, |
|
"eval_loss": 1.5963499546051025, |
|
"eval_runtime": 4.3016, |
|
"eval_samples_per_second": 66.488, |
|
"eval_steps_per_second": 0.697, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.3356643356643357, |
|
"eval_loss": 1.5952636003494263, |
|
"eval_runtime": 3.9531, |
|
"eval_samples_per_second": 72.347, |
|
"eval_steps_per_second": 0.759, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"eval_accuracy": 0.32867132867132864, |
|
"eval_loss": 1.594333291053772, |
|
"eval_runtime": 5.6915, |
|
"eval_samples_per_second": 50.25, |
|
"eval_steps_per_second": 0.527, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_accuracy": 0.32867132867132864, |
|
"eval_loss": 1.5933252573013306, |
|
"eval_runtime": 4.4236, |
|
"eval_samples_per_second": 64.653, |
|
"eval_steps_per_second": 0.678, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"eval_accuracy": 0.32167832167832167, |
|
"eval_loss": 1.592211365699768, |
|
"eval_runtime": 4.9541, |
|
"eval_samples_per_second": 57.73, |
|
"eval_steps_per_second": 0.606, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3181818181818182, |
|
"eval_loss": 1.5905568599700928, |
|
"eval_runtime": 5.1955, |
|
"eval_samples_per_second": 55.047, |
|
"eval_steps_per_second": 0.577, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_accuracy": 0.3146853146853147, |
|
"eval_loss": 1.58920156955719, |
|
"eval_runtime": 3.6236, |
|
"eval_samples_per_second": 78.926, |
|
"eval_steps_per_second": 0.828, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"eval_accuracy": 0.3006993006993007, |
|
"eval_loss": 1.5877453088760376, |
|
"eval_runtime": 4.348, |
|
"eval_samples_per_second": 65.778, |
|
"eval_steps_per_second": 0.69, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"eval_accuracy": 0.2937062937062937, |
|
"eval_loss": 1.5862104892730713, |
|
"eval_runtime": 4.6902, |
|
"eval_samples_per_second": 60.978, |
|
"eval_steps_per_second": 0.64, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"grad_norm": 65952.0234375, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 1.5907, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.2972027972027972, |
|
"eval_loss": 1.5840750932693481, |
|
"eval_runtime": 4.547, |
|
"eval_samples_per_second": 62.899, |
|
"eval_steps_per_second": 0.66, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"eval_accuracy": 0.28321678321678323, |
|
"eval_loss": 1.5823713541030884, |
|
"eval_runtime": 5.3625, |
|
"eval_samples_per_second": 53.334, |
|
"eval_steps_per_second": 0.559, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"eval_accuracy": 0.27972027972027974, |
|
"eval_loss": 1.5806101560592651, |
|
"eval_runtime": 4.7671, |
|
"eval_samples_per_second": 59.995, |
|
"eval_steps_per_second": 0.629, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_loss": 1.5787912607192993, |
|
"eval_runtime": 4.3086, |
|
"eval_samples_per_second": 66.378, |
|
"eval_steps_per_second": 0.696, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_loss": 1.576175332069397, |
|
"eval_runtime": 5.3175, |
|
"eval_samples_per_second": 53.784, |
|
"eval_steps_per_second": 0.564, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"eval_accuracy": 0.26573426573426573, |
|
"eval_loss": 1.5740149021148682, |
|
"eval_runtime": 4.5172, |
|
"eval_samples_per_second": 63.314, |
|
"eval_steps_per_second": 0.664, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.85, |
|
"eval_accuracy": 0.25524475524475526, |
|
"eval_loss": 1.5717105865478516, |
|
"eval_runtime": 3.9011, |
|
"eval_samples_per_second": 73.312, |
|
"eval_steps_per_second": 0.769, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.77, |
|
"eval_accuracy": 0.2517482517482518, |
|
"eval_loss": 1.5693939924240112, |
|
"eval_runtime": 3.9307, |
|
"eval_samples_per_second": 72.76, |
|
"eval_steps_per_second": 0.763, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.23776223776223776, |
|
"eval_loss": 1.566083312034607, |
|
"eval_runtime": 3.7134, |
|
"eval_samples_per_second": 77.019, |
|
"eval_steps_per_second": 0.808, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.92, |
|
"eval_accuracy": 0.23426573426573427, |
|
"eval_loss": 1.5634570121765137, |
|
"eval_runtime": 4.5234, |
|
"eval_samples_per_second": 63.226, |
|
"eval_steps_per_second": 0.663, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"eval_accuracy": 0.22377622377622378, |
|
"eval_loss": 1.5608404874801636, |
|
"eval_runtime": 4.4129, |
|
"eval_samples_per_second": 64.81, |
|
"eval_steps_per_second": 0.68, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.77, |
|
"eval_accuracy": 0.22377622377622378, |
|
"eval_loss": 1.5581375360488892, |
|
"eval_runtime": 4.7168, |
|
"eval_samples_per_second": 60.635, |
|
"eval_steps_per_second": 0.636, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5542311668395996, |
|
"eval_runtime": 5.4736, |
|
"eval_samples_per_second": 52.251, |
|
"eval_steps_per_second": 0.548, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5511480569839478, |
|
"eval_runtime": 5.6532, |
|
"eval_samples_per_second": 50.591, |
|
"eval_steps_per_second": 0.531, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5479341745376587, |
|
"eval_runtime": 5.2852, |
|
"eval_samples_per_second": 54.113, |
|
"eval_steps_per_second": 0.568, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"grad_norm": 68930.8125, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.5431, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5448040962219238, |
|
"eval_runtime": 4.6157, |
|
"eval_samples_per_second": 61.962, |
|
"eval_steps_per_second": 0.65, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5407565832138062, |
|
"eval_runtime": 6.2131, |
|
"eval_samples_per_second": 46.032, |
|
"eval_steps_per_second": 0.483, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5379865169525146, |
|
"eval_runtime": 4.645, |
|
"eval_samples_per_second": 61.571, |
|
"eval_steps_per_second": 0.646, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5359200239181519, |
|
"eval_runtime": 5.5884, |
|
"eval_samples_per_second": 51.178, |
|
"eval_steps_per_second": 0.537, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.77, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5345218181610107, |
|
"eval_runtime": 4.5718, |
|
"eval_samples_per_second": 62.557, |
|
"eval_steps_per_second": 0.656, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5334985256195068, |
|
"eval_runtime": 5.3526, |
|
"eval_samples_per_second": 53.432, |
|
"eval_steps_per_second": 0.56, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5340909957885742, |
|
"eval_runtime": 4.471, |
|
"eval_samples_per_second": 63.967, |
|
"eval_steps_per_second": 0.671, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 37.85, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5361381769180298, |
|
"eval_runtime": 3.5623, |
|
"eval_samples_per_second": 80.286, |
|
"eval_steps_per_second": 0.842, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 38.77, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5397439002990723, |
|
"eval_runtime": 4.9023, |
|
"eval_samples_per_second": 58.34, |
|
"eval_steps_per_second": 0.612, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5478534698486328, |
|
"eval_runtime": 3.7352, |
|
"eval_samples_per_second": 76.569, |
|
"eval_steps_per_second": 0.803, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.92, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5564229488372803, |
|
"eval_runtime": 4.3225, |
|
"eval_samples_per_second": 66.166, |
|
"eval_steps_per_second": 0.694, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 41.85, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5678777694702148, |
|
"eval_runtime": 4.6076, |
|
"eval_samples_per_second": 62.072, |
|
"eval_steps_per_second": 0.651, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 42.77, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5821971893310547, |
|
"eval_runtime": 4.2697, |
|
"eval_samples_per_second": 66.983, |
|
"eval_steps_per_second": 0.703, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.6002099514007568, |
|
"eval_runtime": 4.533, |
|
"eval_samples_per_second": 63.094, |
|
"eval_steps_per_second": 0.662, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 44.92, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.6109449863433838, |
|
"eval_runtime": 3.9799, |
|
"eval_samples_per_second": 71.861, |
|
"eval_steps_per_second": 0.754, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 45.85, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.6145771741867065, |
|
"eval_runtime": 4.3613, |
|
"eval_samples_per_second": 65.576, |
|
"eval_steps_per_second": 0.688, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"grad_norm": 45833.69921875, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.4033, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 46.77, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.6130825281143188, |
|
"eval_runtime": 4.2963, |
|
"eval_samples_per_second": 66.568, |
|
"eval_steps_per_second": 0.698, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.6008453369140625, |
|
"eval_runtime": 4.063, |
|
"eval_samples_per_second": 70.391, |
|
"eval_steps_per_second": 0.738, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 48.92, |
|
"eval_accuracy": 0.24125874125874125, |
|
"eval_loss": 1.586226224899292, |
|
"eval_runtime": 4.5029, |
|
"eval_samples_per_second": 63.515, |
|
"eval_steps_per_second": 0.666, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_loss": 1.572645902633667, |
|
"eval_runtime": 5.0597, |
|
"eval_samples_per_second": 56.525, |
|
"eval_steps_per_second": 0.593, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 50.77, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_loss": 1.559901237487793, |
|
"eval_runtime": 4.4174, |
|
"eval_samples_per_second": 64.744, |
|
"eval_steps_per_second": 0.679, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.2867132867132867, |
|
"eval_loss": 1.5458828210830688, |
|
"eval_runtime": 4.357, |
|
"eval_samples_per_second": 65.642, |
|
"eval_steps_per_second": 0.689, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 52.92, |
|
"eval_accuracy": 0.2937062937062937, |
|
"eval_loss": 1.5382803678512573, |
|
"eval_runtime": 5.6394, |
|
"eval_samples_per_second": 50.714, |
|
"eval_steps_per_second": 0.532, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 53.85, |
|
"eval_accuracy": 0.3146853146853147, |
|
"eval_loss": 1.5310516357421875, |
|
"eval_runtime": 4.4695, |
|
"eval_samples_per_second": 63.989, |
|
"eval_steps_per_second": 0.671, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 54.77, |
|
"eval_accuracy": 0.32517482517482516, |
|
"eval_loss": 1.5242317914962769, |
|
"eval_runtime": 3.8554, |
|
"eval_samples_per_second": 74.181, |
|
"eval_steps_per_second": 0.778, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.3356643356643357, |
|
"eval_loss": 1.5169461965560913, |
|
"eval_runtime": 3.9817, |
|
"eval_samples_per_second": 71.828, |
|
"eval_steps_per_second": 0.753, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 56.92, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.5103094577789307, |
|
"eval_runtime": 3.9287, |
|
"eval_samples_per_second": 72.797, |
|
"eval_steps_per_second": 0.764, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 57.85, |
|
"eval_accuracy": 0.34615384615384615, |
|
"eval_loss": 1.5055506229400635, |
|
"eval_runtime": 4.3922, |
|
"eval_samples_per_second": 65.115, |
|
"eval_steps_per_second": 0.683, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 58.77, |
|
"eval_accuracy": 0.34615384615384615, |
|
"eval_loss": 1.4995349645614624, |
|
"eval_runtime": 4.2261, |
|
"eval_samples_per_second": 67.675, |
|
"eval_steps_per_second": 0.71, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.34965034965034963, |
|
"eval_loss": 1.4939184188842773, |
|
"eval_runtime": 3.9946, |
|
"eval_samples_per_second": 71.597, |
|
"eval_steps_per_second": 0.751, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 60.92, |
|
"eval_accuracy": 0.36013986013986016, |
|
"eval_loss": 1.4870301485061646, |
|
"eval_runtime": 4.7123, |
|
"eval_samples_per_second": 60.693, |
|
"eval_steps_per_second": 0.637, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"grad_norm": 27324.4609375, |
|
"learning_rate": 2.6984126984126984e-05, |
|
"loss": 1.2485, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 61.85, |
|
"eval_accuracy": 0.36713286713286714, |
|
"eval_loss": 1.4828742742538452, |
|
"eval_runtime": 4.8484, |
|
"eval_samples_per_second": 58.989, |
|
"eval_steps_per_second": 0.619, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 62.77, |
|
"eval_accuracy": 0.3741258741258741, |
|
"eval_loss": 1.4735387563705444, |
|
"eval_runtime": 4.203, |
|
"eval_samples_per_second": 68.047, |
|
"eval_steps_per_second": 0.714, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.3811188811188811, |
|
"eval_loss": 1.4612373113632202, |
|
"eval_runtime": 4.6341, |
|
"eval_samples_per_second": 61.716, |
|
"eval_steps_per_second": 0.647, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 64.92, |
|
"eval_accuracy": 0.3986013986013986, |
|
"eval_loss": 1.4491915702819824, |
|
"eval_runtime": 3.9863, |
|
"eval_samples_per_second": 71.745, |
|
"eval_steps_per_second": 0.753, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 65.85, |
|
"eval_accuracy": 0.4125874125874126, |
|
"eval_loss": 1.4364999532699585, |
|
"eval_runtime": 4.1321, |
|
"eval_samples_per_second": 69.214, |
|
"eval_steps_per_second": 0.726, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 66.77, |
|
"eval_accuracy": 0.4230769230769231, |
|
"eval_loss": 1.4226809740066528, |
|
"eval_runtime": 4.2397, |
|
"eval_samples_per_second": 67.458, |
|
"eval_steps_per_second": 0.708, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.43356643356643354, |
|
"eval_loss": 1.4095807075500488, |
|
"eval_runtime": 3.8645, |
|
"eval_samples_per_second": 74.007, |
|
"eval_steps_per_second": 0.776, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 68.92, |
|
"eval_accuracy": 0.4370629370629371, |
|
"eval_loss": 1.4010183811187744, |
|
"eval_runtime": 4.5348, |
|
"eval_samples_per_second": 63.068, |
|
"eval_steps_per_second": 0.662, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 69.85, |
|
"eval_accuracy": 0.4405594405594406, |
|
"eval_loss": 1.3949679136276245, |
|
"eval_runtime": 4.4414, |
|
"eval_samples_per_second": 64.394, |
|
"eval_steps_per_second": 0.675, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 70.77, |
|
"eval_accuracy": 0.4370629370629371, |
|
"eval_loss": 1.3919552564620972, |
|
"eval_runtime": 4.3028, |
|
"eval_samples_per_second": 66.468, |
|
"eval_steps_per_second": 0.697, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.4405594405594406, |
|
"eval_loss": 1.3798925876617432, |
|
"eval_runtime": 3.4387, |
|
"eval_samples_per_second": 83.17, |
|
"eval_steps_per_second": 0.872, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 72.92, |
|
"eval_accuracy": 0.44755244755244755, |
|
"eval_loss": 1.366864800453186, |
|
"eval_runtime": 4.6503, |
|
"eval_samples_per_second": 61.502, |
|
"eval_steps_per_second": 0.645, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 73.85, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 1.3514918088912964, |
|
"eval_runtime": 4.5609, |
|
"eval_samples_per_second": 62.707, |
|
"eval_steps_per_second": 0.658, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 74.77, |
|
"eval_accuracy": 0.47202797202797203, |
|
"eval_loss": 1.3400850296020508, |
|
"eval_runtime": 3.8017, |
|
"eval_samples_per_second": 75.229, |
|
"eval_steps_per_second": 0.789, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.4825174825174825, |
|
"eval_loss": 1.3286209106445312, |
|
"eval_runtime": 5.7477, |
|
"eval_samples_per_second": 49.759, |
|
"eval_steps_per_second": 0.522, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"grad_norm": 23198.236328125, |
|
"learning_rate": 2.5396825396825397e-05, |
|
"loss": 1.1198, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_accuracy": 0.486013986013986, |
|
"eval_loss": 1.317462682723999, |
|
"eval_runtime": 4.5266, |
|
"eval_samples_per_second": 63.182, |
|
"eval_steps_per_second": 0.663, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 77.85, |
|
"eval_accuracy": 0.48951048951048953, |
|
"eval_loss": 1.3067171573638916, |
|
"eval_runtime": 3.882, |
|
"eval_samples_per_second": 73.673, |
|
"eval_steps_per_second": 0.773, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 78.77, |
|
"eval_accuracy": 0.4825174825174825, |
|
"eval_loss": 1.3013015985488892, |
|
"eval_runtime": 4.0902, |
|
"eval_samples_per_second": 69.923, |
|
"eval_steps_per_second": 0.733, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.479020979020979, |
|
"eval_loss": 1.2954434156417847, |
|
"eval_runtime": 5.4081, |
|
"eval_samples_per_second": 52.884, |
|
"eval_steps_per_second": 0.555, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.92, |
|
"eval_accuracy": 0.486013986013986, |
|
"eval_loss": 1.289677381515503, |
|
"eval_runtime": 4.384, |
|
"eval_samples_per_second": 65.238, |
|
"eval_steps_per_second": 0.684, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 81.85, |
|
"eval_accuracy": 0.486013986013986, |
|
"eval_loss": 1.283199667930603, |
|
"eval_runtime": 4.3325, |
|
"eval_samples_per_second": 66.013, |
|
"eval_steps_per_second": 0.692, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 82.77, |
|
"eval_accuracy": 0.4825174825174825, |
|
"eval_loss": 1.2712346315383911, |
|
"eval_runtime": 4.6039, |
|
"eval_samples_per_second": 62.121, |
|
"eval_steps_per_second": 0.652, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.493006993006993, |
|
"eval_loss": 1.2584125995635986, |
|
"eval_runtime": 4.5791, |
|
"eval_samples_per_second": 62.458, |
|
"eval_steps_per_second": 0.655, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 84.92, |
|
"eval_accuracy": 0.4965034965034965, |
|
"eval_loss": 1.2516244649887085, |
|
"eval_runtime": 4.8825, |
|
"eval_samples_per_second": 58.577, |
|
"eval_steps_per_second": 0.614, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.2455971240997314, |
|
"eval_runtime": 3.9744, |
|
"eval_samples_per_second": 71.96, |
|
"eval_steps_per_second": 0.755, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 86.77, |
|
"eval_accuracy": 0.5104895104895105, |
|
"eval_loss": 1.2443982362747192, |
|
"eval_runtime": 4.5207, |
|
"eval_samples_per_second": 63.265, |
|
"eval_steps_per_second": 0.664, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.5104895104895105, |
|
"eval_loss": 1.2373132705688477, |
|
"eval_runtime": 5.6152, |
|
"eval_samples_per_second": 50.933, |
|
"eval_steps_per_second": 0.534, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 88.92, |
|
"eval_accuracy": 0.513986013986014, |
|
"eval_loss": 1.2309471368789673, |
|
"eval_runtime": 4.7969, |
|
"eval_samples_per_second": 59.622, |
|
"eval_steps_per_second": 0.625, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 89.85, |
|
"eval_accuracy": 0.5209790209790209, |
|
"eval_loss": 1.2219436168670654, |
|
"eval_runtime": 4.2518, |
|
"eval_samples_per_second": 67.266, |
|
"eval_steps_per_second": 0.706, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 90.77, |
|
"eval_accuracy": 0.5209790209790209, |
|
"eval_loss": 1.2145464420318604, |
|
"eval_runtime": 4.6368, |
|
"eval_samples_per_second": 61.68, |
|
"eval_steps_per_second": 0.647, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.527972027972028, |
|
"eval_loss": 1.2054263353347778, |
|
"eval_runtime": 4.2071, |
|
"eval_samples_per_second": 67.98, |
|
"eval_steps_per_second": 0.713, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"grad_norm": 29195.7578125, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.9915, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.92, |
|
"eval_accuracy": 0.534965034965035, |
|
"eval_loss": 1.1981616020202637, |
|
"eval_runtime": 4.3609, |
|
"eval_samples_per_second": 65.583, |
|
"eval_steps_per_second": 0.688, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 93.85, |
|
"eval_accuracy": 0.5384615384615384, |
|
"eval_loss": 1.1913262605667114, |
|
"eval_runtime": 3.9073, |
|
"eval_samples_per_second": 73.197, |
|
"eval_steps_per_second": 0.768, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 94.77, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.185881495475769, |
|
"eval_runtime": 3.928, |
|
"eval_samples_per_second": 72.811, |
|
"eval_steps_per_second": 0.764, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.548951048951049, |
|
"eval_loss": 1.179394006729126, |
|
"eval_runtime": 4.1933, |
|
"eval_samples_per_second": 68.204, |
|
"eval_steps_per_second": 0.715, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 96.92, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.1733678579330444, |
|
"eval_runtime": 5.0205, |
|
"eval_samples_per_second": 56.967, |
|
"eval_steps_per_second": 0.598, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 97.85, |
|
"eval_accuracy": 0.5524475524475524, |
|
"eval_loss": 1.1637603044509888, |
|
"eval_runtime": 4.8886, |
|
"eval_samples_per_second": 58.503, |
|
"eval_steps_per_second": 0.614, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 98.77, |
|
"eval_accuracy": 0.5524475524475524, |
|
"eval_loss": 1.1549575328826904, |
|
"eval_runtime": 4.9266, |
|
"eval_samples_per_second": 58.052, |
|
"eval_steps_per_second": 0.609, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.548951048951049, |
|
"eval_loss": 1.1464989185333252, |
|
"eval_runtime": 4.7642, |
|
"eval_samples_per_second": 60.032, |
|
"eval_steps_per_second": 0.63, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 100.92, |
|
"eval_accuracy": 0.5594405594405595, |
|
"eval_loss": 1.1443748474121094, |
|
"eval_runtime": 4.7025, |
|
"eval_samples_per_second": 60.819, |
|
"eval_steps_per_second": 0.638, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 101.85, |
|
"eval_accuracy": 0.5629370629370629, |
|
"eval_loss": 1.1359333992004395, |
|
"eval_runtime": 4.6342, |
|
"eval_samples_per_second": 61.715, |
|
"eval_steps_per_second": 0.647, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 102.77, |
|
"eval_accuracy": 0.5664335664335665, |
|
"eval_loss": 1.1271060705184937, |
|
"eval_runtime": 4.4245, |
|
"eval_samples_per_second": 64.639, |
|
"eval_steps_per_second": 0.678, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.5769230769230769, |
|
"eval_loss": 1.109040379524231, |
|
"eval_runtime": 4.9047, |
|
"eval_samples_per_second": 58.311, |
|
"eval_steps_per_second": 0.612, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 104.92, |
|
"eval_accuracy": 0.5944055944055944, |
|
"eval_loss": 1.0972033739089966, |
|
"eval_runtime": 4.5473, |
|
"eval_samples_per_second": 62.895, |
|
"eval_steps_per_second": 0.66, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 105.85, |
|
"eval_accuracy": 0.6013986013986014, |
|
"eval_loss": 1.090105414390564, |
|
"eval_runtime": 3.7875, |
|
"eval_samples_per_second": 75.511, |
|
"eval_steps_per_second": 0.792, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 106.77, |
|
"eval_accuracy": 0.6083916083916084, |
|
"eval_loss": 1.0809463262557983, |
|
"eval_runtime": 4.7656, |
|
"eval_samples_per_second": 60.014, |
|
"eval_steps_per_second": 0.63, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 107.69, |
|
"grad_norm": 32308.33984375, |
|
"learning_rate": 2.222222222222222e-05, |
|
"loss": 0.8834, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.6118881118881119, |
|
"eval_loss": 1.0683268308639526, |
|
"eval_runtime": 4.3145, |
|
"eval_samples_per_second": 66.288, |
|
"eval_steps_per_second": 0.695, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 108.92, |
|
"eval_accuracy": 0.6223776223776224, |
|
"eval_loss": 1.0605404376983643, |
|
"eval_runtime": 4.6097, |
|
"eval_samples_per_second": 62.043, |
|
"eval_steps_per_second": 0.651, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 109.85, |
|
"eval_accuracy": 0.6258741258741258, |
|
"eval_loss": 1.0562984943389893, |
|
"eval_runtime": 4.859, |
|
"eval_samples_per_second": 58.86, |
|
"eval_steps_per_second": 0.617, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 110.77, |
|
"eval_accuracy": 0.6223776223776224, |
|
"eval_loss": 1.0537959337234497, |
|
"eval_runtime": 4.948, |
|
"eval_samples_per_second": 57.801, |
|
"eval_steps_per_second": 0.606, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 1.0491102933883667, |
|
"eval_runtime": 4.1434, |
|
"eval_samples_per_second": 69.026, |
|
"eval_steps_per_second": 0.724, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 112.92, |
|
"eval_accuracy": 0.6118881118881119, |
|
"eval_loss": 1.044057011604309, |
|
"eval_runtime": 4.3774, |
|
"eval_samples_per_second": 65.336, |
|
"eval_steps_per_second": 0.685, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 113.85, |
|
"eval_accuracy": 0.6118881118881119, |
|
"eval_loss": 1.0357924699783325, |
|
"eval_runtime": 4.7038, |
|
"eval_samples_per_second": 60.801, |
|
"eval_steps_per_second": 0.638, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 114.77, |
|
"eval_accuracy": 0.6223776223776224, |
|
"eval_loss": 1.0194157361984253, |
|
"eval_runtime": 5.0902, |
|
"eval_samples_per_second": 56.187, |
|
"eval_steps_per_second": 0.589, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.6293706293706294, |
|
"eval_loss": 1.0034115314483643, |
|
"eval_runtime": 4.386, |
|
"eval_samples_per_second": 65.208, |
|
"eval_steps_per_second": 0.684, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 116.92, |
|
"eval_accuracy": 0.6258741258741258, |
|
"eval_loss": 0.9991269707679749, |
|
"eval_runtime": 5.2708, |
|
"eval_samples_per_second": 54.261, |
|
"eval_steps_per_second": 0.569, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 117.85, |
|
"eval_accuracy": 0.6258741258741258, |
|
"eval_loss": 0.9959561824798584, |
|
"eval_runtime": 4.7556, |
|
"eval_samples_per_second": 60.139, |
|
"eval_steps_per_second": 0.631, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 118.77, |
|
"eval_accuracy": 0.6293706293706294, |
|
"eval_loss": 0.9911425113677979, |
|
"eval_runtime": 4.0817, |
|
"eval_samples_per_second": 70.068, |
|
"eval_steps_per_second": 0.735, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9834115505218506, |
|
"eval_runtime": 4.0058, |
|
"eval_samples_per_second": 71.396, |
|
"eval_steps_per_second": 0.749, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 120.92, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9775691628456116, |
|
"eval_runtime": 4.3856, |
|
"eval_samples_per_second": 65.214, |
|
"eval_steps_per_second": 0.684, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 121.85, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9772741198539734, |
|
"eval_runtime": 4.6976, |
|
"eval_samples_per_second": 60.882, |
|
"eval_steps_per_second": 0.639, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 122.77, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9734641909599304, |
|
"eval_runtime": 4.6506, |
|
"eval_samples_per_second": 61.498, |
|
"eval_steps_per_second": 0.645, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 123.08, |
|
"grad_norm": 27630.990234375, |
|
"learning_rate": 2.0634920634920633e-05, |
|
"loss": 0.7786, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 0.9730696082115173, |
|
"eval_runtime": 3.9976, |
|
"eval_samples_per_second": 71.542, |
|
"eval_steps_per_second": 0.75, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 124.92, |
|
"eval_accuracy": 0.6433566433566433, |
|
"eval_loss": 0.9727755188941956, |
|
"eval_runtime": 4.0553, |
|
"eval_samples_per_second": 70.525, |
|
"eval_steps_per_second": 0.74, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 125.85, |
|
"eval_accuracy": 0.6573426573426573, |
|
"eval_loss": 0.9657326936721802, |
|
"eval_runtime": 4.4666, |
|
"eval_samples_per_second": 64.031, |
|
"eval_steps_per_second": 0.672, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 126.77, |
|
"eval_accuracy": 0.6573426573426573, |
|
"eval_loss": 0.9547586441040039, |
|
"eval_runtime": 4.6999, |
|
"eval_samples_per_second": 60.852, |
|
"eval_steps_per_second": 0.638, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.6643356643356644, |
|
"eval_loss": 0.942358136177063, |
|
"eval_runtime": 4.8438, |
|
"eval_samples_per_second": 59.045, |
|
"eval_steps_per_second": 0.619, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 128.92, |
|
"eval_accuracy": 0.6678321678321678, |
|
"eval_loss": 0.9391436576843262, |
|
"eval_runtime": 4.4506, |
|
"eval_samples_per_second": 64.261, |
|
"eval_steps_per_second": 0.674, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 129.85, |
|
"eval_accuracy": 0.6678321678321678, |
|
"eval_loss": 0.9418392777442932, |
|
"eval_runtime": 4.2912, |
|
"eval_samples_per_second": 66.648, |
|
"eval_steps_per_second": 0.699, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 130.77, |
|
"eval_accuracy": 0.6608391608391608, |
|
"eval_loss": 0.9476207494735718, |
|
"eval_runtime": 4.7281, |
|
"eval_samples_per_second": 60.49, |
|
"eval_steps_per_second": 0.635, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.6643356643356644, |
|
"eval_loss": 0.9457269310951233, |
|
"eval_runtime": 4.314, |
|
"eval_samples_per_second": 66.295, |
|
"eval_steps_per_second": 0.695, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 132.92, |
|
"eval_accuracy": 0.6643356643356644, |
|
"eval_loss": 0.941338062286377, |
|
"eval_runtime": 3.916, |
|
"eval_samples_per_second": 73.033, |
|
"eval_steps_per_second": 0.766, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 133.85, |
|
"eval_accuracy": 0.6678321678321678, |
|
"eval_loss": 0.9334166049957275, |
|
"eval_runtime": 4.5886, |
|
"eval_samples_per_second": 62.329, |
|
"eval_steps_per_second": 0.654, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 134.77, |
|
"eval_accuracy": 0.6678321678321678, |
|
"eval_loss": 0.9328890442848206, |
|
"eval_runtime": 4.1417, |
|
"eval_samples_per_second": 69.054, |
|
"eval_steps_per_second": 0.724, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.9333996772766113, |
|
"eval_runtime": 4.538, |
|
"eval_samples_per_second": 63.023, |
|
"eval_steps_per_second": 0.661, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 136.92, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.9264596700668335, |
|
"eval_runtime": 4.6642, |
|
"eval_samples_per_second": 61.318, |
|
"eval_steps_per_second": 0.643, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 137.85, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.9186587929725647, |
|
"eval_runtime": 4.5978, |
|
"eval_samples_per_second": 62.204, |
|
"eval_steps_per_second": 0.652, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 138.46, |
|
"grad_norm": 34684.0078125, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 0.7133, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 138.77, |
|
"eval_accuracy": 0.6678321678321678, |
|
"eval_loss": 0.916916012763977, |
|
"eval_runtime": 4.1718, |
|
"eval_samples_per_second": 68.556, |
|
"eval_steps_per_second": 0.719, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.9141567349433899, |
|
"eval_runtime": 4.8158, |
|
"eval_samples_per_second": 59.388, |
|
"eval_steps_per_second": 0.623, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 140.92, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.9131244421005249, |
|
"eval_runtime": 4.3984, |
|
"eval_samples_per_second": 65.024, |
|
"eval_steps_per_second": 0.682, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 141.85, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.9160958528518677, |
|
"eval_runtime": 3.9738, |
|
"eval_samples_per_second": 71.971, |
|
"eval_steps_per_second": 0.755, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 142.77, |
|
"eval_accuracy": 0.6678321678321678, |
|
"eval_loss": 0.9223662614822388, |
|
"eval_runtime": 3.7836, |
|
"eval_samples_per_second": 75.589, |
|
"eval_steps_per_second": 0.793, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.9139449000358582, |
|
"eval_runtime": 4.0554, |
|
"eval_samples_per_second": 70.522, |
|
"eval_steps_per_second": 0.74, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 144.92, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.9089756608009338, |
|
"eval_runtime": 4.4989, |
|
"eval_samples_per_second": 63.571, |
|
"eval_steps_per_second": 0.667, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 145.85, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.9072948694229126, |
|
"eval_runtime": 3.984, |
|
"eval_samples_per_second": 71.788, |
|
"eval_steps_per_second": 0.753, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 146.77, |
|
"eval_accuracy": 0.6608391608391608, |
|
"eval_loss": 0.9110231995582581, |
|
"eval_runtime": 4.596, |
|
"eval_samples_per_second": 62.228, |
|
"eval_steps_per_second": 0.653, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.6573426573426573, |
|
"eval_loss": 0.9167369604110718, |
|
"eval_runtime": 4.7051, |
|
"eval_samples_per_second": 60.785, |
|
"eval_steps_per_second": 0.638, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 148.92, |
|
"eval_accuracy": 0.6643356643356644, |
|
"eval_loss": 0.9118071794509888, |
|
"eval_runtime": 3.9295, |
|
"eval_samples_per_second": 72.783, |
|
"eval_steps_per_second": 0.763, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 149.85, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.8996461629867554, |
|
"eval_runtime": 4.5063, |
|
"eval_samples_per_second": 63.466, |
|
"eval_steps_per_second": 0.666, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 150.77, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8903929591178894, |
|
"eval_runtime": 4.0074, |
|
"eval_samples_per_second": 71.369, |
|
"eval_steps_per_second": 0.749, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8889052867889404, |
|
"eval_runtime": 4.2482, |
|
"eval_samples_per_second": 67.323, |
|
"eval_steps_per_second": 0.706, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 152.92, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.889894425868988, |
|
"eval_runtime": 4.7658, |
|
"eval_samples_per_second": 60.011, |
|
"eval_steps_per_second": 0.629, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"grad_norm": 27670.865234375, |
|
"learning_rate": 1.746031746031746e-05, |
|
"loss": 0.6674, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.887377917766571, |
|
"eval_runtime": 4.6951, |
|
"eval_samples_per_second": 60.915, |
|
"eval_steps_per_second": 0.639, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 154.77, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8873924016952515, |
|
"eval_runtime": 3.8042, |
|
"eval_samples_per_second": 75.181, |
|
"eval_steps_per_second": 0.789, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8905075788497925, |
|
"eval_runtime": 3.9282, |
|
"eval_samples_per_second": 72.806, |
|
"eval_steps_per_second": 0.764, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 156.92, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8881194591522217, |
|
"eval_runtime": 4.2085, |
|
"eval_samples_per_second": 67.957, |
|
"eval_steps_per_second": 0.713, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 157.85, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.882903516292572, |
|
"eval_runtime": 5.345, |
|
"eval_samples_per_second": 53.508, |
|
"eval_steps_per_second": 0.561, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 158.77, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8809071183204651, |
|
"eval_runtime": 4.4142, |
|
"eval_samples_per_second": 64.791, |
|
"eval_steps_per_second": 0.68, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8780828714370728, |
|
"eval_runtime": 3.6498, |
|
"eval_samples_per_second": 78.361, |
|
"eval_steps_per_second": 0.822, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 160.92, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 0.8776365518569946, |
|
"eval_runtime": 3.4668, |
|
"eval_samples_per_second": 82.497, |
|
"eval_steps_per_second": 0.865, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 161.85, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8795685768127441, |
|
"eval_runtime": 3.8004, |
|
"eval_samples_per_second": 75.256, |
|
"eval_steps_per_second": 0.789, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 162.77, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 0.8795468807220459, |
|
"eval_runtime": 3.8694, |
|
"eval_samples_per_second": 73.913, |
|
"eval_steps_per_second": 0.775, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8797011971473694, |
|
"eval_runtime": 4.1348, |
|
"eval_samples_per_second": 69.169, |
|
"eval_steps_per_second": 0.726, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 164.92, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8706856966018677, |
|
"eval_runtime": 4.5762, |
|
"eval_samples_per_second": 62.498, |
|
"eval_steps_per_second": 0.656, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 165.85, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8697258830070496, |
|
"eval_runtime": 3.5794, |
|
"eval_samples_per_second": 79.901, |
|
"eval_steps_per_second": 0.838, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 166.77, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8723975419998169, |
|
"eval_runtime": 5.761, |
|
"eval_samples_per_second": 49.644, |
|
"eval_steps_per_second": 0.521, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.870445966720581, |
|
"eval_runtime": 4.2907, |
|
"eval_samples_per_second": 66.656, |
|
"eval_steps_per_second": 0.699, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 168.92, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8693636655807495, |
|
"eval_runtime": 4.5637, |
|
"eval_samples_per_second": 62.668, |
|
"eval_steps_per_second": 0.657, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 169.23, |
|
"grad_norm": 67537.203125, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.6305, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 169.85, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8739539980888367, |
|
"eval_runtime": 4.5496, |
|
"eval_samples_per_second": 62.862, |
|
"eval_steps_per_second": 0.659, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 170.77, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8713040947914124, |
|
"eval_runtime": 4.3907, |
|
"eval_samples_per_second": 65.138, |
|
"eval_steps_per_second": 0.683, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8682331442832947, |
|
"eval_runtime": 4.1777, |
|
"eval_samples_per_second": 68.459, |
|
"eval_steps_per_second": 0.718, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 172.92, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.868798553943634, |
|
"eval_runtime": 3.5218, |
|
"eval_samples_per_second": 81.207, |
|
"eval_steps_per_second": 0.852, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 173.85, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 0.8692768216133118, |
|
"eval_runtime": 5.0064, |
|
"eval_samples_per_second": 57.127, |
|
"eval_steps_per_second": 0.599, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 174.77, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.874369204044342, |
|
"eval_runtime": 4.1257, |
|
"eval_samples_per_second": 69.322, |
|
"eval_steps_per_second": 0.727, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.6783216783216783, |
|
"eval_loss": 0.8759630918502808, |
|
"eval_runtime": 4.4848, |
|
"eval_samples_per_second": 63.771, |
|
"eval_steps_per_second": 0.669, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 176.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.8696449398994446, |
|
"eval_runtime": 4.1683, |
|
"eval_samples_per_second": 68.613, |
|
"eval_steps_per_second": 0.72, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 177.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.8668593764305115, |
|
"eval_runtime": 4.3889, |
|
"eval_samples_per_second": 65.165, |
|
"eval_steps_per_second": 0.684, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 178.77, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.8641146421432495, |
|
"eval_runtime": 4.0742, |
|
"eval_samples_per_second": 70.197, |
|
"eval_steps_per_second": 0.736, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.8696537613868713, |
|
"eval_runtime": 4.1345, |
|
"eval_samples_per_second": 69.173, |
|
"eval_steps_per_second": 0.726, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 180.92, |
|
"eval_accuracy": 0.6748251748251748, |
|
"eval_loss": 0.8678367733955383, |
|
"eval_runtime": 3.994, |
|
"eval_samples_per_second": 71.607, |
|
"eval_steps_per_second": 0.751, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 181.85, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 0.8620542287826538, |
|
"eval_runtime": 4.32, |
|
"eval_samples_per_second": 66.204, |
|
"eval_steps_per_second": 0.694, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 182.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8557011485099792, |
|
"eval_runtime": 4.7717, |
|
"eval_samples_per_second": 59.937, |
|
"eval_steps_per_second": 0.629, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.848114013671875, |
|
"eval_runtime": 4.0948, |
|
"eval_samples_per_second": 69.845, |
|
"eval_steps_per_second": 0.733, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 184.62, |
|
"grad_norm": 36502.2421875, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.6095, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 184.92, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8428906798362732, |
|
"eval_runtime": 4.6887, |
|
"eval_samples_per_second": 60.997, |
|
"eval_steps_per_second": 0.64, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 185.85, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8413122892379761, |
|
"eval_runtime": 3.8998, |
|
"eval_samples_per_second": 73.337, |
|
"eval_steps_per_second": 0.769, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 186.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8402045965194702, |
|
"eval_runtime": 4.1508, |
|
"eval_samples_per_second": 68.903, |
|
"eval_steps_per_second": 0.723, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8415275812149048, |
|
"eval_runtime": 4.4966, |
|
"eval_samples_per_second": 63.603, |
|
"eval_steps_per_second": 0.667, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 188.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8409523963928223, |
|
"eval_runtime": 4.0007, |
|
"eval_samples_per_second": 71.488, |
|
"eval_steps_per_second": 0.75, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 189.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.8388563394546509, |
|
"eval_runtime": 4.5212, |
|
"eval_samples_per_second": 63.257, |
|
"eval_steps_per_second": 0.664, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 190.77, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.8353860378265381, |
|
"eval_runtime": 4.6112, |
|
"eval_samples_per_second": 62.023, |
|
"eval_steps_per_second": 0.651, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8356983661651611, |
|
"eval_runtime": 4.6563, |
|
"eval_samples_per_second": 61.422, |
|
"eval_steps_per_second": 0.644, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 192.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8400572538375854, |
|
"eval_runtime": 5.369, |
|
"eval_samples_per_second": 53.269, |
|
"eval_steps_per_second": 0.559, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 193.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.844892144203186, |
|
"eval_runtime": 4.0956, |
|
"eval_samples_per_second": 69.831, |
|
"eval_steps_per_second": 0.732, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 194.77, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8478845357894897, |
|
"eval_runtime": 4.6385, |
|
"eval_samples_per_second": 61.658, |
|
"eval_steps_per_second": 0.647, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8454630374908447, |
|
"eval_runtime": 4.4423, |
|
"eval_samples_per_second": 64.381, |
|
"eval_steps_per_second": 0.675, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 196.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8421822190284729, |
|
"eval_runtime": 3.8632, |
|
"eval_samples_per_second": 74.032, |
|
"eval_steps_per_second": 0.777, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 197.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8425044417381287, |
|
"eval_runtime": 5.1031, |
|
"eval_samples_per_second": 56.044, |
|
"eval_steps_per_second": 0.588, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 198.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8436546325683594, |
|
"eval_runtime": 4.9685, |
|
"eval_samples_per_second": 57.562, |
|
"eval_steps_per_second": 0.604, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 66285.84375, |
|
"learning_rate": 1.2698412698412699e-05, |
|
"loss": 0.5908, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8366544246673584, |
|
"eval_runtime": 4.3292, |
|
"eval_samples_per_second": 66.063, |
|
"eval_steps_per_second": 0.693, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 200.92, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.834704577922821, |
|
"eval_runtime": 4.7887, |
|
"eval_samples_per_second": 59.724, |
|
"eval_steps_per_second": 0.626, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 201.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8286824226379395, |
|
"eval_runtime": 4.388, |
|
"eval_samples_per_second": 65.178, |
|
"eval_steps_per_second": 0.684, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 202.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8259890079498291, |
|
"eval_runtime": 3.7365, |
|
"eval_samples_per_second": 76.543, |
|
"eval_steps_per_second": 0.803, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8263576626777649, |
|
"eval_runtime": 4.9175, |
|
"eval_samples_per_second": 58.159, |
|
"eval_steps_per_second": 0.61, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 204.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8295235633850098, |
|
"eval_runtime": 4.3071, |
|
"eval_samples_per_second": 66.401, |
|
"eval_steps_per_second": 0.697, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 205.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8301726579666138, |
|
"eval_runtime": 3.7499, |
|
"eval_samples_per_second": 76.268, |
|
"eval_steps_per_second": 0.8, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 206.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.828461766242981, |
|
"eval_runtime": 3.8022, |
|
"eval_samples_per_second": 75.219, |
|
"eval_steps_per_second": 0.789, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.831078052520752, |
|
"eval_runtime": 4.2868, |
|
"eval_samples_per_second": 66.716, |
|
"eval_steps_per_second": 0.7, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 208.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8320910334587097, |
|
"eval_runtime": 4.474, |
|
"eval_samples_per_second": 63.925, |
|
"eval_steps_per_second": 0.671, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 209.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8305550813674927, |
|
"eval_runtime": 4.1246, |
|
"eval_samples_per_second": 69.341, |
|
"eval_steps_per_second": 0.727, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 210.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8302868604660034, |
|
"eval_runtime": 4.9131, |
|
"eval_samples_per_second": 58.212, |
|
"eval_steps_per_second": 0.611, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.8256182670593262, |
|
"eval_runtime": 4.5542, |
|
"eval_samples_per_second": 62.8, |
|
"eval_steps_per_second": 0.659, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 212.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8230299353599548, |
|
"eval_runtime": 4.2845, |
|
"eval_samples_per_second": 66.752, |
|
"eval_steps_per_second": 0.7, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 213.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.819442868232727, |
|
"eval_runtime": 4.4153, |
|
"eval_samples_per_second": 64.775, |
|
"eval_steps_per_second": 0.679, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 214.77, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8183168768882751, |
|
"eval_runtime": 4.9672, |
|
"eval_samples_per_second": 57.577, |
|
"eval_steps_per_second": 0.604, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 215.38, |
|
"grad_norm": 29832.03125, |
|
"learning_rate": 1.111111111111111e-05, |
|
"loss": 0.5763, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8231977224349976, |
|
"eval_runtime": 4.6354, |
|
"eval_samples_per_second": 61.699, |
|
"eval_steps_per_second": 0.647, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 216.92, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8236932158470154, |
|
"eval_runtime": 3.7182, |
|
"eval_samples_per_second": 76.92, |
|
"eval_steps_per_second": 0.807, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 217.85, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.8195610642433167, |
|
"eval_runtime": 3.5502, |
|
"eval_samples_per_second": 80.56, |
|
"eval_steps_per_second": 0.845, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 218.77, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.8142436742782593, |
|
"eval_runtime": 4.9155, |
|
"eval_samples_per_second": 58.184, |
|
"eval_steps_per_second": 0.61, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.8115321397781372, |
|
"eval_runtime": 4.0939, |
|
"eval_samples_per_second": 69.86, |
|
"eval_steps_per_second": 0.733, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 220.92, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.8130100965499878, |
|
"eval_runtime": 4.2197, |
|
"eval_samples_per_second": 67.777, |
|
"eval_steps_per_second": 0.711, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 221.85, |
|
"eval_accuracy": 0.7027972027972028, |
|
"eval_loss": 0.8156144022941589, |
|
"eval_runtime": 4.2344, |
|
"eval_samples_per_second": 67.542, |
|
"eval_steps_per_second": 0.708, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 222.77, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8200713992118835, |
|
"eval_runtime": 4.8181, |
|
"eval_samples_per_second": 59.36, |
|
"eval_steps_per_second": 0.623, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8227414488792419, |
|
"eval_runtime": 4.5671, |
|
"eval_samples_per_second": 62.621, |
|
"eval_steps_per_second": 0.657, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 224.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8232228755950928, |
|
"eval_runtime": 5.221, |
|
"eval_samples_per_second": 54.779, |
|
"eval_steps_per_second": 0.575, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 225.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8198325634002686, |
|
"eval_runtime": 4.2136, |
|
"eval_samples_per_second": 67.875, |
|
"eval_steps_per_second": 0.712, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 226.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8151125311851501, |
|
"eval_runtime": 4.8801, |
|
"eval_samples_per_second": 58.606, |
|
"eval_steps_per_second": 0.615, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8136410713195801, |
|
"eval_runtime": 5.2461, |
|
"eval_samples_per_second": 54.516, |
|
"eval_steps_per_second": 0.572, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 228.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8134062886238098, |
|
"eval_runtime": 3.6429, |
|
"eval_samples_per_second": 78.509, |
|
"eval_steps_per_second": 0.824, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 229.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8123226761817932, |
|
"eval_runtime": 4.8374, |
|
"eval_samples_per_second": 59.122, |
|
"eval_steps_per_second": 0.62, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"grad_norm": 27062.134765625, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 0.57, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8095433115959167, |
|
"eval_runtime": 3.9409, |
|
"eval_samples_per_second": 72.572, |
|
"eval_steps_per_second": 0.761, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8082302212715149, |
|
"eval_runtime": 4.0933, |
|
"eval_samples_per_second": 69.87, |
|
"eval_steps_per_second": 0.733, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 232.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8084114193916321, |
|
"eval_runtime": 4.4952, |
|
"eval_samples_per_second": 63.624, |
|
"eval_steps_per_second": 0.667, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 233.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8113557696342468, |
|
"eval_runtime": 4.6955, |
|
"eval_samples_per_second": 60.909, |
|
"eval_steps_per_second": 0.639, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 234.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8130276799201965, |
|
"eval_runtime": 4.9303, |
|
"eval_samples_per_second": 58.009, |
|
"eval_steps_per_second": 0.608, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8153804540634155, |
|
"eval_runtime": 3.6663, |
|
"eval_samples_per_second": 78.007, |
|
"eval_steps_per_second": 0.818, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 236.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8160205483436584, |
|
"eval_runtime": 4.6226, |
|
"eval_samples_per_second": 61.87, |
|
"eval_steps_per_second": 0.649, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 237.85, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8126419186592102, |
|
"eval_runtime": 4.6278, |
|
"eval_samples_per_second": 61.801, |
|
"eval_steps_per_second": 0.648, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 238.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8113960027694702, |
|
"eval_runtime": 3.8362, |
|
"eval_samples_per_second": 74.552, |
|
"eval_steps_per_second": 0.782, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8041169047355652, |
|
"eval_runtime": 5.2095, |
|
"eval_samples_per_second": 54.9, |
|
"eval_steps_per_second": 0.576, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 240.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8005608916282654, |
|
"eval_runtime": 4.0128, |
|
"eval_samples_per_second": 71.273, |
|
"eval_steps_per_second": 0.748, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 241.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.7987480163574219, |
|
"eval_runtime": 4.8789, |
|
"eval_samples_per_second": 58.619, |
|
"eval_steps_per_second": 0.615, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 242.77, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.7977189421653748, |
|
"eval_runtime": 4.5854, |
|
"eval_samples_per_second": 62.372, |
|
"eval_steps_per_second": 0.654, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.8001275658607483, |
|
"eval_runtime": 4.7528, |
|
"eval_samples_per_second": 60.175, |
|
"eval_steps_per_second": 0.631, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 244.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8043994903564453, |
|
"eval_runtime": 4.2699, |
|
"eval_samples_per_second": 66.98, |
|
"eval_steps_per_second": 0.703, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 245.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8082275390625, |
|
"eval_runtime": 4.2996, |
|
"eval_samples_per_second": 66.518, |
|
"eval_steps_per_second": 0.698, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 246.15, |
|
"grad_norm": 99001.8359375, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.5456, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 246.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8120755553245544, |
|
"eval_runtime": 4.5242, |
|
"eval_samples_per_second": 63.216, |
|
"eval_steps_per_second": 0.663, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8106970191001892, |
|
"eval_runtime": 4.4479, |
|
"eval_samples_per_second": 64.3, |
|
"eval_steps_per_second": 0.674, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 248.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.806368887424469, |
|
"eval_runtime": 4.1522, |
|
"eval_samples_per_second": 68.88, |
|
"eval_steps_per_second": 0.723, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 249.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8042352199554443, |
|
"eval_runtime": 4.4213, |
|
"eval_samples_per_second": 64.687, |
|
"eval_steps_per_second": 0.679, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 250.77, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8005724549293518, |
|
"eval_runtime": 4.4134, |
|
"eval_samples_per_second": 64.802, |
|
"eval_steps_per_second": 0.68, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.7968676090240479, |
|
"eval_runtime": 3.8229, |
|
"eval_samples_per_second": 74.812, |
|
"eval_steps_per_second": 0.785, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 252.92, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.7954707741737366, |
|
"eval_runtime": 4.2693, |
|
"eval_samples_per_second": 66.99, |
|
"eval_steps_per_second": 0.703, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 253.85, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.7973347902297974, |
|
"eval_runtime": 4.1401, |
|
"eval_samples_per_second": 69.081, |
|
"eval_steps_per_second": 0.725, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 254.77, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8001494407653809, |
|
"eval_runtime": 4.4851, |
|
"eval_samples_per_second": 63.767, |
|
"eval_steps_per_second": 0.669, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.80350661277771, |
|
"eval_runtime": 4.4996, |
|
"eval_samples_per_second": 63.562, |
|
"eval_steps_per_second": 0.667, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 256.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.8035485148429871, |
|
"eval_runtime": 4.5713, |
|
"eval_samples_per_second": 62.564, |
|
"eval_steps_per_second": 0.656, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 257.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8012282252311707, |
|
"eval_runtime": 4.0638, |
|
"eval_samples_per_second": 70.377, |
|
"eval_steps_per_second": 0.738, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 258.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8000492453575134, |
|
"eval_runtime": 4.443, |
|
"eval_samples_per_second": 64.372, |
|
"eval_steps_per_second": 0.675, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7963055968284607, |
|
"eval_runtime": 5.2655, |
|
"eval_samples_per_second": 54.316, |
|
"eval_steps_per_second": 0.57, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 260.92, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.7927840352058411, |
|
"eval_runtime": 5.1407, |
|
"eval_samples_per_second": 55.634, |
|
"eval_steps_per_second": 0.584, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 261.54, |
|
"grad_norm": 24108.591796875, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 0.5369, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 261.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7919009327888489, |
|
"eval_runtime": 3.8577, |
|
"eval_samples_per_second": 74.138, |
|
"eval_steps_per_second": 0.778, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 262.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.791265606880188, |
|
"eval_runtime": 4.1966, |
|
"eval_samples_per_second": 68.151, |
|
"eval_steps_per_second": 0.715, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7929325699806213, |
|
"eval_runtime": 4.063, |
|
"eval_samples_per_second": 70.391, |
|
"eval_steps_per_second": 0.738, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 264.92, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 0.7954928278923035, |
|
"eval_runtime": 4.3933, |
|
"eval_samples_per_second": 65.099, |
|
"eval_steps_per_second": 0.683, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 265.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7962778210639954, |
|
"eval_runtime": 4.4424, |
|
"eval_samples_per_second": 64.38, |
|
"eval_steps_per_second": 0.675, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 266.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7951834201812744, |
|
"eval_runtime": 4.2605, |
|
"eval_samples_per_second": 67.128, |
|
"eval_steps_per_second": 0.704, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7936495542526245, |
|
"eval_runtime": 4.9467, |
|
"eval_samples_per_second": 57.816, |
|
"eval_steps_per_second": 0.606, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 268.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7928897738456726, |
|
"eval_runtime": 4.9925, |
|
"eval_samples_per_second": 57.286, |
|
"eval_steps_per_second": 0.601, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 269.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7933365702629089, |
|
"eval_runtime": 4.4133, |
|
"eval_samples_per_second": 64.804, |
|
"eval_steps_per_second": 0.68, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 270.77, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7940818071365356, |
|
"eval_runtime": 4.0519, |
|
"eval_samples_per_second": 70.584, |
|
"eval_steps_per_second": 0.74, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7939559817314148, |
|
"eval_runtime": 4.2845, |
|
"eval_samples_per_second": 66.753, |
|
"eval_steps_per_second": 0.7, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 272.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7929409742355347, |
|
"eval_runtime": 4.885, |
|
"eval_samples_per_second": 58.546, |
|
"eval_steps_per_second": 0.614, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 273.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7929646968841553, |
|
"eval_runtime": 3.7177, |
|
"eval_samples_per_second": 76.929, |
|
"eval_steps_per_second": 0.807, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 274.77, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7942932844161987, |
|
"eval_runtime": 4.7663, |
|
"eval_samples_per_second": 60.004, |
|
"eval_steps_per_second": 0.629, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7943535447120667, |
|
"eval_runtime": 4.0017, |
|
"eval_samples_per_second": 71.47, |
|
"eval_steps_per_second": 0.75, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 276.92, |
|
"grad_norm": 30744.533203125, |
|
"learning_rate": 4.7619047619047615e-06, |
|
"loss": 0.5388, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 276.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7933218479156494, |
|
"eval_runtime": 4.3013, |
|
"eval_samples_per_second": 66.492, |
|
"eval_steps_per_second": 0.697, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 277.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7914408445358276, |
|
"eval_runtime": 4.8732, |
|
"eval_samples_per_second": 58.689, |
|
"eval_steps_per_second": 0.616, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 278.77, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7903594970703125, |
|
"eval_runtime": 4.6519, |
|
"eval_samples_per_second": 61.48, |
|
"eval_steps_per_second": 0.645, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7888299822807312, |
|
"eval_runtime": 4.5788, |
|
"eval_samples_per_second": 62.462, |
|
"eval_steps_per_second": 0.655, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 280.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7900360822677612, |
|
"eval_runtime": 4.5971, |
|
"eval_samples_per_second": 62.213, |
|
"eval_steps_per_second": 0.653, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 281.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7905992865562439, |
|
"eval_runtime": 4.4545, |
|
"eval_samples_per_second": 64.205, |
|
"eval_steps_per_second": 0.673, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 282.77, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7911333441734314, |
|
"eval_runtime": 4.4274, |
|
"eval_samples_per_second": 64.598, |
|
"eval_steps_per_second": 0.678, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7906560897827148, |
|
"eval_runtime": 3.9207, |
|
"eval_samples_per_second": 72.947, |
|
"eval_steps_per_second": 0.765, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 284.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7906984686851501, |
|
"eval_runtime": 4.5603, |
|
"eval_samples_per_second": 62.715, |
|
"eval_steps_per_second": 0.658, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 285.85, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 0.7905350923538208, |
|
"eval_runtime": 4.8134, |
|
"eval_samples_per_second": 59.418, |
|
"eval_steps_per_second": 0.623, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 286.77, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 0.7899833917617798, |
|
"eval_runtime": 4.0697, |
|
"eval_samples_per_second": 70.275, |
|
"eval_steps_per_second": 0.737, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7901102304458618, |
|
"eval_runtime": 4.0126, |
|
"eval_samples_per_second": 71.276, |
|
"eval_steps_per_second": 0.748, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 288.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7902336120605469, |
|
"eval_runtime": 3.8328, |
|
"eval_samples_per_second": 74.619, |
|
"eval_steps_per_second": 0.783, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 289.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7909765839576721, |
|
"eval_runtime": 3.9497, |
|
"eval_samples_per_second": 72.411, |
|
"eval_steps_per_second": 0.76, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 290.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7913976907730103, |
|
"eval_runtime": 4.7881, |
|
"eval_samples_per_second": 59.731, |
|
"eval_steps_per_second": 0.627, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7919970750808716, |
|
"eval_runtime": 4.0436, |
|
"eval_samples_per_second": 70.729, |
|
"eval_steps_per_second": 0.742, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 292.31, |
|
"grad_norm": 41198.3515625, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.5261, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 292.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7927921414375305, |
|
"eval_runtime": 3.9219, |
|
"eval_samples_per_second": 72.923, |
|
"eval_steps_per_second": 0.765, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 293.85, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.793153703212738, |
|
"eval_runtime": 4.3649, |
|
"eval_samples_per_second": 65.522, |
|
"eval_steps_per_second": 0.687, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 294.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7925400733947754, |
|
"eval_runtime": 4.2064, |
|
"eval_samples_per_second": 67.992, |
|
"eval_steps_per_second": 0.713, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7922278046607971, |
|
"eval_runtime": 4.03, |
|
"eval_samples_per_second": 70.968, |
|
"eval_steps_per_second": 0.744, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 296.92, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7919090986251831, |
|
"eval_runtime": 4.4889, |
|
"eval_samples_per_second": 63.713, |
|
"eval_steps_per_second": 0.668, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 297.85, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7922202348709106, |
|
"eval_runtime": 4.3742, |
|
"eval_samples_per_second": 65.383, |
|
"eval_steps_per_second": 0.686, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 298.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7921380400657654, |
|
"eval_runtime": 4.27, |
|
"eval_samples_per_second": 66.979, |
|
"eval_steps_per_second": 0.703, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7912278175354004, |
|
"eval_runtime": 4.209, |
|
"eval_samples_per_second": 67.95, |
|
"eval_steps_per_second": 0.713, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 300.92, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7907286882400513, |
|
"eval_runtime": 4.5975, |
|
"eval_samples_per_second": 62.208, |
|
"eval_steps_per_second": 0.653, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 301.85, |
|
"eval_accuracy": 0.6853146853146853, |
|
"eval_loss": 0.7895866632461548, |
|
"eval_runtime": 4.0629, |
|
"eval_samples_per_second": 70.394, |
|
"eval_steps_per_second": 0.738, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 302.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7885376811027527, |
|
"eval_runtime": 4.0112, |
|
"eval_samples_per_second": 71.301, |
|
"eval_steps_per_second": 0.748, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7877256870269775, |
|
"eval_runtime": 4.4199, |
|
"eval_samples_per_second": 64.708, |
|
"eval_steps_per_second": 0.679, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 304.92, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7874112725257874, |
|
"eval_runtime": 4.0366, |
|
"eval_samples_per_second": 70.852, |
|
"eval_steps_per_second": 0.743, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 305.85, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7876228094100952, |
|
"eval_runtime": 4.3519, |
|
"eval_samples_per_second": 65.718, |
|
"eval_steps_per_second": 0.689, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 306.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7879106402397156, |
|
"eval_runtime": 5.3443, |
|
"eval_samples_per_second": 53.515, |
|
"eval_steps_per_second": 0.561, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 307.69, |
|
"grad_norm": 31167.6875, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 0.5188, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7883804440498352, |
|
"eval_runtime": 4.1413, |
|
"eval_samples_per_second": 69.06, |
|
"eval_steps_per_second": 0.724, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 308.92, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7886692881584167, |
|
"eval_runtime": 4.049, |
|
"eval_samples_per_second": 70.634, |
|
"eval_steps_per_second": 0.741, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 309.85, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7890444397926331, |
|
"eval_runtime": 4.612, |
|
"eval_samples_per_second": 62.012, |
|
"eval_steps_per_second": 0.65, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 310.77, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7894096970558167, |
|
"eval_runtime": 3.8027, |
|
"eval_samples_per_second": 75.209, |
|
"eval_steps_per_second": 0.789, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7899323105812073, |
|
"eval_runtime": 4.3345, |
|
"eval_samples_per_second": 65.983, |
|
"eval_steps_per_second": 0.692, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 312.92, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.7903538346290588, |
|
"eval_runtime": 4.5846, |
|
"eval_samples_per_second": 62.383, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 313.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7907257080078125, |
|
"eval_runtime": 4.136, |
|
"eval_samples_per_second": 69.148, |
|
"eval_steps_per_second": 0.725, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 314.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.790963888168335, |
|
"eval_runtime": 4.2526, |
|
"eval_samples_per_second": 67.252, |
|
"eval_steps_per_second": 0.705, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7912085056304932, |
|
"eval_runtime": 4.1188, |
|
"eval_samples_per_second": 69.437, |
|
"eval_steps_per_second": 0.728, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 316.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7911705374717712, |
|
"eval_runtime": 4.1524, |
|
"eval_samples_per_second": 68.876, |
|
"eval_steps_per_second": 0.722, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 317.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7911967039108276, |
|
"eval_runtime": 3.9058, |
|
"eval_samples_per_second": 73.225, |
|
"eval_steps_per_second": 0.768, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 318.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7912610173225403, |
|
"eval_runtime": 4.6095, |
|
"eval_samples_per_second": 62.046, |
|
"eval_steps_per_second": 0.651, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7912730574607849, |
|
"eval_runtime": 5.5705, |
|
"eval_samples_per_second": 51.342, |
|
"eval_steps_per_second": 0.539, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 320.92, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7911974787712097, |
|
"eval_runtime": 4.9154, |
|
"eval_samples_per_second": 58.185, |
|
"eval_steps_per_second": 0.61, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 321.85, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7911575436592102, |
|
"eval_runtime": 4.8387, |
|
"eval_samples_per_second": 59.107, |
|
"eval_steps_per_second": 0.62, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 322.77, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7911355495452881, |
|
"eval_runtime": 4.1368, |
|
"eval_samples_per_second": 69.135, |
|
"eval_steps_per_second": 0.725, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 323.08, |
|
"grad_norm": 53824.44140625, |
|
"learning_rate": 0.0, |
|
"loss": 0.5194, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 323.08, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.7911302447319031, |
|
"eval_runtime": 4.2304, |
|
"eval_samples_per_second": 67.606, |
|
"eval_steps_per_second": 0.709, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 323.08, |
|
"step": 1050, |
|
"total_flos": 4.380490432252032e+18, |
|
"train_loss": 0.8143934268043155, |
|
"train_runtime": 4784.9132, |
|
"train_samples_per_second": 113.231, |
|
"train_steps_per_second": 0.219 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 350, |
|
"save_steps": 500, |
|
"total_flos": 4.380490432252032e+18, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|