|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"global_step": 14200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.789999999999999e-06, |
|
"loss": 13.5868, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.379e-05, |
|
"loss": 5.6931, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.0789999999999996e-05, |
|
"loss": 4.1485, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.779e-05, |
|
"loss": 3.5861, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.479e-05, |
|
"loss": 3.2885, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.178999999999999e-05, |
|
"loss": 3.2023, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.878999999999999e-05, |
|
"loss": 3.1447, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.579e-05, |
|
"loss": 3.0982, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 6.279e-05, |
|
"loss": 2.9573, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 6.979e-05, |
|
"loss": 2.3964, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 1.225058913230896, |
|
"eval_runtime": 113.8433, |
|
"eval_samples_per_second": 30.103, |
|
"eval_steps_per_second": 0.949, |
|
"eval_wer": 0.878148627762229, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.948560606060606e-05, |
|
"loss": 1.9563, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 6.895530303030303e-05, |
|
"loss": 1.7124, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 6.8425e-05, |
|
"loss": 1.5888, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 6.789469696969696e-05, |
|
"loss": 1.514, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 6.736439393939394e-05, |
|
"loss": 1.4347, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 6.68340909090909e-05, |
|
"loss": 1.4113, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 6.630378787878786e-05, |
|
"loss": 1.4, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 6.577348484848484e-05, |
|
"loss": 1.3486, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 6.52431818181818e-05, |
|
"loss": 1.3427, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 6.471287878787878e-05, |
|
"loss": 1.3176, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_loss": 0.3872202932834625, |
|
"eval_runtime": 113.3065, |
|
"eval_samples_per_second": 30.245, |
|
"eval_steps_per_second": 0.953, |
|
"eval_wer": 0.4461756965620953, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 6.418257575757575e-05, |
|
"loss": 1.2983, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 6.365227272727272e-05, |
|
"loss": 1.2845, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 6.312196969696969e-05, |
|
"loss": 1.2803, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 6.259166666666667e-05, |
|
"loss": 1.2482, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.206136363636363e-05, |
|
"loss": 1.2734, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 6.153106060606061e-05, |
|
"loss": 1.2382, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 6.100075757575757e-05, |
|
"loss": 1.2278, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 6.047045454545454e-05, |
|
"loss": 1.2254, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 5.994015151515151e-05, |
|
"loss": 1.2084, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 5.940984848484848e-05, |
|
"loss": 1.1999, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"eval_loss": 0.32438814640045166, |
|
"eval_runtime": 116.219, |
|
"eval_samples_per_second": 29.487, |
|
"eval_steps_per_second": 0.929, |
|
"eval_wer": 0.39216341534734117, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 5.887954545454545e-05, |
|
"loss": 1.2041, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 5.8349242424242423e-05, |
|
"loss": 1.1884, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 5.781893939393939e-05, |
|
"loss": 1.1938, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 5.728863636363636e-05, |
|
"loss": 1.1871, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 5.675833333333333e-05, |
|
"loss": 1.1859, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 5.62280303030303e-05, |
|
"loss": 1.1824, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 5.569772727272727e-05, |
|
"loss": 1.1627, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 5.516742424242423e-05, |
|
"loss": 1.1557, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 5.4637121212121204e-05, |
|
"loss": 1.1679, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 5.4106818181818174e-05, |
|
"loss": 1.1633, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"eval_loss": 0.3014352321624756, |
|
"eval_runtime": 112.4096, |
|
"eval_samples_per_second": 30.487, |
|
"eval_steps_per_second": 0.961, |
|
"eval_wer": 0.3703997660720999, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 5.3576515151515145e-05, |
|
"loss": 1.1435, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 5.3046212121212116e-05, |
|
"loss": 1.14, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 5.252121212121211e-05, |
|
"loss": 1.1508, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 5.199090909090908e-05, |
|
"loss": 1.1158, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 5.146060606060605e-05, |
|
"loss": 1.1181, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 5.093030303030302e-05, |
|
"loss": 1.1156, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 5.039999999999999e-05, |
|
"loss": 1.1205, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 4.986969696969696e-05, |
|
"loss": 1.1364, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 4.933939393939393e-05, |
|
"loss": 1.1098, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 4.8809090909090903e-05, |
|
"loss": 1.1132, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"eval_loss": 0.2912500500679016, |
|
"eval_runtime": 113.7465, |
|
"eval_samples_per_second": 30.128, |
|
"eval_steps_per_second": 0.949, |
|
"eval_wer": 0.3622540624086219, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"learning_rate": 4.8278787878787874e-05, |
|
"loss": 1.1102, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 4.7748484848484845e-05, |
|
"loss": 1.12, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 4.7218181818181815e-05, |
|
"loss": 1.1068, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 4.6687878787878786e-05, |
|
"loss": 1.1125, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"learning_rate": 4.6157575757575756e-05, |
|
"loss": 1.0941, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 4.562727272727273e-05, |
|
"loss": 1.0938, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 20.07, |
|
"learning_rate": 4.50969696969697e-05, |
|
"loss": 1.071, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 20.42, |
|
"learning_rate": 4.456666666666667e-05, |
|
"loss": 1.0878, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 4.403636363636364e-05, |
|
"loss": 1.0815, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 21.13, |
|
"learning_rate": 4.3506060606060596e-05, |
|
"loss": 1.0888, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 21.13, |
|
"eval_loss": 0.28635460138320923, |
|
"eval_runtime": 111.8452, |
|
"eval_samples_per_second": 30.641, |
|
"eval_steps_per_second": 0.966, |
|
"eval_wer": 0.3498057562972555, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 21.48, |
|
"learning_rate": 4.2975757575757566e-05, |
|
"loss": 1.0875, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 21.83, |
|
"learning_rate": 4.244545454545454e-05, |
|
"loss": 1.0609, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 22.18, |
|
"learning_rate": 4.191515151515151e-05, |
|
"loss": 1.0796, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 22.54, |
|
"learning_rate": 4.1390151515151515e-05, |
|
"loss": 1.0701, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"learning_rate": 4.0859848484848485e-05, |
|
"loss": 1.0633, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 4.0329545454545456e-05, |
|
"loss": 1.0491, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"learning_rate": 3.9799242424242427e-05, |
|
"loss": 1.0673, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 3.927424242424242e-05, |
|
"loss": 1.0574, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 3.874393939393939e-05, |
|
"loss": 1.0371, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"learning_rate": 3.821363636363636e-05, |
|
"loss": 1.0487, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"eval_loss": 0.28213828802108765, |
|
"eval_runtime": 109.6819, |
|
"eval_samples_per_second": 31.245, |
|
"eval_steps_per_second": 0.985, |
|
"eval_wer": 0.3434562847236727, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3.768333333333333e-05, |
|
"loss": 1.0749, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 25.35, |
|
"learning_rate": 3.71530303030303e-05, |
|
"loss": 1.0492, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 25.7, |
|
"learning_rate": 3.662272727272727e-05, |
|
"loss": 1.0189, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 3.609242424242424e-05, |
|
"loss": 1.0428, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 3.556212121212121e-05, |
|
"loss": 1.0372, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 26.76, |
|
"learning_rate": 3.503181818181818e-05, |
|
"loss": 1.0324, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 27.11, |
|
"learning_rate": 3.450151515151515e-05, |
|
"loss": 1.0244, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 27.46, |
|
"learning_rate": 3.397121212121212e-05, |
|
"loss": 1.0217, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 3.344090909090909e-05, |
|
"loss": 1.024, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"learning_rate": 3.291060606060606e-05, |
|
"loss": 1.0431, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"eval_loss": 0.2738790810108185, |
|
"eval_runtime": 112.4344, |
|
"eval_samples_per_second": 30.48, |
|
"eval_steps_per_second": 0.961, |
|
"eval_wer": 0.33075734157650694, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"learning_rate": 3.238030303030303e-05, |
|
"loss": 1.0062, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 28.87, |
|
"learning_rate": 3.185e-05, |
|
"loss": 1.0167, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 29.23, |
|
"learning_rate": 3.1319696969696965e-05, |
|
"loss": 1.0051, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 3.0789393939393936e-05, |
|
"loss": 1.015, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 29.93, |
|
"learning_rate": 3.0259090909090907e-05, |
|
"loss": 1.0221, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 30.28, |
|
"learning_rate": 2.9728787878787874e-05, |
|
"loss": 0.9987, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 30.63, |
|
"learning_rate": 2.9198484848484844e-05, |
|
"loss": 1.0008, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"learning_rate": 2.8668181818181815e-05, |
|
"loss": 0.9992, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 31.34, |
|
"learning_rate": 2.8143181818181816e-05, |
|
"loss": 1.0122, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 31.69, |
|
"learning_rate": 2.7612878787878786e-05, |
|
"loss": 0.9896, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 31.69, |
|
"eval_loss": 0.2629273533821106, |
|
"eval_runtime": 110.3673, |
|
"eval_samples_per_second": 31.051, |
|
"eval_steps_per_second": 0.979, |
|
"eval_wer": 0.32432432432432434, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 32.04, |
|
"learning_rate": 2.7082575757575757e-05, |
|
"loss": 0.9994, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 32.39, |
|
"learning_rate": 2.6552272727272727e-05, |
|
"loss": 0.9966, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"learning_rate": 2.6021969696969695e-05, |
|
"loss": 0.9905, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 33.1, |
|
"learning_rate": 2.5491666666666665e-05, |
|
"loss": 0.9819, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 33.45, |
|
"learning_rate": 2.4966666666666666e-05, |
|
"loss": 1.0018, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 33.8, |
|
"learning_rate": 2.4436363636363636e-05, |
|
"loss": 0.9881, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 34.15, |
|
"learning_rate": 2.3906060606060604e-05, |
|
"loss": 0.9939, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 34.51, |
|
"learning_rate": 2.3375757575757574e-05, |
|
"loss": 0.9874, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 34.86, |
|
"learning_rate": 2.284545454545454e-05, |
|
"loss": 0.9617, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"learning_rate": 2.2315151515151512e-05, |
|
"loss": 0.9839, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"eval_loss": 0.28058063983917236, |
|
"eval_runtime": 111.4323, |
|
"eval_samples_per_second": 30.754, |
|
"eval_steps_per_second": 0.969, |
|
"eval_wer": 0.33079911441580684, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 35.56, |
|
"learning_rate": 2.1784848484848483e-05, |
|
"loss": 0.9761, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 35.92, |
|
"learning_rate": 2.1254545454545453e-05, |
|
"loss": 0.9892, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"learning_rate": 2.0724242424242424e-05, |
|
"loss": 0.9712, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 36.62, |
|
"learning_rate": 2.0193939393939394e-05, |
|
"loss": 0.9725, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 36.97, |
|
"learning_rate": 1.966363636363636e-05, |
|
"loss": 0.9738, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 37.32, |
|
"learning_rate": 1.9133333333333332e-05, |
|
"loss": 0.9622, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 37.68, |
|
"learning_rate": 1.86030303030303e-05, |
|
"loss": 0.9612, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"learning_rate": 1.807272727272727e-05, |
|
"loss": 0.9645, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 38.38, |
|
"learning_rate": 1.754242424242424e-05, |
|
"loss": 0.9703, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 38.73, |
|
"learning_rate": 1.701212121212121e-05, |
|
"loss": 0.9586, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 38.73, |
|
"eval_loss": 0.26495224237442017, |
|
"eval_runtime": 115.232, |
|
"eval_samples_per_second": 29.74, |
|
"eval_steps_per_second": 0.937, |
|
"eval_wer": 0.32353064037762647, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 39.08, |
|
"learning_rate": 1.6481818181818178e-05, |
|
"loss": 0.9597, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 39.44, |
|
"learning_rate": 1.595151515151515e-05, |
|
"loss": 0.9617, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 39.79, |
|
"learning_rate": 1.542121212121212e-05, |
|
"loss": 0.9615, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 40.14, |
|
"learning_rate": 1.489090909090909e-05, |
|
"loss": 0.9476, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 40.49, |
|
"learning_rate": 1.4360606060606059e-05, |
|
"loss": 0.958, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 40.85, |
|
"learning_rate": 1.3830303030303029e-05, |
|
"loss": 0.9515, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"learning_rate": 1.33e-05, |
|
"loss": 0.9525, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 41.55, |
|
"learning_rate": 1.2769696969696969e-05, |
|
"loss": 0.9484, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 41.9, |
|
"learning_rate": 1.2239393939393937e-05, |
|
"loss": 0.9326, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 42.25, |
|
"learning_rate": 1.1719696969696968e-05, |
|
"loss": 0.9501, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 42.25, |
|
"eval_loss": 0.25854310393333435, |
|
"eval_runtime": 111.5579, |
|
"eval_samples_per_second": 30.719, |
|
"eval_steps_per_second": 0.968, |
|
"eval_wer": 0.3173064873219433, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 42.61, |
|
"learning_rate": 1.1189393939393939e-05, |
|
"loss": 0.9395, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 42.96, |
|
"learning_rate": 1.065909090909091e-05, |
|
"loss": 0.9519, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 43.31, |
|
"learning_rate": 1.0128787878787877e-05, |
|
"loss": 0.9445, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 43.66, |
|
"learning_rate": 9.598484848484847e-06, |
|
"loss": 0.9293, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 9.068181818181818e-06, |
|
"loss": 0.9421, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 44.37, |
|
"learning_rate": 8.537878787878787e-06, |
|
"loss": 0.9305, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 44.72, |
|
"learning_rate": 8.007575757575757e-06, |
|
"loss": 0.9305, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 45.07, |
|
"learning_rate": 7.477272727272726e-06, |
|
"loss": 0.9238, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 45.42, |
|
"learning_rate": 6.946969696969697e-06, |
|
"loss": 0.9291, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 45.77, |
|
"learning_rate": 6.416666666666666e-06, |
|
"loss": 0.938, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 45.77, |
|
"eval_loss": 0.2560683786869049, |
|
"eval_runtime": 110.8197, |
|
"eval_samples_per_second": 30.924, |
|
"eval_steps_per_second": 0.975, |
|
"eval_wer": 0.3117089268557584, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 46.13, |
|
"learning_rate": 5.886363636363636e-06, |
|
"loss": 0.9387, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"learning_rate": 5.356060606060605e-06, |
|
"loss": 0.9103, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 46.83, |
|
"learning_rate": 4.825757575757576e-06, |
|
"loss": 0.9283, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 47.18, |
|
"learning_rate": 4.295454545454545e-06, |
|
"loss": 0.9282, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 47.54, |
|
"learning_rate": 3.7651515151515147e-06, |
|
"loss": 0.9168, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 47.89, |
|
"learning_rate": 3.2348484848484844e-06, |
|
"loss": 0.925, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 48.24, |
|
"learning_rate": 2.704545454545454e-06, |
|
"loss": 0.928, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 48.59, |
|
"learning_rate": 2.174242424242424e-06, |
|
"loss": 0.9301, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 48.94, |
|
"learning_rate": 1.6439393939393937e-06, |
|
"loss": 0.9404, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 49.3, |
|
"learning_rate": 1.1136363636363635e-06, |
|
"loss": 0.921, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 49.3, |
|
"eval_loss": 0.2559361755847931, |
|
"eval_runtime": 110.0973, |
|
"eval_samples_per_second": 31.127, |
|
"eval_steps_per_second": 0.981, |
|
"eval_wer": 0.31154183549855885, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 49.65, |
|
"learning_rate": 5.833333333333333e-07, |
|
"loss": 0.9243, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 5.303030303030303e-08, |
|
"loss": 0.9223, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 14200, |
|
"total_flos": 5.595667068571877e+19, |
|
"train_loss": 1.318804485697142, |
|
"train_runtime": 22379.119, |
|
"train_samples_per_second": 20.269, |
|
"train_steps_per_second": 0.635 |
|
} |
|
], |
|
"max_steps": 14200, |
|
"num_train_epochs": 50, |
|
"total_flos": 5.595667068571877e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|