|
{
  "best_metric": 6.666951123247926,
  "best_model_checkpoint": "./whisper-small-accented-zh/checkpoint-1000",
  "epoch": 3.0,
  "eval_steps": 1000,
  "global_step": 1830,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.040983606557377046,
      "grad_norm": 10.932804107666016,
      "learning_rate": 4.6000000000000004e-07,
      "loss": 0.8423,
      "step": 25
    },
    {
      "epoch": 0.08196721311475409,
      "grad_norm": 7.338128089904785,
      "learning_rate": 9.600000000000001e-07,
      "loss": 0.7101,
      "step": 50
    },
    {
      "epoch": 0.12295081967213115,
      "grad_norm": 6.037665843963623,
      "learning_rate": 1.46e-06,
      "loss": 0.5068,
      "step": 75
    },
    {
      "epoch": 0.16393442622950818,
      "grad_norm": 5.529111385345459,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 0.2505,
      "step": 100
    },
    {
      "epoch": 0.20491803278688525,
      "grad_norm": 5.599390983581543,
      "learning_rate": 2.46e-06,
      "loss": 0.2319,
      "step": 125
    },
    {
      "epoch": 0.2459016393442623,
      "grad_norm": 5.434178829193115,
      "learning_rate": 2.96e-06,
      "loss": 0.2241,
      "step": 150
    },
    {
      "epoch": 0.28688524590163933,
      "grad_norm": 4.823729515075684,
      "learning_rate": 3.46e-06,
      "loss": 0.208,
      "step": 175
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 5.677418231964111,
      "learning_rate": 3.96e-06,
      "loss": 0.1731,
      "step": 200
    },
    {
      "epoch": 0.36885245901639346,
      "grad_norm": 5.482035160064697,
      "learning_rate": 4.4600000000000005e-06,
      "loss": 0.1905,
      "step": 225
    },
    {
      "epoch": 0.4098360655737705,
      "grad_norm": 5.01705265045166,
      "learning_rate": 4.960000000000001e-06,
      "loss": 0.19,
      "step": 250
    },
    {
      "epoch": 0.45081967213114754,
      "grad_norm": 3.9028825759887695,
      "learning_rate": 5.460000000000001e-06,
      "loss": 0.1717,
      "step": 275
    },
    {
      "epoch": 0.4918032786885246,
      "grad_norm": 4.761416435241699,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.1947,
      "step": 300
    },
    {
      "epoch": 0.5327868852459017,
      "grad_norm": 4.570663928985596,
      "learning_rate": 6.460000000000001e-06,
      "loss": 0.1803,
      "step": 325
    },
    {
      "epoch": 0.5737704918032787,
      "grad_norm": 4.883725166320801,
      "learning_rate": 6.96e-06,
      "loss": 0.1702,
      "step": 350
    },
    {
      "epoch": 0.6147540983606558,
      "grad_norm": 4.104577541351318,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.1602,
      "step": 375
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 5.41796350479126,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.1826,
      "step": 400
    },
    {
      "epoch": 0.6967213114754098,
      "grad_norm": 4.365445613861084,
      "learning_rate": 8.46e-06,
      "loss": 0.1737,
      "step": 425
    },
    {
      "epoch": 0.7377049180327869,
      "grad_norm": 5.060102939605713,
      "learning_rate": 8.96e-06,
      "loss": 0.1615,
      "step": 450
    },
    {
      "epoch": 0.7786885245901639,
      "grad_norm": 4.938976764678955,
      "learning_rate": 9.460000000000001e-06,
      "loss": 0.1554,
      "step": 475
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 4.954484939575195,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.1578,
      "step": 500
    },
    {
      "epoch": 0.860655737704918,
      "grad_norm": 4.614640712738037,
      "learning_rate": 9.827067669172933e-06,
      "loss": 0.1749,
      "step": 525
    },
    {
      "epoch": 0.9016393442622951,
      "grad_norm": 4.592267036437988,
      "learning_rate": 9.639097744360903e-06,
      "loss": 0.1589,
      "step": 550
    },
    {
      "epoch": 0.9426229508196722,
      "grad_norm": 3.920531988143921,
      "learning_rate": 9.451127819548873e-06,
      "loss": 0.1547,
      "step": 575
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 5.515031814575195,
      "learning_rate": 9.263157894736842e-06,
      "loss": 0.1772,
      "step": 600
    },
    {
      "epoch": 1.0245901639344261,
      "grad_norm": 3.4223694801330566,
      "learning_rate": 9.075187969924812e-06,
      "loss": 0.1081,
      "step": 625
    },
    {
      "epoch": 1.0655737704918034,
      "grad_norm": 3.1705524921417236,
      "learning_rate": 8.887218045112783e-06,
      "loss": 0.0887,
      "step": 650
    },
    {
      "epoch": 1.1065573770491803,
      "grad_norm": 3.728119373321533,
      "learning_rate": 8.699248120300753e-06,
      "loss": 0.0859,
      "step": 675
    },
    {
      "epoch": 1.1475409836065573,
      "grad_norm": 3.746469497680664,
      "learning_rate": 8.511278195488722e-06,
      "loss": 0.0917,
      "step": 700
    },
    {
      "epoch": 1.1885245901639343,
      "grad_norm": 3.7209877967834473,
      "learning_rate": 8.323308270676692e-06,
      "loss": 0.0918,
      "step": 725
    },
    {
      "epoch": 1.2295081967213115,
      "grad_norm": 2.609157085418701,
      "learning_rate": 8.135338345864663e-06,
      "loss": 0.0804,
      "step": 750
    },
    {
      "epoch": 1.2704918032786885,
      "grad_norm": 2.0212669372558594,
      "learning_rate": 7.947368421052633e-06,
      "loss": 0.0809,
      "step": 775
    },
    {
      "epoch": 1.3114754098360657,
      "grad_norm": 3.3288052082061768,
      "learning_rate": 7.759398496240602e-06,
      "loss": 0.0895,
      "step": 800
    },
    {
      "epoch": 1.3524590163934427,
      "grad_norm": 3.645484685897827,
      "learning_rate": 7.571428571428572e-06,
      "loss": 0.083,
      "step": 825
    },
    {
      "epoch": 1.3934426229508197,
      "grad_norm": 2.8785440921783447,
      "learning_rate": 7.3834586466165416e-06,
      "loss": 0.0826,
      "step": 850
    },
    {
      "epoch": 1.4344262295081966,
      "grad_norm": 2.876462697982788,
      "learning_rate": 7.195488721804512e-06,
      "loss": 0.0871,
      "step": 875
    },
    {
      "epoch": 1.4754098360655736,
      "grad_norm": 3.863492012023926,
      "learning_rate": 7.0075187969924815e-06,
      "loss": 0.0974,
      "step": 900
    },
    {
      "epoch": 1.5163934426229508,
      "grad_norm": 1.6586412191390991,
      "learning_rate": 6.819548872180452e-06,
      "loss": 0.0775,
      "step": 925
    },
    {
      "epoch": 1.5573770491803278,
      "grad_norm": 3.3304669857025146,
      "learning_rate": 6.631578947368421e-06,
      "loss": 0.086,
      "step": 950
    },
    {
      "epoch": 1.598360655737705,
      "grad_norm": 3.0590872764587402,
      "learning_rate": 6.443609022556392e-06,
      "loss": 0.082,
      "step": 975
    },
    {
      "epoch": 1.639344262295082,
      "grad_norm": 3.5884344577789307,
      "learning_rate": 6.255639097744361e-06,
      "loss": 0.0883,
      "step": 1000
    },
    {
      "epoch": 1.639344262295082,
      "eval_loss": 0.1597493290901184,
      "eval_runtime": 885.7009,
      "eval_samples_per_second": 2.753,
      "eval_steps_per_second": 0.173,
      "eval_wer": 6.666951123247926,
      "step": 1000
    },
    {
      "epoch": 1.680327868852459,
      "grad_norm": 2.7260067462921143,
      "learning_rate": 6.067669172932331e-06,
      "loss": 0.0887,
      "step": 1025
    },
    {
      "epoch": 1.721311475409836,
      "grad_norm": 3.793546438217163,
      "learning_rate": 5.879699248120301e-06,
      "loss": 0.0805,
      "step": 1050
    },
    {
      "epoch": 1.762295081967213,
      "grad_norm": 3.374661684036255,
      "learning_rate": 5.6917293233082715e-06,
      "loss": 0.0815,
      "step": 1075
    },
    {
      "epoch": 1.8032786885245902,
      "grad_norm": 3.6348347663879395,
      "learning_rate": 5.503759398496241e-06,
      "loss": 0.0777,
      "step": 1100
    },
    {
      "epoch": 1.8442622950819674,
      "grad_norm": 3.1936988830566406,
      "learning_rate": 5.315789473684211e-06,
      "loss": 0.0748,
      "step": 1125
    },
    {
      "epoch": 1.8852459016393444,
      "grad_norm": 3.0702261924743652,
      "learning_rate": 5.12781954887218e-06,
      "loss": 0.0869,
      "step": 1150
    },
    {
      "epoch": 1.9262295081967213,
      "grad_norm": 2.608741283416748,
      "learning_rate": 4.9398496240601505e-06,
      "loss": 0.0807,
      "step": 1175
    },
    {
      "epoch": 1.9672131147540983,
      "grad_norm": 3.992398500442505,
      "learning_rate": 4.751879699248121e-06,
      "loss": 0.0743,
      "step": 1200
    },
    {
      "epoch": 2.0081967213114753,
      "grad_norm": 2.5503103733062744,
      "learning_rate": 4.56390977443609e-06,
      "loss": 0.0613,
      "step": 1225
    },
    {
      "epoch": 2.0491803278688523,
      "grad_norm": 2.326587438583374,
      "learning_rate": 4.375939849624061e-06,
      "loss": 0.0247,
      "step": 1250
    },
    {
      "epoch": 2.0901639344262297,
      "grad_norm": 1.7671444416046143,
      "learning_rate": 4.18796992481203e-06,
      "loss": 0.0306,
      "step": 1275
    },
    {
      "epoch": 2.1311475409836067,
      "grad_norm": 2.0086865425109863,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0252,
      "step": 1300
    },
    {
      "epoch": 2.1721311475409837,
      "grad_norm": 0.9309338331222534,
      "learning_rate": 3.81203007518797e-06,
      "loss": 0.0236,
      "step": 1325
    },
    {
      "epoch": 2.2131147540983607,
      "grad_norm": 0.9044923782348633,
      "learning_rate": 3.6240601503759406e-06,
      "loss": 0.027,
      "step": 1350
    },
    {
      "epoch": 2.2540983606557377,
      "grad_norm": 1.874260425567627,
      "learning_rate": 3.43609022556391e-06,
      "loss": 0.0298,
      "step": 1375
    },
    {
      "epoch": 2.2950819672131146,
      "grad_norm": 0.8456152081489563,
      "learning_rate": 3.24812030075188e-06,
      "loss": 0.0207,
      "step": 1400
    },
    {
      "epoch": 2.3360655737704916,
      "grad_norm": 1.6556758880615234,
      "learning_rate": 3.06015037593985e-06,
      "loss": 0.0349,
      "step": 1425
    },
    {
      "epoch": 2.3770491803278686,
      "grad_norm": 1.3671510219573975,
      "learning_rate": 2.8721804511278195e-06,
      "loss": 0.0284,
      "step": 1450
    },
    {
      "epoch": 2.418032786885246,
      "grad_norm": 1.1702046394348145,
      "learning_rate": 2.68421052631579e-06,
      "loss": 0.024,
      "step": 1475
    },
    {
      "epoch": 2.459016393442623,
      "grad_norm": 2.4468085765838623,
      "learning_rate": 2.4962406015037594e-06,
      "loss": 0.0258,
      "step": 1500
    },
    {
      "epoch": 2.5,
      "grad_norm": 1.201532244682312,
      "learning_rate": 2.3082706766917294e-06,
      "loss": 0.0254,
      "step": 1525
    },
    {
      "epoch": 2.540983606557377,
      "grad_norm": 0.9549139738082886,
      "learning_rate": 2.1203007518796993e-06,
      "loss": 0.0256,
      "step": 1550
    },
    {
      "epoch": 2.581967213114754,
      "grad_norm": 2.483381509780884,
      "learning_rate": 1.9323308270676693e-06,
      "loss": 0.0299,
      "step": 1575
    },
    {
      "epoch": 2.6229508196721314,
      "grad_norm": 1.537619709968567,
      "learning_rate": 1.7443609022556392e-06,
      "loss": 0.0291,
      "step": 1600
    },
    {
      "epoch": 2.663934426229508,
      "grad_norm": 1.5355826616287231,
      "learning_rate": 1.5563909774436092e-06,
      "loss": 0.0269,
      "step": 1625
    },
    {
      "epoch": 2.7049180327868854,
      "grad_norm": 1.311331033706665,
      "learning_rate": 1.3684210526315791e-06,
      "loss": 0.0273,
      "step": 1650
    },
    {
      "epoch": 2.7459016393442623,
      "grad_norm": 0.8623602390289307,
      "learning_rate": 1.1804511278195489e-06,
      "loss": 0.0219,
      "step": 1675
    },
    {
      "epoch": 2.7868852459016393,
      "grad_norm": 1.6167572736740112,
      "learning_rate": 9.924812030075188e-07,
      "loss": 0.0303,
      "step": 1700
    },
    {
      "epoch": 2.8278688524590163,
      "grad_norm": 2.8278274536132812,
      "learning_rate": 8.045112781954888e-07,
      "loss": 0.0245,
      "step": 1725
    },
    {
      "epoch": 2.8688524590163933,
      "grad_norm": 2.3211958408355713,
      "learning_rate": 6.165413533834587e-07,
      "loss": 0.0241,
      "step": 1750
    },
    {
      "epoch": 2.9098360655737707,
      "grad_norm": 1.8018407821655273,
      "learning_rate": 4.285714285714286e-07,
      "loss": 0.022,
      "step": 1775
    },
    {
      "epoch": 2.9508196721311473,
      "grad_norm": 1.6640270948410034,
      "learning_rate": 2.406015037593985e-07,
      "loss": 0.025,
      "step": 1800
    },
    {
      "epoch": 2.9918032786885247,
      "grad_norm": 0.6498773694038391,
      "learning_rate": 5.263157894736842e-08,
      "loss": 0.0234,
      "step": 1825
    }
  ],
  "logging_steps": 25,
  "max_steps": 1830,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.44025720315904e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|