{ "best_metric": 6.666951123247926, "best_model_checkpoint": "./whisper-small-accented-zh/checkpoint-1000", "epoch": 3.0, "eval_steps": 1000, "global_step": 1830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.040983606557377046, "grad_norm": 10.932804107666016, "learning_rate": 4.6000000000000004e-07, "loss": 0.8423, "step": 25 }, { "epoch": 0.08196721311475409, "grad_norm": 7.338128089904785, "learning_rate": 9.600000000000001e-07, "loss": 0.7101, "step": 50 }, { "epoch": 0.12295081967213115, "grad_norm": 6.037665843963623, "learning_rate": 1.46e-06, "loss": 0.5068, "step": 75 }, { "epoch": 0.16393442622950818, "grad_norm": 5.529111385345459, "learning_rate": 1.9600000000000003e-06, "loss": 0.2505, "step": 100 }, { "epoch": 0.20491803278688525, "grad_norm": 5.599390983581543, "learning_rate": 2.46e-06, "loss": 0.2319, "step": 125 }, { "epoch": 0.2459016393442623, "grad_norm": 5.434178829193115, "learning_rate": 2.96e-06, "loss": 0.2241, "step": 150 }, { "epoch": 0.28688524590163933, "grad_norm": 4.823729515075684, "learning_rate": 3.46e-06, "loss": 0.208, "step": 175 }, { "epoch": 0.32786885245901637, "grad_norm": 5.677418231964111, "learning_rate": 3.96e-06, "loss": 0.1731, "step": 200 }, { "epoch": 0.36885245901639346, "grad_norm": 5.482035160064697, "learning_rate": 4.4600000000000005e-06, "loss": 0.1905, "step": 225 }, { "epoch": 0.4098360655737705, "grad_norm": 5.01705265045166, "learning_rate": 4.960000000000001e-06, "loss": 0.19, "step": 250 }, { "epoch": 0.45081967213114754, "grad_norm": 3.9028825759887695, "learning_rate": 5.460000000000001e-06, "loss": 0.1717, "step": 275 }, { "epoch": 0.4918032786885246, "grad_norm": 4.761416435241699, "learning_rate": 5.9600000000000005e-06, "loss": 0.1947, "step": 300 }, { "epoch": 0.5327868852459017, "grad_norm": 4.570663928985596, "learning_rate": 6.460000000000001e-06, "loss": 0.1803, "step": 325 }, { "epoch": 0.5737704918032787, "grad_norm": 4.883725166320801, "learning_rate": 6.96e-06, "loss": 0.1702, "step": 350 }, { "epoch": 0.6147540983606558, "grad_norm": 4.104577541351318, "learning_rate": 7.4600000000000006e-06, "loss": 0.1602, "step": 375 }, { "epoch": 0.6557377049180327, "grad_norm": 5.41796350479126, "learning_rate": 7.960000000000002e-06, "loss": 0.1826, "step": 400 }, { "epoch": 0.6967213114754098, "grad_norm": 4.365445613861084, "learning_rate": 8.46e-06, "loss": 0.1737, "step": 425 }, { "epoch": 0.7377049180327869, "grad_norm": 5.060102939605713, "learning_rate": 8.96e-06, "loss": 0.1615, "step": 450 }, { "epoch": 0.7786885245901639, "grad_norm": 4.938976764678955, "learning_rate": 9.460000000000001e-06, "loss": 0.1554, "step": 475 }, { "epoch": 0.819672131147541, "grad_norm": 4.954484939575195, "learning_rate": 9.960000000000001e-06, "loss": 0.1578, "step": 500 }, { "epoch": 0.860655737704918, "grad_norm": 4.614640712738037, "learning_rate": 9.827067669172933e-06, "loss": 0.1749, "step": 525 }, { "epoch": 0.9016393442622951, "grad_norm": 4.592267036437988, "learning_rate": 9.639097744360903e-06, "loss": 0.1589, "step": 550 }, { "epoch": 0.9426229508196722, "grad_norm": 3.920531988143921, "learning_rate": 9.451127819548873e-06, "loss": 0.1547, "step": 575 }, { "epoch": 0.9836065573770492, "grad_norm": 5.515031814575195, "learning_rate": 9.263157894736842e-06, "loss": 0.1772, "step": 600 }, { "epoch": 1.0245901639344261, "grad_norm": 3.4223694801330566, "learning_rate": 9.075187969924812e-06, "loss": 0.1081, "step": 625 }, { "epoch": 
1.0655737704918034, "grad_norm": 3.1705524921417236, "learning_rate": 8.887218045112783e-06, "loss": 0.0887, "step": 650 }, { "epoch": 1.1065573770491803, "grad_norm": 3.728119373321533, "learning_rate": 8.699248120300753e-06, "loss": 0.0859, "step": 675 }, { "epoch": 1.1475409836065573, "grad_norm": 3.746469497680664, "learning_rate": 8.511278195488722e-06, "loss": 0.0917, "step": 700 }, { "epoch": 1.1885245901639343, "grad_norm": 3.7209877967834473, "learning_rate": 8.323308270676692e-06, "loss": 0.0918, "step": 725 }, { "epoch": 1.2295081967213115, "grad_norm": 2.609157085418701, "learning_rate": 8.135338345864663e-06, "loss": 0.0804, "step": 750 }, { "epoch": 1.2704918032786885, "grad_norm": 2.0212669372558594, "learning_rate": 7.947368421052633e-06, "loss": 0.0809, "step": 775 }, { "epoch": 1.3114754098360657, "grad_norm": 3.3288052082061768, "learning_rate": 7.759398496240602e-06, "loss": 0.0895, "step": 800 }, { "epoch": 1.3524590163934427, "grad_norm": 3.645484685897827, "learning_rate": 7.571428571428572e-06, "loss": 0.083, "step": 825 }, { "epoch": 1.3934426229508197, "grad_norm": 2.8785440921783447, "learning_rate": 7.3834586466165416e-06, "loss": 0.0826, "step": 850 }, { "epoch": 1.4344262295081966, "grad_norm": 2.876462697982788, "learning_rate": 7.195488721804512e-06, "loss": 0.0871, "step": 875 }, { "epoch": 1.4754098360655736, "grad_norm": 3.863492012023926, "learning_rate": 7.0075187969924815e-06, "loss": 0.0974, "step": 900 }, { "epoch": 1.5163934426229508, "grad_norm": 1.6586412191390991, "learning_rate": 6.819548872180452e-06, "loss": 0.0775, "step": 925 }, { "epoch": 1.5573770491803278, "grad_norm": 3.3304669857025146, "learning_rate": 6.631578947368421e-06, "loss": 0.086, "step": 950 }, { "epoch": 1.598360655737705, "grad_norm": 3.0590872764587402, "learning_rate": 6.443609022556392e-06, "loss": 0.082, "step": 975 }, { "epoch": 1.639344262295082, "grad_norm": 3.5884344577789307, "learning_rate": 6.255639097744361e-06, "loss": 0.0883, "step": 1000 }, { "epoch": 1.639344262295082, "eval_loss": 0.1597493290901184, "eval_runtime": 885.7009, "eval_samples_per_second": 2.753, "eval_steps_per_second": 0.173, "eval_wer": 6.666951123247926, "step": 1000 }, { "epoch": 1.680327868852459, "grad_norm": 2.7260067462921143, "learning_rate": 6.067669172932331e-06, "loss": 0.0887, "step": 1025 }, { "epoch": 1.721311475409836, "grad_norm": 3.793546438217163, "learning_rate": 5.879699248120301e-06, "loss": 0.0805, "step": 1050 }, { "epoch": 1.762295081967213, "grad_norm": 3.374661684036255, "learning_rate": 5.6917293233082715e-06, "loss": 0.0815, "step": 1075 }, { "epoch": 1.8032786885245902, "grad_norm": 3.6348347663879395, "learning_rate": 5.503759398496241e-06, "loss": 0.0777, "step": 1100 }, { "epoch": 1.8442622950819674, "grad_norm": 3.1936988830566406, "learning_rate": 5.315789473684211e-06, "loss": 0.0748, "step": 1125 }, { "epoch": 1.8852459016393444, "grad_norm": 3.0702261924743652, "learning_rate": 5.12781954887218e-06, "loss": 0.0869, "step": 1150 }, { "epoch": 1.9262295081967213, "grad_norm": 2.608741283416748, "learning_rate": 4.9398496240601505e-06, "loss": 0.0807, "step": 1175 }, { "epoch": 1.9672131147540983, "grad_norm": 3.992398500442505, "learning_rate": 4.751879699248121e-06, "loss": 0.0743, "step": 1200 }, { "epoch": 2.0081967213114753, "grad_norm": 2.5503103733062744, "learning_rate": 4.56390977443609e-06, "loss": 0.0613, "step": 1225 }, { "epoch": 2.0491803278688523, "grad_norm": 2.326587438583374, "learning_rate": 4.375939849624061e-06, "loss": 0.0247, "step": 
1250 }, { "epoch": 2.0901639344262297, "grad_norm": 1.7671444416046143, "learning_rate": 4.18796992481203e-06, "loss": 0.0306, "step": 1275 }, { "epoch": 2.1311475409836067, "grad_norm": 2.0086865425109863, "learning_rate": 4.000000000000001e-06, "loss": 0.0252, "step": 1300 }, { "epoch": 2.1721311475409837, "grad_norm": 0.9309338331222534, "learning_rate": 3.81203007518797e-06, "loss": 0.0236, "step": 1325 }, { "epoch": 2.2131147540983607, "grad_norm": 0.9044923782348633, "learning_rate": 3.6240601503759406e-06, "loss": 0.027, "step": 1350 }, { "epoch": 2.2540983606557377, "grad_norm": 1.874260425567627, "learning_rate": 3.43609022556391e-06, "loss": 0.0298, "step": 1375 }, { "epoch": 2.2950819672131146, "grad_norm": 0.8456152081489563, "learning_rate": 3.24812030075188e-06, "loss": 0.0207, "step": 1400 }, { "epoch": 2.3360655737704916, "grad_norm": 1.6556758880615234, "learning_rate": 3.06015037593985e-06, "loss": 0.0349, "step": 1425 }, { "epoch": 2.3770491803278686, "grad_norm": 1.3671510219573975, "learning_rate": 2.8721804511278195e-06, "loss": 0.0284, "step": 1450 }, { "epoch": 2.418032786885246, "grad_norm": 1.1702046394348145, "learning_rate": 2.68421052631579e-06, "loss": 0.024, "step": 1475 }, { "epoch": 2.459016393442623, "grad_norm": 2.4468085765838623, "learning_rate": 2.4962406015037594e-06, "loss": 0.0258, "step": 1500 }, { "epoch": 2.5, "grad_norm": 1.201532244682312, "learning_rate": 2.3082706766917294e-06, "loss": 0.0254, "step": 1525 }, { "epoch": 2.540983606557377, "grad_norm": 0.9549139738082886, "learning_rate": 2.1203007518796993e-06, "loss": 0.0256, "step": 1550 }, { "epoch": 2.581967213114754, "grad_norm": 2.483381509780884, "learning_rate": 1.9323308270676693e-06, "loss": 0.0299, "step": 1575 }, { "epoch": 2.6229508196721314, "grad_norm": 1.537619709968567, "learning_rate": 1.7443609022556392e-06, "loss": 0.0291, "step": 1600 }, { "epoch": 2.663934426229508, "grad_norm": 1.5355826616287231, "learning_rate": 1.5563909774436092e-06, "loss": 0.0269, "step": 1625 }, { "epoch": 2.7049180327868854, "grad_norm": 1.311331033706665, "learning_rate": 1.3684210526315791e-06, "loss": 0.0273, "step": 1650 }, { "epoch": 2.7459016393442623, "grad_norm": 0.8623602390289307, "learning_rate": 1.1804511278195489e-06, "loss": 0.0219, "step": 1675 }, { "epoch": 2.7868852459016393, "grad_norm": 1.6167572736740112, "learning_rate": 9.924812030075188e-07, "loss": 0.0303, "step": 1700 }, { "epoch": 2.8278688524590163, "grad_norm": 2.8278274536132812, "learning_rate": 8.045112781954888e-07, "loss": 0.0245, "step": 1725 }, { "epoch": 2.8688524590163933, "grad_norm": 2.3211958408355713, "learning_rate": 6.165413533834587e-07, "loss": 0.0241, "step": 1750 }, { "epoch": 2.9098360655737707, "grad_norm": 1.8018407821655273, "learning_rate": 4.285714285714286e-07, "loss": 0.022, "step": 1775 }, { "epoch": 2.9508196721311473, "grad_norm": 1.6640270948410034, "learning_rate": 2.406015037593985e-07, "loss": 0.025, "step": 1800 }, { "epoch": 2.9918032786885247, "grad_norm": 0.6498773694038391, "learning_rate": 5.263157894736842e-08, "loss": 0.0234, "step": 1825 } ], "logging_steps": 25, "max_steps": 1830, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.44025720315904e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }