{
"best_metric": 6.666951123247926,
"best_model_checkpoint": "./whisper-small-accented-zh/checkpoint-1000",
"epoch": 3.0,
"eval_steps": 1000,
"global_step": 1830,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.040983606557377046,
"grad_norm": 10.932804107666016,
"learning_rate": 4.6000000000000004e-07,
"loss": 0.8423,
"step": 25
},
{
"epoch": 0.08196721311475409,
"grad_norm": 7.338128089904785,
"learning_rate": 9.600000000000001e-07,
"loss": 0.7101,
"step": 50
},
{
"epoch": 0.12295081967213115,
"grad_norm": 6.037665843963623,
"learning_rate": 1.46e-06,
"loss": 0.5068,
"step": 75
},
{
"epoch": 0.16393442622950818,
"grad_norm": 5.529111385345459,
"learning_rate": 1.9600000000000003e-06,
"loss": 0.2505,
"step": 100
},
{
"epoch": 0.20491803278688525,
"grad_norm": 5.599390983581543,
"learning_rate": 2.46e-06,
"loss": 0.2319,
"step": 125
},
{
"epoch": 0.2459016393442623,
"grad_norm": 5.434178829193115,
"learning_rate": 2.96e-06,
"loss": 0.2241,
"step": 150
},
{
"epoch": 0.28688524590163933,
"grad_norm": 4.823729515075684,
"learning_rate": 3.46e-06,
"loss": 0.208,
"step": 175
},
{
"epoch": 0.32786885245901637,
"grad_norm": 5.677418231964111,
"learning_rate": 3.96e-06,
"loss": 0.1731,
"step": 200
},
{
"epoch": 0.36885245901639346,
"grad_norm": 5.482035160064697,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.1905,
"step": 225
},
{
"epoch": 0.4098360655737705,
"grad_norm": 5.01705265045166,
"learning_rate": 4.960000000000001e-06,
"loss": 0.19,
"step": 250
},
{
"epoch": 0.45081967213114754,
"grad_norm": 3.9028825759887695,
"learning_rate": 5.460000000000001e-06,
"loss": 0.1717,
"step": 275
},
{
"epoch": 0.4918032786885246,
"grad_norm": 4.761416435241699,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.1947,
"step": 300
},
{
"epoch": 0.5327868852459017,
"grad_norm": 4.570663928985596,
"learning_rate": 6.460000000000001e-06,
"loss": 0.1803,
"step": 325
},
{
"epoch": 0.5737704918032787,
"grad_norm": 4.883725166320801,
"learning_rate": 6.96e-06,
"loss": 0.1702,
"step": 350
},
{
"epoch": 0.6147540983606558,
"grad_norm": 4.104577541351318,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.1602,
"step": 375
},
{
"epoch": 0.6557377049180327,
"grad_norm": 5.41796350479126,
"learning_rate": 7.960000000000002e-06,
"loss": 0.1826,
"step": 400
},
{
"epoch": 0.6967213114754098,
"grad_norm": 4.365445613861084,
"learning_rate": 8.46e-06,
"loss": 0.1737,
"step": 425
},
{
"epoch": 0.7377049180327869,
"grad_norm": 5.060102939605713,
"learning_rate": 8.96e-06,
"loss": 0.1615,
"step": 450
},
{
"epoch": 0.7786885245901639,
"grad_norm": 4.938976764678955,
"learning_rate": 9.460000000000001e-06,
"loss": 0.1554,
"step": 475
},
{
"epoch": 0.819672131147541,
"grad_norm": 4.954484939575195,
"learning_rate": 9.960000000000001e-06,
"loss": 0.1578,
"step": 500
},
{
"epoch": 0.860655737704918,
"grad_norm": 4.614640712738037,
"learning_rate": 9.827067669172933e-06,
"loss": 0.1749,
"step": 525
},
{
"epoch": 0.9016393442622951,
"grad_norm": 4.592267036437988,
"learning_rate": 9.639097744360903e-06,
"loss": 0.1589,
"step": 550
},
{
"epoch": 0.9426229508196722,
"grad_norm": 3.920531988143921,
"learning_rate": 9.451127819548873e-06,
"loss": 0.1547,
"step": 575
},
{
"epoch": 0.9836065573770492,
"grad_norm": 5.515031814575195,
"learning_rate": 9.263157894736842e-06,
"loss": 0.1772,
"step": 600
},
{
"epoch": 1.0245901639344261,
"grad_norm": 3.4223694801330566,
"learning_rate": 9.075187969924812e-06,
"loss": 0.1081,
"step": 625
},
{
"epoch": 1.0655737704918034,
"grad_norm": 3.1705524921417236,
"learning_rate": 8.887218045112783e-06,
"loss": 0.0887,
"step": 650
},
{
"epoch": 1.1065573770491803,
"grad_norm": 3.728119373321533,
"learning_rate": 8.699248120300753e-06,
"loss": 0.0859,
"step": 675
},
{
"epoch": 1.1475409836065573,
"grad_norm": 3.746469497680664,
"learning_rate": 8.511278195488722e-06,
"loss": 0.0917,
"step": 700
},
{
"epoch": 1.1885245901639343,
"grad_norm": 3.7209877967834473,
"learning_rate": 8.323308270676692e-06,
"loss": 0.0918,
"step": 725
},
{
"epoch": 1.2295081967213115,
"grad_norm": 2.609157085418701,
"learning_rate": 8.135338345864663e-06,
"loss": 0.0804,
"step": 750
},
{
"epoch": 1.2704918032786885,
"grad_norm": 2.0212669372558594,
"learning_rate": 7.947368421052633e-06,
"loss": 0.0809,
"step": 775
},
{
"epoch": 1.3114754098360657,
"grad_norm": 3.3288052082061768,
"learning_rate": 7.759398496240602e-06,
"loss": 0.0895,
"step": 800
},
{
"epoch": 1.3524590163934427,
"grad_norm": 3.645484685897827,
"learning_rate": 7.571428571428572e-06,
"loss": 0.083,
"step": 825
},
{
"epoch": 1.3934426229508197,
"grad_norm": 2.8785440921783447,
"learning_rate": 7.3834586466165416e-06,
"loss": 0.0826,
"step": 850
},
{
"epoch": 1.4344262295081966,
"grad_norm": 2.876462697982788,
"learning_rate": 7.195488721804512e-06,
"loss": 0.0871,
"step": 875
},
{
"epoch": 1.4754098360655736,
"grad_norm": 3.863492012023926,
"learning_rate": 7.0075187969924815e-06,
"loss": 0.0974,
"step": 900
},
{
"epoch": 1.5163934426229508,
"grad_norm": 1.6586412191390991,
"learning_rate": 6.819548872180452e-06,
"loss": 0.0775,
"step": 925
},
{
"epoch": 1.5573770491803278,
"grad_norm": 3.3304669857025146,
"learning_rate": 6.631578947368421e-06,
"loss": 0.086,
"step": 950
},
{
"epoch": 1.598360655737705,
"grad_norm": 3.0590872764587402,
"learning_rate": 6.443609022556392e-06,
"loss": 0.082,
"step": 975
},
{
"epoch": 1.639344262295082,
"grad_norm": 3.5884344577789307,
"learning_rate": 6.255639097744361e-06,
"loss": 0.0883,
"step": 1000
},
{
"epoch": 1.639344262295082,
"eval_loss": 0.1597493290901184,
"eval_runtime": 885.7009,
"eval_samples_per_second": 2.753,
"eval_steps_per_second": 0.173,
"eval_wer": 6.666951123247926,
"step": 1000
},
{
"epoch": 1.680327868852459,
"grad_norm": 2.7260067462921143,
"learning_rate": 6.067669172932331e-06,
"loss": 0.0887,
"step": 1025
},
{
"epoch": 1.721311475409836,
"grad_norm": 3.793546438217163,
"learning_rate": 5.879699248120301e-06,
"loss": 0.0805,
"step": 1050
},
{
"epoch": 1.762295081967213,
"grad_norm": 3.374661684036255,
"learning_rate": 5.6917293233082715e-06,
"loss": 0.0815,
"step": 1075
},
{
"epoch": 1.8032786885245902,
"grad_norm": 3.6348347663879395,
"learning_rate": 5.503759398496241e-06,
"loss": 0.0777,
"step": 1100
},
{
"epoch": 1.8442622950819674,
"grad_norm": 3.1936988830566406,
"learning_rate": 5.315789473684211e-06,
"loss": 0.0748,
"step": 1125
},
{
"epoch": 1.8852459016393444,
"grad_norm": 3.0702261924743652,
"learning_rate": 5.12781954887218e-06,
"loss": 0.0869,
"step": 1150
},
{
"epoch": 1.9262295081967213,
"grad_norm": 2.608741283416748,
"learning_rate": 4.9398496240601505e-06,
"loss": 0.0807,
"step": 1175
},
{
"epoch": 1.9672131147540983,
"grad_norm": 3.992398500442505,
"learning_rate": 4.751879699248121e-06,
"loss": 0.0743,
"step": 1200
},
{
"epoch": 2.0081967213114753,
"grad_norm": 2.5503103733062744,
"learning_rate": 4.56390977443609e-06,
"loss": 0.0613,
"step": 1225
},
{
"epoch": 2.0491803278688523,
"grad_norm": 2.326587438583374,
"learning_rate": 4.375939849624061e-06,
"loss": 0.0247,
"step": 1250
},
{
"epoch": 2.0901639344262297,
"grad_norm": 1.7671444416046143,
"learning_rate": 4.18796992481203e-06,
"loss": 0.0306,
"step": 1275
},
{
"epoch": 2.1311475409836067,
"grad_norm": 2.0086865425109863,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0252,
"step": 1300
},
{
"epoch": 2.1721311475409837,
"grad_norm": 0.9309338331222534,
"learning_rate": 3.81203007518797e-06,
"loss": 0.0236,
"step": 1325
},
{
"epoch": 2.2131147540983607,
"grad_norm": 0.9044923782348633,
"learning_rate": 3.6240601503759406e-06,
"loss": 0.027,
"step": 1350
},
{
"epoch": 2.2540983606557377,
"grad_norm": 1.874260425567627,
"learning_rate": 3.43609022556391e-06,
"loss": 0.0298,
"step": 1375
},
{
"epoch": 2.2950819672131146,
"grad_norm": 0.8456152081489563,
"learning_rate": 3.24812030075188e-06,
"loss": 0.0207,
"step": 1400
},
{
"epoch": 2.3360655737704916,
"grad_norm": 1.6556758880615234,
"learning_rate": 3.06015037593985e-06,
"loss": 0.0349,
"step": 1425
},
{
"epoch": 2.3770491803278686,
"grad_norm": 1.3671510219573975,
"learning_rate": 2.8721804511278195e-06,
"loss": 0.0284,
"step": 1450
},
{
"epoch": 2.418032786885246,
"grad_norm": 1.1702046394348145,
"learning_rate": 2.68421052631579e-06,
"loss": 0.024,
"step": 1475
},
{
"epoch": 2.459016393442623,
"grad_norm": 2.4468085765838623,
"learning_rate": 2.4962406015037594e-06,
"loss": 0.0258,
"step": 1500
},
{
"epoch": 2.5,
"grad_norm": 1.201532244682312,
"learning_rate": 2.3082706766917294e-06,
"loss": 0.0254,
"step": 1525
},
{
"epoch": 2.540983606557377,
"grad_norm": 0.9549139738082886,
"learning_rate": 2.1203007518796993e-06,
"loss": 0.0256,
"step": 1550
},
{
"epoch": 2.581967213114754,
"grad_norm": 2.483381509780884,
"learning_rate": 1.9323308270676693e-06,
"loss": 0.0299,
"step": 1575
},
{
"epoch": 2.6229508196721314,
"grad_norm": 1.537619709968567,
"learning_rate": 1.7443609022556392e-06,
"loss": 0.0291,
"step": 1600
},
{
"epoch": 2.663934426229508,
"grad_norm": 1.5355826616287231,
"learning_rate": 1.5563909774436092e-06,
"loss": 0.0269,
"step": 1625
},
{
"epoch": 2.7049180327868854,
"grad_norm": 1.311331033706665,
"learning_rate": 1.3684210526315791e-06,
"loss": 0.0273,
"step": 1650
},
{
"epoch": 2.7459016393442623,
"grad_norm": 0.8623602390289307,
"learning_rate": 1.1804511278195489e-06,
"loss": 0.0219,
"step": 1675
},
{
"epoch": 2.7868852459016393,
"grad_norm": 1.6167572736740112,
"learning_rate": 9.924812030075188e-07,
"loss": 0.0303,
"step": 1700
},
{
"epoch": 2.8278688524590163,
"grad_norm": 2.8278274536132812,
"learning_rate": 8.045112781954888e-07,
"loss": 0.0245,
"step": 1725
},
{
"epoch": 2.8688524590163933,
"grad_norm": 2.3211958408355713,
"learning_rate": 6.165413533834587e-07,
"loss": 0.0241,
"step": 1750
},
{
"epoch": 2.9098360655737707,
"grad_norm": 1.8018407821655273,
"learning_rate": 4.285714285714286e-07,
"loss": 0.022,
"step": 1775
},
{
"epoch": 2.9508196721311473,
"grad_norm": 1.6640270948410034,
"learning_rate": 2.406015037593985e-07,
"loss": 0.025,
"step": 1800
},
{
"epoch": 2.9918032786885247,
"grad_norm": 0.6498773694038391,
"learning_rate": 5.263157894736842e-08,
"loss": 0.0234,
"step": 1825
}
],
"logging_steps": 25,
"max_steps": 1830,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.44025720315904e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}