{ "best_metric": 5.599513085818624, "best_model_checkpoint": "./Whisper-squeezeformer-v3\\checkpoint-40000", "epoch": 20.0, "eval_steps": 2500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 10.493926048278809, "learning_rate": 9.972e-06, "loss": 4.8718, "step": 2500 }, { "epoch": 1.0, "eval_loss": 3.8609094619750977, "eval_runtime": 650.8319, "eval_samples_per_second": 4.026, "eval_steps_per_second": 0.504, "eval_wer": 111.85902312842362, "step": 2500 }, { "epoch": 2.0, "grad_norm": 6.234286785125732, "learning_rate": 8.575428571428573e-06, "loss": 2.5628, "step": 5000 }, { "epoch": 2.0, "eval_loss": 0.2978440523147583, "eval_runtime": 624.1365, "eval_samples_per_second": 4.198, "eval_steps_per_second": 0.526, "eval_wer": 15.619293974437007, "step": 5000 }, { "epoch": 3.0, "grad_norm": 3.2328641414642334, "learning_rate": 7.146857142857143e-06, "loss": 0.1698, "step": 7500 }, { "epoch": 3.0, "eval_loss": 0.22175125777721405, "eval_runtime": 585.8606, "eval_samples_per_second": 4.472, "eval_steps_per_second": 0.56, "eval_wer": 11.090611685940353, "step": 7500 }, { "epoch": 4.0, "grad_norm": 2.424901247024536, "learning_rate": 5.718285714285715e-06, "loss": 0.0867, "step": 10000 }, { "epoch": 4.0, "eval_loss": 0.2011481672525406, "eval_runtime": 614.9021, "eval_samples_per_second": 4.261, "eval_steps_per_second": 0.533, "eval_wer": 10.189059646987218, "step": 10000 }, { "epoch": 5.0, "grad_norm": 3.77018404006958, "learning_rate": 4.290285714285714e-06, "loss": 0.1697, "step": 12500 }, { "epoch": 5.0, "eval_loss": 0.1640562266111374, "eval_runtime": 597.6823, "eval_samples_per_second": 4.384, "eval_steps_per_second": 0.549, "eval_wer": 8.985088253195373, "step": 12500 }, { "epoch": 6.0, "grad_norm": 2.6757357120513916, "learning_rate": 2.861714285714286e-06, "loss": 0.0993, "step": 15000 }, { "epoch": 6.0, "eval_loss": 0.15530936419963837, "eval_runtime": 585.6998, "eval_samples_per_second": 4.473, "eval_steps_per_second": 0.56, "eval_wer": 7.803940961655509, "step": 15000 }, { "epoch": 7.0, "grad_norm": 2.374549388885498, "learning_rate": 1.434857142857143e-06, "loss": 0.0651, "step": 17500 }, { "epoch": 7.0, "eval_loss": 0.1555396020412445, "eval_runtime": 585.2066, "eval_samples_per_second": 4.477, "eval_steps_per_second": 0.56, "eval_wer": 7.244750456482045, "step": 17500 }, { "epoch": 8.0, "grad_norm": 1.977560043334961, "learning_rate": 6.2857142857142855e-09, "loss": 0.0468, "step": 20000 }, { "epoch": 8.0, "eval_loss": 0.15691713988780975, "eval_runtime": 580.4295, "eval_samples_per_second": 4.514, "eval_steps_per_second": 0.565, "eval_wer": 7.149650030432136, "step": 20000 }, { "epoch": 9.0, "grad_norm": 4.3119354248046875, "learning_rate": 2.731636363636364e-06, "loss": 0.2168, "step": 22500 }, { "epoch": 9.0, "eval_loss": 0.15093587338924408, "eval_runtime": 583.1378, "eval_samples_per_second": 4.493, "eval_steps_per_second": 0.562, "eval_wer": 7.050745587340232, "step": 22500 }, { "epoch": 10.0, "grad_norm": 3.5476253032684326, "learning_rate": 1.822909090909091e-06, "loss": 0.1467, "step": 25000 }, { "epoch": 10.0, "eval_loss": 0.1494235247373581, "eval_runtime": 591.6025, "eval_samples_per_second": 4.429, "eval_steps_per_second": 0.554, "eval_wer": 6.967057212416311, "step": 25000 }, { "epoch": 11.0, "grad_norm": 3.3021419048309326, "learning_rate": 9.141818181818182e-07, "loss": 0.1113, "step": 27500 }, { "epoch": 11.0, "eval_loss": 0.14934100210666656, "eval_runtime": 584.5502, "eval_samples_per_second": 4.482, "eval_steps_per_second": 0.561, "eval_wer": 6.75973828362751, "step": 27500 }, { "epoch": 12.0, "grad_norm": 2.4910900592803955, "learning_rate": 5.090909090909091e-09, "loss": 0.0914, "step": 30000 }, { "epoch": 12.0, "eval_loss": 0.15113578736782074, "eval_runtime": 619.6762, "eval_samples_per_second": 4.228, "eval_steps_per_second": 0.529, "eval_wer": 6.80348447961047, "step": 30000 }, { "epoch": 13.0, "grad_norm": 4.114705562591553, "learning_rate": 2.004e-06, "loss": 0.1946, "step": 32500 }, { "epoch": 13.0, "eval_loss": 0.139073446393013, "eval_runtime": 595.1979, "eval_samples_per_second": 4.402, "eval_steps_per_second": 0.551, "eval_wer": 6.421180766889837, "step": 32500 }, { "epoch": 14.0, "grad_norm": 4.109494686126709, "learning_rate": 1.3378666666666667e-06, "loss": 0.1425, "step": 35000 }, { "epoch": 14.0, "eval_loss": 0.13689054548740387, "eval_runtime": 633.0271, "eval_samples_per_second": 4.139, "eval_steps_per_second": 0.518, "eval_wer": 5.87530432136336, "step": 35000 }, { "epoch": 15.0, "grad_norm": 2.726956367492676, "learning_rate": 6.712e-07, "loss": 0.1145, "step": 37500 }, { "epoch": 15.0, "eval_loss": 0.13682714104652405, "eval_runtime": 601.9865, "eval_samples_per_second": 4.352, "eval_steps_per_second": 0.545, "eval_wer": 5.753575776019477, "step": 37500 }, { "epoch": 16.0, "grad_norm": 3.426710844039917, "learning_rate": 1.1807058823529414e-06, "loss": 0.1776, "step": 40000 }, { "epoch": 16.0, "eval_loss": 0.13023081421852112, "eval_runtime": 578.9166, "eval_samples_per_second": 4.526, "eval_steps_per_second": 0.567, "eval_wer": 5.599513085818624, "step": 40000 }, { "epoch": 17.0, "grad_norm": 3.3029887676239014, "learning_rate": 5.927058823529412e-07, "loss": 0.1416, "step": 42500 }, { "epoch": 17.0, "eval_loss": 0.12984110414981842, "eval_runtime": 624.3393, "eval_samples_per_second": 4.196, "eval_steps_per_second": 0.525, "eval_wer": 5.620435179549604, "step": 42500 }, { "epoch": 18.0, "grad_norm": 3.106375217437744, "learning_rate": 4.705882352941177e-09, "loss": 0.1239, "step": 45000 }, { "epoch": 18.0, "eval_loss": 0.12974976003170013, "eval_runtime": 580.9928, "eval_samples_per_second": 4.51, "eval_steps_per_second": 0.565, "eval_wer": 5.620435179549604, "step": 45000 }, { "epoch": 19.0, "grad_norm": 4.747838973999023, "learning_rate": 5.309473684210527e-07, "loss": 0.3373, "step": 47500 }, { "epoch": 19.0, "eval_loss": 0.13534972071647644, "eval_runtime": 649.9396, "eval_samples_per_second": 4.031, "eval_steps_per_second": 0.505, "eval_wer": 5.740261716372489, "step": 47500 }, { "epoch": 20.0, "grad_norm": 5.997547626495361, "learning_rate": 4.842105263157895e-09, "loss": 0.2785, "step": 50000 }, { "epoch": 20.0, "eval_loss": 0.13221722841262817, "eval_runtime": 634.989, "eval_samples_per_second": 4.126, "eval_steps_per_second": 0.517, "eval_wer": 5.6641813755325625, "step": 50000 }, { "epoch": 20.0, "step": 50000, "total_flos": 3.4664620032e+20, "train_loss": 0.030787386474609376, "train_runtime": 24493.4093, "train_samples_per_second": 40.827, "train_steps_per_second": 2.041 } ], "logging_steps": 2500, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 2500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.4664620032e+20, "train_batch_size": 20, "trial_name": null, "trial_params": null }