{ "best_metric": 0.3207731246948242, "best_model_checkpoint": "./Hubert-common_voice-phonemes-debug/checkpoint-4500", "epoch": 30.0, "eval_steps": 100, "global_step": 11280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26595744680851063, "eval_cer": 1.829171076931253, "eval_loss": 18.536373138427734, "eval_runtime": 213.5728, "eval_samples_per_second": 23.219, "eval_steps_per_second": 2.903, "eval_wer": 1.0645295587010823, "step": 100 }, { "epoch": 0.5319148936170213, "eval_cer": 0.9812975256754901, "eval_loss": 8.279097557067871, "eval_runtime": 218.6029, "eval_samples_per_second": 22.685, "eval_steps_per_second": 2.836, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.7978723404255319, "eval_cer": 0.9813063831106427, "eval_loss": 7.022369384765625, "eval_runtime": 215.2791, "eval_samples_per_second": 23.035, "eval_steps_per_second": 2.88, "eval_wer": 1.0, "step": 300 }, { "epoch": 1.0638297872340425, "eval_cer": 0.9813019543930664, "eval_loss": 6.310593128204346, "eval_runtime": 219.0435, "eval_samples_per_second": 22.639, "eval_steps_per_second": 2.83, "eval_wer": 1.0, "step": 400 }, { "epoch": 1.3297872340425532, "grad_norm": 28.102890014648438, "learning_rate": 1.1903999999999998e-05, "loss": 8.9892, "step": 500 }, { "epoch": 1.3297872340425532, "eval_cer": 0.9813063831106427, "eval_loss": 5.522346496582031, "eval_runtime": 213.327, "eval_samples_per_second": 23.246, "eval_steps_per_second": 2.906, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.5957446808510638, "eval_cer": 0.9813108118282189, "eval_loss": 4.712064266204834, "eval_runtime": 215.4766, "eval_samples_per_second": 23.014, "eval_steps_per_second": 2.877, "eval_wer": 1.0, "step": 600 }, { "epoch": 1.8617021276595744, "eval_cer": 0.9812975256754901, "eval_loss": 4.002798080444336, "eval_runtime": 215.7516, "eval_samples_per_second": 22.985, "eval_steps_per_second": 2.874, "eval_wer": 1.0, "step": 700 }, { "epoch": 2.127659574468085, "eval_cer": 0.9813108118282189, "eval_loss": 3.475456953048706, "eval_runtime": 214.2882, "eval_samples_per_second": 23.142, "eval_steps_per_second": 2.893, "eval_wer": 1.0, "step": 800 }, { "epoch": 2.393617021276596, "eval_cer": 0.9813019543930664, "eval_loss": 3.1987791061401367, "eval_runtime": 216.1589, "eval_samples_per_second": 22.941, "eval_steps_per_second": 2.868, "eval_wer": 1.0, "step": 900 }, { "epoch": 2.6595744680851063, "grad_norm": 2.371589422225952, "learning_rate": 2.3903999999999997e-05, "loss": 3.7187, "step": 1000 }, { "epoch": 2.6595744680851063, "eval_cer": 0.9813108118282189, "eval_loss": 3.0792152881622314, "eval_runtime": 213.8516, "eval_samples_per_second": 23.189, "eval_steps_per_second": 2.899, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.925531914893617, "eval_cer": 0.9812930969579139, "eval_loss": 3.0459136962890625, "eval_runtime": 216.2299, "eval_samples_per_second": 22.934, "eval_steps_per_second": 2.867, "eval_wer": 1.0, "step": 1100 }, { "epoch": 3.1914893617021276, "eval_cer": 0.9813152405457951, "eval_loss": 3.035966396331787, "eval_runtime": 217.4243, "eval_samples_per_second": 22.808, "eval_steps_per_second": 2.852, "eval_wer": 1.0, "step": 1200 }, { "epoch": 3.4574468085106385, "eval_cer": 0.9813108118282189, "eval_loss": 3.008437395095825, "eval_runtime": 215.1294, "eval_samples_per_second": 23.051, "eval_steps_per_second": 2.882, "eval_wer": 1.0, "step": 1300 }, { "epoch": 3.723404255319149, "eval_cer": 0.9343265470617673, "eval_loss": 2.495579481124878, "eval_runtime": 217.105, "eval_samples_per_second": 22.841, "eval_steps_per_second": 2.856, "eval_wer": 1.0, "step": 1400 }, { "epoch": 3.9893617021276597, "grad_norm": 3.707914352416992, "learning_rate": 3.5903999999999994e-05, "loss": 2.783, "step": 1500 }, { "epoch": 3.9893617021276597, "eval_cer": 0.33314142223836246, "eval_loss": 1.4417579174041748, "eval_runtime": 217.5545, "eval_samples_per_second": 22.794, "eval_steps_per_second": 2.85, "eval_wer": 1.0, "step": 1500 }, { "epoch": 4.25531914893617, "eval_cer": 0.27530679941009484, "eval_loss": 1.0227618217468262, "eval_runtime": 217.2546, "eval_samples_per_second": 22.826, "eval_steps_per_second": 2.854, "eval_wer": 1.0, "step": 1600 }, { "epoch": 4.5212765957446805, "eval_cer": 0.2531632115288376, "eval_loss": 0.8218135237693787, "eval_runtime": 219.2044, "eval_samples_per_second": 22.623, "eval_steps_per_second": 2.828, "eval_wer": 1.0, "step": 1700 }, { "epoch": 4.787234042553192, "eval_cer": 0.24330045748652562, "eval_loss": 0.7084201574325562, "eval_runtime": 216.8198, "eval_samples_per_second": 22.872, "eval_steps_per_second": 2.86, "eval_wer": 1.0, "step": 1800 }, { "epoch": 5.053191489361702, "eval_cer": 0.23373885623939875, "eval_loss": 0.6305696368217468, "eval_runtime": 218.0682, "eval_samples_per_second": 22.741, "eval_steps_per_second": 2.843, "eval_wer": 1.0, "step": 1900 }, { "epoch": 5.319148936170213, "grad_norm": 2.717346429824829, "learning_rate": 4.7903999999999994e-05, "loss": 0.8659, "step": 2000 }, { "epoch": 5.319148936170213, "eval_cer": 0.23099305134212286, "eval_loss": 0.5933591723442078, "eval_runtime": 218.7287, "eval_samples_per_second": 22.672, "eval_steps_per_second": 2.835, "eval_wer": 1.0, "step": 2000 }, { "epoch": 5.585106382978723, "eval_cer": 0.228406680277592, "eval_loss": 0.5647820234298706, "eval_runtime": 215.722, "eval_samples_per_second": 22.988, "eval_steps_per_second": 2.874, "eval_wer": 1.0, "step": 2100 }, { "epoch": 5.851063829787234, "eval_cer": 0.22140930650711474, "eval_loss": 0.5329573750495911, "eval_runtime": 218.357, "eval_samples_per_second": 22.711, "eval_steps_per_second": 2.839, "eval_wer": 1.0, "step": 2200 }, { "epoch": 6.117021276595745, "eval_cer": 0.22092214757372708, "eval_loss": 0.5139152407646179, "eval_runtime": 217.5924, "eval_samples_per_second": 22.79, "eval_steps_per_second": 2.849, "eval_wer": 1.0, "step": 2300 }, { "epoch": 6.382978723404255, "eval_cer": 0.21591769671256295, "eval_loss": 0.4906846582889557, "eval_runtime": 217.5534, "eval_samples_per_second": 22.794, "eval_steps_per_second": 2.85, "eval_wer": 1.0, "step": 2400 }, { "epoch": 6.648936170212766, "grad_norm": 6.231712341308594, "learning_rate": 5.9903999999999994e-05, "loss": 0.5271, "step": 2500 }, { "epoch": 6.648936170212766, "eval_cer": 0.2159929849113592, "eval_loss": 0.46404561400413513, "eval_runtime": 217.7416, "eval_samples_per_second": 22.775, "eval_steps_per_second": 2.847, "eval_wer": 1.0, "step": 2500 }, { "epoch": 6.914893617021277, "eval_cer": 0.21120111249385515, "eval_loss": 0.4608772397041321, "eval_runtime": 216.2932, "eval_samples_per_second": 22.927, "eval_steps_per_second": 2.866, "eval_wer": 1.0, "step": 2600 }, { "epoch": 7.180851063829787, "eval_cer": 0.2096732049300484, "eval_loss": 0.4550062417984009, "eval_runtime": 219.5448, "eval_samples_per_second": 22.588, "eval_steps_per_second": 2.824, "eval_wer": 1.0001387732445184, "step": 2700 }, { "epoch": 7.446808510638298, "eval_cer": 0.210000930030691, "eval_loss": 0.4601040184497833, "eval_runtime": 218.3231, "eval_samples_per_second": 22.714, "eval_steps_per_second": 2.84, "eval_wer": 0.9991673605328892, "step": 2800 }, { "epoch": 7.712765957446808, "eval_cer": 0.20508948223862816, "eval_loss": 0.4290345013141632, "eval_runtime": 218.312, "eval_samples_per_second": 22.715, "eval_steps_per_second": 2.84, "eval_wer": 0.9952817096863724, "step": 2900 }, { "epoch": 7.9787234042553195, "grad_norm": 2.573235273361206, "learning_rate": 7.1904e-05, "loss": 0.4244, "step": 3000 }, { "epoch": 7.9787234042553195, "eval_cer": 0.20240125066984355, "eval_loss": 0.42562517523765564, "eval_runtime": 219.9498, "eval_samples_per_second": 22.546, "eval_steps_per_second": 2.819, "eval_wer": 0.9970857618651124, "step": 3000 }, { "epoch": 8.24468085106383, "eval_cer": 0.2014180753679157, "eval_loss": 0.4134505093097687, "eval_runtime": 218.0911, "eval_samples_per_second": 22.738, "eval_steps_per_second": 2.843, "eval_wer": 0.9998612267554815, "step": 3100 }, { "epoch": 8.51063829787234, "eval_cer": 0.1998945965216852, "eval_loss": 0.41247421503067017, "eval_runtime": 220.4438, "eval_samples_per_second": 22.496, "eval_steps_per_second": 2.813, "eval_wer": 0.9955592561754094, "step": 3200 }, { "epoch": 8.77659574468085, "eval_cer": 0.1926979304602766, "eval_loss": 0.3886178731918335, "eval_runtime": 219.8802, "eval_samples_per_second": 22.553, "eval_steps_per_second": 2.82, "eval_wer": 0.9941715237302248, "step": 3300 }, { "epoch": 9.042553191489361, "eval_cer": 0.1911124495679786, "eval_loss": 0.38326719403266907, "eval_runtime": 219.6869, "eval_samples_per_second": 22.573, "eval_steps_per_second": 2.822, "eval_wer": 1.0005550929780738, "step": 3400 }, { "epoch": 9.308510638297872, "grad_norm": 2.091050148010254, "learning_rate": 8.390399999999999e-05, "loss": 0.3373, "step": 3500 }, { "epoch": 9.308510638297872, "eval_cer": 0.1886855123361928, "eval_loss": 0.36107370257377625, "eval_runtime": 222.4868, "eval_samples_per_second": 22.289, "eval_steps_per_second": 2.787, "eval_wer": 1.0363585900638357, "step": 3500 }, { "epoch": 9.574468085106384, "eval_cer": 0.18428336706539888, "eval_loss": 0.3584689497947693, "eval_runtime": 217.5818, "eval_samples_per_second": 22.791, "eval_steps_per_second": 2.85, "eval_wer": 1.0080488481820704, "step": 3600 }, { "epoch": 9.840425531914894, "eval_cer": 0.18549683568129177, "eval_loss": 0.3562237024307251, "eval_runtime": 221.8447, "eval_samples_per_second": 22.353, "eval_steps_per_second": 2.795, "eval_wer": 0.9980571745767416, "step": 3700 }, { "epoch": 10.106382978723405, "eval_cer": 0.1799387951230962, "eval_loss": 0.3412463068962097, "eval_runtime": 217.8756, "eval_samples_per_second": 22.761, "eval_steps_per_second": 2.846, "eval_wer": 0.9883430474604497, "step": 3800 }, { "epoch": 10.372340425531915, "eval_cer": 0.18462437831877024, "eval_loss": 0.35612642765045166, "eval_runtime": 218.9988, "eval_samples_per_second": 22.644, "eval_steps_per_second": 2.831, "eval_wer": 0.9834859839023037, "step": 3900 }, { "epoch": 10.638297872340425, "grad_norm": 2.690404176712036, "learning_rate": 9.5904e-05, "loss": 0.2779, "step": 4000 }, { "epoch": 10.638297872340425, "eval_cer": 0.17977050385519866, "eval_loss": 0.34815147519111633, "eval_runtime": 221.1151, "eval_samples_per_second": 22.427, "eval_steps_per_second": 2.804, "eval_wer": 0.9772411878989731, "step": 4000 }, { "epoch": 10.904255319148936, "eval_cer": 0.17927891620423475, "eval_loss": 0.3266104757785797, "eval_runtime": 217.4603, "eval_samples_per_second": 22.804, "eval_steps_per_second": 2.851, "eval_wer": 0.9794615598112684, "step": 4100 }, { "epoch": 11.170212765957446, "eval_cer": 0.17892019008055837, "eval_loss": 0.34835320711135864, "eval_runtime": 220.4292, "eval_samples_per_second": 22.497, "eval_steps_per_second": 2.813, "eval_wer": 0.9791840133222315, "step": 4200 }, { "epoch": 11.436170212765958, "eval_cer": 0.17989007922975744, "eval_loss": 0.33775731921195984, "eval_runtime": 218.1639, "eval_samples_per_second": 22.731, "eval_steps_per_second": 2.842, "eval_wer": 0.9991673605328892, "step": 4300 }, { "epoch": 11.702127659574469, "eval_cer": 0.17952249567092857, "eval_loss": 0.33295243978500366, "eval_runtime": 218.1937, "eval_samples_per_second": 22.728, "eval_steps_per_second": 2.842, "eval_wer": 0.9764085484318623, "step": 4400 }, { "epoch": 11.96808510638298, "grad_norm": 2.2231483459472656, "learning_rate": 0.00010790399999999999, "loss": 0.2409, "step": 4500 }, { "epoch": 11.96808510638298, "eval_cer": 0.17923020031089598, "eval_loss": 0.3207731246948242, "eval_runtime": 218.6698, "eval_samples_per_second": 22.678, "eval_steps_per_second": 2.835, "eval_wer": 0.9780738273660838, "step": 4500 }, { "epoch": 12.23404255319149, "eval_cer": 0.18048352738497514, "eval_loss": 0.3602401614189148, "eval_runtime": 217.2746, "eval_samples_per_second": 22.824, "eval_steps_per_second": 2.854, "eval_wer": 0.97571468220927, "step": 4600 }, { "epoch": 12.5, "eval_cer": 0.17876961368296582, "eval_loss": 0.3362875282764435, "eval_runtime": 219.2626, "eval_samples_per_second": 22.617, "eval_steps_per_second": 2.828, "eval_wer": 0.9938939772411879, "step": 4700 }, { "epoch": 12.76595744680851, "eval_cer": 0.17953135310608107, "eval_loss": 0.32531023025512695, "eval_runtime": 217.8058, "eval_samples_per_second": 22.768, "eval_steps_per_second": 2.847, "eval_wer": 0.9732167638079379, "step": 4800 }, { "epoch": 13.03191489361702, "eval_cer": 0.17615224159540122, "eval_loss": 0.3284818232059479, "eval_runtime": 218.6415, "eval_samples_per_second": 22.681, "eval_steps_per_second": 2.836, "eval_wer": 0.971135165140161, "step": 4900 }, { "epoch": 13.297872340425531, "grad_norm": 2.2732036113739014, "learning_rate": 0.00011990399999999998, "loss": 0.2104, "step": 5000 }, { "epoch": 13.297872340425531, "eval_cer": 0.17691398101851646, "eval_loss": 0.32327184081077576, "eval_runtime": 218.7534, "eval_samples_per_second": 22.669, "eval_steps_per_second": 2.834, "eval_wer": 0.9729392173189009, "step": 5000 }, { "epoch": 13.563829787234042, "eval_cer": 0.18268017130279585, "eval_loss": 0.3362744450569153, "eval_runtime": 216.3239, "eval_samples_per_second": 22.924, "eval_steps_per_second": 2.866, "eval_wer": 0.97751873438801, "step": 5100 }, { "epoch": 13.829787234042554, "eval_cer": 0.17587766110567363, "eval_loss": 0.33709821105003357, "eval_runtime": 219.8171, "eval_samples_per_second": 22.56, "eval_steps_per_second": 2.821, "eval_wer": 0.9683597002497918, "step": 5200 }, { "epoch": 14.095744680851064, "eval_cer": 0.17777315222830925, "eval_loss": 0.3463989496231079, "eval_runtime": 216.1817, "eval_samples_per_second": 22.939, "eval_steps_per_second": 2.868, "eval_wer": 0.9730779905634194, "step": 5300 }, { "epoch": 14.361702127659575, "eval_cer": 0.17830459833745943, "eval_loss": 0.3450472950935364, "eval_runtime": 217.8272, "eval_samples_per_second": 22.766, "eval_steps_per_second": 2.846, "eval_wer": 0.9776575076325285, "step": 5400 }, { "epoch": 14.627659574468085, "grad_norm": 3.842231273651123, "learning_rate": 0.000131904, "loss": 0.1947, "step": 5500 }, { "epoch": 14.627659574468085, "eval_cer": 0.1773037081652266, "eval_loss": 0.3442366421222687, "eval_runtime": 219.014, "eval_samples_per_second": 22.642, "eval_steps_per_second": 2.831, "eval_wer": 0.9680821537607549, "step": 5500 }, { "epoch": 14.893617021276595, "eval_cer": 0.17800344554227432, "eval_loss": 0.3346150517463684, "eval_runtime": 216.342, "eval_samples_per_second": 22.922, "eval_steps_per_second": 2.866, "eval_wer": 0.9858451290591174, "step": 5600 }, { "epoch": 15.159574468085106, "eval_cer": 0.17705569998095652, "eval_loss": 0.35239535570144653, "eval_runtime": 219.7446, "eval_samples_per_second": 22.567, "eval_steps_per_second": 2.821, "eval_wer": 0.9732167638079379, "step": 5700 }, { "epoch": 15.425531914893616, "eval_cer": 0.17744542712766664, "eval_loss": 0.3413674235343933, "eval_runtime": 216.7948, "eval_samples_per_second": 22.874, "eval_steps_per_second": 2.86, "eval_wer": 0.9782126006106022, "step": 5800 }, { "epoch": 15.691489361702128, "eval_cer": 0.1765641123299926, "eval_loss": 0.3437710702419281, "eval_runtime": 217.8111, "eval_samples_per_second": 22.767, "eval_steps_per_second": 2.847, "eval_wer": 1.0019428254232583, "step": 5900 }, { "epoch": 15.957446808510639, "grad_norm": 1.6587754487991333, "learning_rate": 0.00014390399999999998, "loss": 0.1892, "step": 6000 }, { "epoch": 15.957446808510639, "eval_cer": 0.18023551920070505, "eval_loss": 0.3391115367412567, "eval_runtime": 220.8887, "eval_samples_per_second": 22.45, "eval_steps_per_second": 2.807, "eval_wer": 0.9705800721620872, "step": 6000 }, { "epoch": 16.22340425531915, "eval_cer": 0.18025766278858632, "eval_loss": 0.35049712657928467, "eval_runtime": 218.4181, "eval_samples_per_second": 22.704, "eval_steps_per_second": 2.839, "eval_wer": 0.9782126006106022, "step": 6100 }, { "epoch": 16.48936170212766, "eval_cer": 0.17667040155182265, "eval_loss": 0.34669336676597595, "eval_runtime": 220.5862, "eval_samples_per_second": 22.481, "eval_steps_per_second": 2.811, "eval_wer": 0.9736330835414932, "step": 6200 }, { "epoch": 16.75531914893617, "eval_cer": 0.17923462902847223, "eval_loss": 0.36808422207832336, "eval_runtime": 219.3778, "eval_samples_per_second": 22.605, "eval_steps_per_second": 2.826, "eval_wer": 0.9945878434637802, "step": 6300 }, { "epoch": 17.02127659574468, "eval_cer": 0.17693612460639774, "eval_loss": 0.35571783781051636, "eval_runtime": 218.8646, "eval_samples_per_second": 22.658, "eval_steps_per_second": 2.833, "eval_wer": 1.0104079933388843, "step": 6400 }, { "epoch": 17.28723404255319, "grad_norm": 1.837514877319336, "learning_rate": 0.000155904, "loss": 0.1749, "step": 6500 }, { "epoch": 17.28723404255319, "eval_cer": 0.17874747009508457, "eval_loss": 0.3445756435394287, "eval_runtime": 220.0286, "eval_samples_per_second": 22.538, "eval_steps_per_second": 2.818, "eval_wer": 0.9769636414099362, "step": 6500 }, { "epoch": 17.5531914893617, "eval_cer": 0.18034180842253508, "eval_loss": 0.34961050748825073, "eval_runtime": 216.6863, "eval_samples_per_second": 22.886, "eval_steps_per_second": 2.861, "eval_wer": 0.983902303635859, "step": 6600 }, { "epoch": 17.819148936170212, "eval_cer": 0.18056324430134765, "eval_loss": 0.3584659695625305, "eval_runtime": 217.6982, "eval_samples_per_second": 22.779, "eval_steps_per_second": 2.848, "eval_wer": 1.0012489592006661, "step": 6700 }, { "epoch": 18.085106382978722, "eval_cer": 0.17987236435945245, "eval_loss": 0.35618793964385986, "eval_runtime": 217.2896, "eval_samples_per_second": 22.822, "eval_steps_per_second": 2.853, "eval_wer": 0.9716902581182348, "step": 6800 }, { "epoch": 18.351063829787233, "eval_cer": 0.18354819994774113, "eval_loss": 0.3722291588783264, "eval_runtime": 217.7923, "eval_samples_per_second": 22.769, "eval_steps_per_second": 2.847, "eval_wer": 1.0503746877601998, "step": 6900 }, { "epoch": 18.617021276595743, "grad_norm": 5.107634544372559, "learning_rate": 0.00016790399999999997, "loss": 0.1717, "step": 7000 }, { "epoch": 18.617021276595743, "eval_cer": 0.18089982683714276, "eval_loss": 0.3554377853870392, "eval_runtime": 218.5289, "eval_samples_per_second": 22.693, "eval_steps_per_second": 2.837, "eval_wer": 0.9772411878989731, "step": 7000 }, { "epoch": 18.882978723404257, "eval_cer": 0.1788094721411521, "eval_loss": 0.36780402064323425, "eval_runtime": 216.2085, "eval_samples_per_second": 22.936, "eval_steps_per_second": 2.868, "eval_wer": 0.9683597002497918, "step": 7100 }, { "epoch": 19.148936170212767, "eval_cer": 0.18539497517703799, "eval_loss": 0.49383628368377686, "eval_runtime": 219.4656, "eval_samples_per_second": 22.596, "eval_steps_per_second": 2.825, "eval_wer": 1.041909519844574, "step": 7200 }, { "epoch": 19.414893617021278, "eval_cer": 0.18054552943104266, "eval_loss": 0.3926149308681488, "eval_runtime": 216.1405, "eval_samples_per_second": 22.943, "eval_steps_per_second": 2.869, "eval_wer": 0.9826533444351929, "step": 7300 }, { "epoch": 19.680851063829788, "eval_cer": 0.1819007170093756, "eval_loss": 0.3581140339374542, "eval_runtime": 217.0297, "eval_samples_per_second": 22.849, "eval_steps_per_second": 2.857, "eval_wer": 1.0001387732445184, "step": 7400 }, { "epoch": 19.9468085106383, "grad_norm": 1.7489858865737915, "learning_rate": 0.000179904, "loss": 0.1715, "step": 7500 }, { "epoch": 19.9468085106383, "eval_cer": 0.18397778555263752, "eval_loss": 0.35685011744499207, "eval_runtime": 219.4728, "eval_samples_per_second": 22.595, "eval_steps_per_second": 2.825, "eval_wer": 0.9929225645295587, "step": 7500 }, { "epoch": 20.21276595744681, "eval_cer": 0.18136927090022542, "eval_loss": 0.3910757303237915, "eval_runtime": 217.6276, "eval_samples_per_second": 22.787, "eval_steps_per_second": 2.849, "eval_wer": 0.9969469886205939, "step": 7600 }, { "epoch": 20.47872340425532, "eval_cer": 0.18082896735592274, "eval_loss": 0.39733073115348816, "eval_runtime": 218.1385, "eval_samples_per_second": 22.733, "eval_steps_per_second": 2.842, "eval_wer": 1.0016652789342215, "step": 7700 }, { "epoch": 20.74468085106383, "eval_cer": 0.18392906965929876, "eval_loss": 0.3943112790584564, "eval_runtime": 217.3912, "eval_samples_per_second": 22.811, "eval_steps_per_second": 2.852, "eval_wer": 0.9723841243408271, "step": 7800 }, { "epoch": 21.01063829787234, "eval_cer": 0.18227715800335698, "eval_loss": 0.3984341025352478, "eval_runtime": 216.5247, "eval_samples_per_second": 22.903, "eval_steps_per_second": 2.863, "eval_wer": 0.9764085484318623, "step": 7900 }, { "epoch": 21.27659574468085, "grad_norm": 1.9202663898468018, "learning_rate": 0.00019190399999999998, "loss": 0.1667, "step": 8000 }, { "epoch": 21.27659574468085, "eval_cer": 0.18395121324718, "eval_loss": 0.4305620491504669, "eval_runtime": 219.6412, "eval_samples_per_second": 22.578, "eval_steps_per_second": 2.823, "eval_wer": 1.0499583680266444, "step": 8000 }, { "epoch": 21.54255319148936, "eval_cer": 0.1881762098149239, "eval_loss": 0.37944892048835754, "eval_runtime": 214.5629, "eval_samples_per_second": 23.112, "eval_steps_per_second": 2.89, "eval_wer": 0.9728004440743825, "step": 8100 }, { "epoch": 21.80851063829787, "eval_cer": 0.18339762355014858, "eval_loss": 0.3965916037559509, "eval_runtime": 214.8413, "eval_samples_per_second": 23.082, "eval_steps_per_second": 2.886, "eval_wer": 0.9912572855953372, "step": 8200 }, { "epoch": 22.074468085106382, "eval_cer": 0.18381392300231622, "eval_loss": 0.39811334013938904, "eval_runtime": 215.0561, "eval_samples_per_second": 23.059, "eval_steps_per_second": 2.883, "eval_wer": 0.9744657230086039, "step": 8300 }, { "epoch": 22.340425531914892, "eval_cer": 0.18262702669188083, "eval_loss": 0.4328286349773407, "eval_runtime": 214.1041, "eval_samples_per_second": 23.162, "eval_steps_per_second": 2.896, "eval_wer": 0.9926450180405217, "step": 8400 }, { "epoch": 22.606382978723403, "grad_norm": 2.1760590076446533, "learning_rate": 0.00020387999999999998, "loss": 0.1625, "step": 8500 }, { "epoch": 22.606382978723403, "eval_cer": 0.18350834148955486, "eval_loss": 0.4087058901786804, "eval_runtime": 215.1826, "eval_samples_per_second": 23.046, "eval_steps_per_second": 2.881, "eval_wer": 0.9709963918956426, "step": 8500 }, { "epoch": 22.872340425531917, "eval_cer": 0.18611685614196696, "eval_loss": 0.4149414300918579, "eval_runtime": 214.9057, "eval_samples_per_second": 23.075, "eval_steps_per_second": 2.885, "eval_wer": 1.0062447960033305, "step": 8600 }, { "epoch": 23.138297872340427, "eval_cer": 0.18750304474333368, "eval_loss": 0.4106564223766327, "eval_runtime": 214.2659, "eval_samples_per_second": 23.144, "eval_steps_per_second": 2.894, "eval_wer": 0.992089925062448, "step": 8700 }, { "epoch": 23.404255319148938, "eval_cer": 0.18692731145842098, "eval_loss": 0.41398727893829346, "eval_runtime": 214.1566, "eval_samples_per_second": 23.156, "eval_steps_per_second": 2.895, "eval_wer": 0.9834859839023037, "step": 8800 }, { "epoch": 23.670212765957448, "eval_cer": 0.18904423845986917, "eval_loss": 0.40873560309410095, "eval_runtime": 213.9242, "eval_samples_per_second": 23.181, "eval_steps_per_second": 2.898, "eval_wer": 0.9918123785734111, "step": 8900 }, { "epoch": 23.93617021276596, "grad_norm": 1.4986417293548584, "learning_rate": 0.00021588, "loss": 0.1647, "step": 9000 }, { "epoch": 23.93617021276596, "eval_cer": 0.1869538837638785, "eval_loss": 0.4083107113838196, "eval_runtime": 229.7373, "eval_samples_per_second": 21.586, "eval_steps_per_second": 2.699, "eval_wer": 0.984179850124896, "step": 9000 }, { "epoch": 24.20212765957447, "eval_cer": 0.18472181010544778, "eval_loss": 0.40058091282844543, "eval_runtime": 215.0023, "eval_samples_per_second": 23.065, "eval_steps_per_second": 2.884, "eval_wer": 0.9858451290591174, "step": 9100 }, { "epoch": 24.46808510638298, "eval_cer": 0.18496096085456534, "eval_loss": 0.4137335419654846, "eval_runtime": 233.293, "eval_samples_per_second": 21.257, "eval_steps_per_second": 2.658, "eval_wer": 1.001526505689703, "step": 9200 }, { "epoch": 24.73404255319149, "eval_cer": 0.1906208619170147, "eval_loss": 0.4106745719909668, "eval_runtime": 218.151, "eval_samples_per_second": 22.732, "eval_steps_per_second": 2.842, "eval_wer": 0.9994449070219261, "step": 9300 }, { "epoch": 25.0, "eval_cer": 0.19122759622496113, "eval_loss": 0.42086583375930786, "eval_runtime": 214.8363, "eval_samples_per_second": 23.083, "eval_steps_per_second": 2.886, "eval_wer": 0.9843186233694143, "step": 9400 }, { "epoch": 25.26595744680851, "grad_norm": 6.019426345825195, "learning_rate": 0.00022785599999999997, "loss": 0.1667, "step": 9500 }, { "epoch": 25.26595744680851, "eval_cer": 0.18928338920898674, "eval_loss": 0.4373093247413635, "eval_runtime": 214.1648, "eval_samples_per_second": 23.155, "eval_steps_per_second": 2.895, "eval_wer": 0.9956980294199278, "step": 9500 }, { "epoch": 25.53191489361702, "eval_cer": 0.18901766615441168, "eval_loss": 0.43900319933891296, "eval_runtime": 216.3618, "eval_samples_per_second": 22.92, "eval_steps_per_second": 2.866, "eval_wer": 0.9822370247016375, "step": 9600 }, { "epoch": 25.79787234042553, "eval_cer": 0.19637376604856532, "eval_loss": 0.4538724422454834, "eval_runtime": 220.7821, "eval_samples_per_second": 22.461, "eval_steps_per_second": 2.808, "eval_wer": 0.985706355814599, "step": 9700 }, { "epoch": 26.06382978723404, "eval_cer": 0.19334895194398558, "eval_loss": 0.43809235095977783, "eval_runtime": 213.8497, "eval_samples_per_second": 23.189, "eval_steps_per_second": 2.899, "eval_wer": 1.0037468776019984, "step": 9800 }, { "epoch": 26.329787234042552, "eval_cer": 0.18651101200625336, "eval_loss": 0.4227481782436371, "eval_runtime": 219.4755, "eval_samples_per_second": 22.595, "eval_steps_per_second": 2.825, "eval_wer": 0.9875104079933389, "step": 9900 }, { "epoch": 26.595744680851062, "grad_norm": 3.2743945121765137, "learning_rate": 0.000239856, "loss": 0.1644, "step": 10000 }, { "epoch": 26.595744680851062, "eval_cer": 0.18837107338827896, "eval_loss": 0.4802379012107849, "eval_runtime": 214.7366, "eval_samples_per_second": 23.093, "eval_steps_per_second": 2.887, "eval_wer": 1.0266444629475437, "step": 10000 }, { "epoch": 26.861702127659573, "eval_cer": 0.19581131891638137, "eval_loss": 0.4389278292655945, "eval_runtime": 217.9698, "eval_samples_per_second": 22.751, "eval_steps_per_second": 2.844, "eval_wer": 0.9950041631973355, "step": 10100 }, { "epoch": 27.127659574468087, "eval_cer": 0.19391139907616953, "eval_loss": 0.4744134247303009, "eval_runtime": 217.5599, "eval_samples_per_second": 22.794, "eval_steps_per_second": 2.85, "eval_wer": 0.9827921176797113, "step": 10200 }, { "epoch": 27.393617021276597, "eval_cer": 0.1983356879348447, "eval_loss": 0.4494173526763916, "eval_runtime": 214.0697, "eval_samples_per_second": 23.165, "eval_steps_per_second": 2.896, "eval_wer": 1.0005550929780738, "step": 10300 }, { "epoch": 27.659574468085108, "eval_cer": 0.19612132914671898, "eval_loss": 0.44136643409729004, "eval_runtime": 214.2461, "eval_samples_per_second": 23.146, "eval_steps_per_second": 2.894, "eval_wer": 0.9962531223980017, "step": 10400 }, { "epoch": 27.925531914893618, "grad_norm": 5.488828659057617, "learning_rate": 0.000251856, "loss": 0.1742, "step": 10500 }, { "epoch": 27.925531914893618, "eval_cer": 0.19315851708820678, "eval_loss": 0.46681222319602966, "eval_runtime": 214.6029, "eval_samples_per_second": 23.108, "eval_steps_per_second": 2.889, "eval_wer": 0.9764085484318623, "step": 10500 }, { "epoch": 28.19148936170213, "eval_cer": 0.18782634112640004, "eval_loss": 0.42840632796287537, "eval_runtime": 214.4792, "eval_samples_per_second": 23.121, "eval_steps_per_second": 2.891, "eval_wer": 0.9719678046072717, "step": 10600 }, { "epoch": 28.45744680851064, "eval_cer": 0.1943764144216759, "eval_loss": 0.4257587492465973, "eval_runtime": 214.0589, "eval_samples_per_second": 23.167, "eval_steps_per_second": 2.896, "eval_wer": 1.02789342214821, "step": 10700 }, { "epoch": 28.72340425531915, "eval_cer": 0.18916381383442796, "eval_loss": 0.4250844120979309, "eval_runtime": 228.7764, "eval_samples_per_second": 21.676, "eval_steps_per_second": 2.71, "eval_wer": 1.0023591451568137, "step": 10800 }, { "epoch": 28.98936170212766, "eval_cer": 0.19779538439054203, "eval_loss": 0.4596838653087616, "eval_runtime": 214.1002, "eval_samples_per_second": 23.162, "eval_steps_per_second": 2.896, "eval_wer": 1.0201221204551763, "step": 10900 }, { "epoch": 29.25531914893617, "grad_norm": 4.931338787078857, "learning_rate": 0.000263832, "loss": 0.1669, "step": 11000 }, { "epoch": 29.25531914893617, "eval_cer": 0.1918786177086701, "eval_loss": 0.4414420425891876, "eval_runtime": 214.9531, "eval_samples_per_second": 23.07, "eval_steps_per_second": 2.884, "eval_wer": 0.9879267277268943, "step": 11000 }, { "epoch": 29.52127659574468, "eval_cer": 0.19090429984189478, "eval_loss": 0.4472629725933075, "eval_runtime": 220.3712, "eval_samples_per_second": 22.503, "eval_steps_per_second": 2.813, "eval_wer": 0.9772411878989731, "step": 11100 }, { "epoch": 29.78723404255319, "eval_cer": 0.19330466476822306, "eval_loss": 0.45273834466934204, "eval_runtime": 214.7179, "eval_samples_per_second": 23.095, "eval_steps_per_second": 2.888, "eval_wer": 0.9944490702192618, "step": 11200 }, { "epoch": 30.0, "step": 11280, "total_flos": 1.5765326864839811e+19, "train_loss": 0.910362657587579, "train_runtime": 61619.102, "train_samples_per_second": 5.857, "train_steps_per_second": 0.183 } ], "logging_steps": 500, "max_steps": 11280, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5765326864839811e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }