|
{ |
|
"best_metric": 0.3207731246948242, |
|
"best_model_checkpoint": "./Hubert-common_voice-phonemes-debug/checkpoint-4500", |
|
"epoch": 30.0, |
|
"eval_steps": 100, |
|
"global_step": 11280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"eval_cer": 1.829171076931253, |
|
"eval_loss": 18.536373138427734, |
|
"eval_runtime": 213.5728, |
|
"eval_samples_per_second": 23.219, |
|
"eval_steps_per_second": 2.903, |
|
"eval_wer": 1.0645295587010823, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"eval_cer": 0.9812975256754901, |
|
"eval_loss": 8.279097557067871, |
|
"eval_runtime": 218.6029, |
|
"eval_samples_per_second": 22.685, |
|
"eval_steps_per_second": 2.836, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"eval_cer": 0.9813063831106427, |
|
"eval_loss": 7.022369384765625, |
|
"eval_runtime": 215.2791, |
|
"eval_samples_per_second": 23.035, |
|
"eval_steps_per_second": 2.88, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"eval_cer": 0.9813019543930664, |
|
"eval_loss": 6.310593128204346, |
|
"eval_runtime": 219.0435, |
|
"eval_samples_per_second": 22.639, |
|
"eval_steps_per_second": 2.83, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"grad_norm": 28.102890014648438, |
|
"learning_rate": 1.1903999999999998e-05, |
|
"loss": 8.9892, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"eval_cer": 0.9813063831106427, |
|
"eval_loss": 5.522346496582031, |
|
"eval_runtime": 213.327, |
|
"eval_samples_per_second": 23.246, |
|
"eval_steps_per_second": 2.906, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"eval_cer": 0.9813108118282189, |
|
"eval_loss": 4.712064266204834, |
|
"eval_runtime": 215.4766, |
|
"eval_samples_per_second": 23.014, |
|
"eval_steps_per_second": 2.877, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8617021276595744, |
|
"eval_cer": 0.9812975256754901, |
|
"eval_loss": 4.002798080444336, |
|
"eval_runtime": 215.7516, |
|
"eval_samples_per_second": 22.985, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_cer": 0.9813108118282189, |
|
"eval_loss": 3.475456953048706, |
|
"eval_runtime": 214.2882, |
|
"eval_samples_per_second": 23.142, |
|
"eval_steps_per_second": 2.893, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.393617021276596, |
|
"eval_cer": 0.9813019543930664, |
|
"eval_loss": 3.1987791061401367, |
|
"eval_runtime": 216.1589, |
|
"eval_samples_per_second": 22.941, |
|
"eval_steps_per_second": 2.868, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 2.371589422225952, |
|
"learning_rate": 2.3903999999999997e-05, |
|
"loss": 3.7187, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"eval_cer": 0.9813108118282189, |
|
"eval_loss": 3.0792152881622314, |
|
"eval_runtime": 213.8516, |
|
"eval_samples_per_second": 23.189, |
|
"eval_steps_per_second": 2.899, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.925531914893617, |
|
"eval_cer": 0.9812930969579139, |
|
"eval_loss": 3.0459136962890625, |
|
"eval_runtime": 216.2299, |
|
"eval_samples_per_second": 22.934, |
|
"eval_steps_per_second": 2.867, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"eval_cer": 0.9813152405457951, |
|
"eval_loss": 3.035966396331787, |
|
"eval_runtime": 217.4243, |
|
"eval_samples_per_second": 22.808, |
|
"eval_steps_per_second": 2.852, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.4574468085106385, |
|
"eval_cer": 0.9813108118282189, |
|
"eval_loss": 3.008437395095825, |
|
"eval_runtime": 215.1294, |
|
"eval_samples_per_second": 23.051, |
|
"eval_steps_per_second": 2.882, |
|
"eval_wer": 1.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"eval_cer": 0.9343265470617673, |
|
"eval_loss": 2.495579481124878, |
|
"eval_runtime": 217.105, |
|
"eval_samples_per_second": 22.841, |
|
"eval_steps_per_second": 2.856, |
|
"eval_wer": 1.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"grad_norm": 3.707914352416992, |
|
"learning_rate": 3.5903999999999994e-05, |
|
"loss": 2.783, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"eval_cer": 0.33314142223836246, |
|
"eval_loss": 1.4417579174041748, |
|
"eval_runtime": 217.5545, |
|
"eval_samples_per_second": 22.794, |
|
"eval_steps_per_second": 2.85, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"eval_cer": 0.27530679941009484, |
|
"eval_loss": 1.0227618217468262, |
|
"eval_runtime": 217.2546, |
|
"eval_samples_per_second": 22.826, |
|
"eval_steps_per_second": 2.854, |
|
"eval_wer": 1.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.5212765957446805, |
|
"eval_cer": 0.2531632115288376, |
|
"eval_loss": 0.8218135237693787, |
|
"eval_runtime": 219.2044, |
|
"eval_samples_per_second": 22.623, |
|
"eval_steps_per_second": 2.828, |
|
"eval_wer": 1.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.787234042553192, |
|
"eval_cer": 0.24330045748652562, |
|
"eval_loss": 0.7084201574325562, |
|
"eval_runtime": 216.8198, |
|
"eval_samples_per_second": 22.872, |
|
"eval_steps_per_second": 2.86, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.053191489361702, |
|
"eval_cer": 0.23373885623939875, |
|
"eval_loss": 0.6305696368217468, |
|
"eval_runtime": 218.0682, |
|
"eval_samples_per_second": 22.741, |
|
"eval_steps_per_second": 2.843, |
|
"eval_wer": 1.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 2.717346429824829, |
|
"learning_rate": 4.7903999999999994e-05, |
|
"loss": 0.8659, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"eval_cer": 0.23099305134212286, |
|
"eval_loss": 0.5933591723442078, |
|
"eval_runtime": 218.7287, |
|
"eval_samples_per_second": 22.672, |
|
"eval_steps_per_second": 2.835, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.585106382978723, |
|
"eval_cer": 0.228406680277592, |
|
"eval_loss": 0.5647820234298706, |
|
"eval_runtime": 215.722, |
|
"eval_samples_per_second": 22.988, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.851063829787234, |
|
"eval_cer": 0.22140930650711474, |
|
"eval_loss": 0.5329573750495911, |
|
"eval_runtime": 218.357, |
|
"eval_samples_per_second": 22.711, |
|
"eval_steps_per_second": 2.839, |
|
"eval_wer": 1.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.117021276595745, |
|
"eval_cer": 0.22092214757372708, |
|
"eval_loss": 0.5139152407646179, |
|
"eval_runtime": 217.5924, |
|
"eval_samples_per_second": 22.79, |
|
"eval_steps_per_second": 2.849, |
|
"eval_wer": 1.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"eval_cer": 0.21591769671256295, |
|
"eval_loss": 0.4906846582889557, |
|
"eval_runtime": 217.5534, |
|
"eval_samples_per_second": 22.794, |
|
"eval_steps_per_second": 2.85, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"grad_norm": 6.231712341308594, |
|
"learning_rate": 5.9903999999999994e-05, |
|
"loss": 0.5271, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"eval_cer": 0.2159929849113592, |
|
"eval_loss": 0.46404561400413513, |
|
"eval_runtime": 217.7416, |
|
"eval_samples_per_second": 22.775, |
|
"eval_steps_per_second": 2.847, |
|
"eval_wer": 1.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.914893617021277, |
|
"eval_cer": 0.21120111249385515, |
|
"eval_loss": 0.4608772397041321, |
|
"eval_runtime": 216.2932, |
|
"eval_samples_per_second": 22.927, |
|
"eval_steps_per_second": 2.866, |
|
"eval_wer": 1.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.180851063829787, |
|
"eval_cer": 0.2096732049300484, |
|
"eval_loss": 0.4550062417984009, |
|
"eval_runtime": 219.5448, |
|
"eval_samples_per_second": 22.588, |
|
"eval_steps_per_second": 2.824, |
|
"eval_wer": 1.0001387732445184, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"eval_cer": 0.210000930030691, |
|
"eval_loss": 0.4601040184497833, |
|
"eval_runtime": 218.3231, |
|
"eval_samples_per_second": 22.714, |
|
"eval_steps_per_second": 2.84, |
|
"eval_wer": 0.9991673605328892, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.712765957446808, |
|
"eval_cer": 0.20508948223862816, |
|
"eval_loss": 0.4290345013141632, |
|
"eval_runtime": 218.312, |
|
"eval_samples_per_second": 22.715, |
|
"eval_steps_per_second": 2.84, |
|
"eval_wer": 0.9952817096863724, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"grad_norm": 2.573235273361206, |
|
"learning_rate": 7.1904e-05, |
|
"loss": 0.4244, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"eval_cer": 0.20240125066984355, |
|
"eval_loss": 0.42562517523765564, |
|
"eval_runtime": 219.9498, |
|
"eval_samples_per_second": 22.546, |
|
"eval_steps_per_second": 2.819, |
|
"eval_wer": 0.9970857618651124, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.24468085106383, |
|
"eval_cer": 0.2014180753679157, |
|
"eval_loss": 0.4134505093097687, |
|
"eval_runtime": 218.0911, |
|
"eval_samples_per_second": 22.738, |
|
"eval_steps_per_second": 2.843, |
|
"eval_wer": 0.9998612267554815, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"eval_cer": 0.1998945965216852, |
|
"eval_loss": 0.41247421503067017, |
|
"eval_runtime": 220.4438, |
|
"eval_samples_per_second": 22.496, |
|
"eval_steps_per_second": 2.813, |
|
"eval_wer": 0.9955592561754094, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.77659574468085, |
|
"eval_cer": 0.1926979304602766, |
|
"eval_loss": 0.3886178731918335, |
|
"eval_runtime": 219.8802, |
|
"eval_samples_per_second": 22.553, |
|
"eval_steps_per_second": 2.82, |
|
"eval_wer": 0.9941715237302248, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.042553191489361, |
|
"eval_cer": 0.1911124495679786, |
|
"eval_loss": 0.38326719403266907, |
|
"eval_runtime": 219.6869, |
|
"eval_samples_per_second": 22.573, |
|
"eval_steps_per_second": 2.822, |
|
"eval_wer": 1.0005550929780738, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"grad_norm": 2.091050148010254, |
|
"learning_rate": 8.390399999999999e-05, |
|
"loss": 0.3373, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"eval_cer": 0.1886855123361928, |
|
"eval_loss": 0.36107370257377625, |
|
"eval_runtime": 222.4868, |
|
"eval_samples_per_second": 22.289, |
|
"eval_steps_per_second": 2.787, |
|
"eval_wer": 1.0363585900638357, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"eval_cer": 0.18428336706539888, |
|
"eval_loss": 0.3584689497947693, |
|
"eval_runtime": 217.5818, |
|
"eval_samples_per_second": 22.791, |
|
"eval_steps_per_second": 2.85, |
|
"eval_wer": 1.0080488481820704, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.840425531914894, |
|
"eval_cer": 0.18549683568129177, |
|
"eval_loss": 0.3562237024307251, |
|
"eval_runtime": 221.8447, |
|
"eval_samples_per_second": 22.353, |
|
"eval_steps_per_second": 2.795, |
|
"eval_wer": 0.9980571745767416, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.106382978723405, |
|
"eval_cer": 0.1799387951230962, |
|
"eval_loss": 0.3412463068962097, |
|
"eval_runtime": 217.8756, |
|
"eval_samples_per_second": 22.761, |
|
"eval_steps_per_second": 2.846, |
|
"eval_wer": 0.9883430474604497, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.372340425531915, |
|
"eval_cer": 0.18462437831877024, |
|
"eval_loss": 0.35612642765045166, |
|
"eval_runtime": 218.9988, |
|
"eval_samples_per_second": 22.644, |
|
"eval_steps_per_second": 2.831, |
|
"eval_wer": 0.9834859839023037, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"grad_norm": 2.690404176712036, |
|
"learning_rate": 9.5904e-05, |
|
"loss": 0.2779, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"eval_cer": 0.17977050385519866, |
|
"eval_loss": 0.34815147519111633, |
|
"eval_runtime": 221.1151, |
|
"eval_samples_per_second": 22.427, |
|
"eval_steps_per_second": 2.804, |
|
"eval_wer": 0.9772411878989731, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.904255319148936, |
|
"eval_cer": 0.17927891620423475, |
|
"eval_loss": 0.3266104757785797, |
|
"eval_runtime": 217.4603, |
|
"eval_samples_per_second": 22.804, |
|
"eval_steps_per_second": 2.851, |
|
"eval_wer": 0.9794615598112684, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 11.170212765957446, |
|
"eval_cer": 0.17892019008055837, |
|
"eval_loss": 0.34835320711135864, |
|
"eval_runtime": 220.4292, |
|
"eval_samples_per_second": 22.497, |
|
"eval_steps_per_second": 2.813, |
|
"eval_wer": 0.9791840133222315, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.436170212765958, |
|
"eval_cer": 0.17989007922975744, |
|
"eval_loss": 0.33775731921195984, |
|
"eval_runtime": 218.1639, |
|
"eval_samples_per_second": 22.731, |
|
"eval_steps_per_second": 2.842, |
|
"eval_wer": 0.9991673605328892, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.702127659574469, |
|
"eval_cer": 0.17952249567092857, |
|
"eval_loss": 0.33295243978500366, |
|
"eval_runtime": 218.1937, |
|
"eval_samples_per_second": 22.728, |
|
"eval_steps_per_second": 2.842, |
|
"eval_wer": 0.9764085484318623, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"grad_norm": 2.2231483459472656, |
|
"learning_rate": 0.00010790399999999999, |
|
"loss": 0.2409, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"eval_cer": 0.17923020031089598, |
|
"eval_loss": 0.3207731246948242, |
|
"eval_runtime": 218.6698, |
|
"eval_samples_per_second": 22.678, |
|
"eval_steps_per_second": 2.835, |
|
"eval_wer": 0.9780738273660838, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.23404255319149, |
|
"eval_cer": 0.18048352738497514, |
|
"eval_loss": 0.3602401614189148, |
|
"eval_runtime": 217.2746, |
|
"eval_samples_per_second": 22.824, |
|
"eval_steps_per_second": 2.854, |
|
"eval_wer": 0.97571468220927, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_cer": 0.17876961368296582, |
|
"eval_loss": 0.3362875282764435, |
|
"eval_runtime": 219.2626, |
|
"eval_samples_per_second": 22.617, |
|
"eval_steps_per_second": 2.828, |
|
"eval_wer": 0.9938939772411879, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.76595744680851, |
|
"eval_cer": 0.17953135310608107, |
|
"eval_loss": 0.32531023025512695, |
|
"eval_runtime": 217.8058, |
|
"eval_samples_per_second": 22.768, |
|
"eval_steps_per_second": 2.847, |
|
"eval_wer": 0.9732167638079379, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.03191489361702, |
|
"eval_cer": 0.17615224159540122, |
|
"eval_loss": 0.3284818232059479, |
|
"eval_runtime": 218.6415, |
|
"eval_samples_per_second": 22.681, |
|
"eval_steps_per_second": 2.836, |
|
"eval_wer": 0.971135165140161, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"grad_norm": 2.2732036113739014, |
|
"learning_rate": 0.00011990399999999998, |
|
"loss": 0.2104, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"eval_cer": 0.17691398101851646, |
|
"eval_loss": 0.32327184081077576, |
|
"eval_runtime": 218.7534, |
|
"eval_samples_per_second": 22.669, |
|
"eval_steps_per_second": 2.834, |
|
"eval_wer": 0.9729392173189009, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.563829787234042, |
|
"eval_cer": 0.18268017130279585, |
|
"eval_loss": 0.3362744450569153, |
|
"eval_runtime": 216.3239, |
|
"eval_samples_per_second": 22.924, |
|
"eval_steps_per_second": 2.866, |
|
"eval_wer": 0.97751873438801, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.829787234042554, |
|
"eval_cer": 0.17587766110567363, |
|
"eval_loss": 0.33709821105003357, |
|
"eval_runtime": 219.8171, |
|
"eval_samples_per_second": 22.56, |
|
"eval_steps_per_second": 2.821, |
|
"eval_wer": 0.9683597002497918, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.095744680851064, |
|
"eval_cer": 0.17777315222830925, |
|
"eval_loss": 0.3463989496231079, |
|
"eval_runtime": 216.1817, |
|
"eval_samples_per_second": 22.939, |
|
"eval_steps_per_second": 2.868, |
|
"eval_wer": 0.9730779905634194, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 14.361702127659575, |
|
"eval_cer": 0.17830459833745943, |
|
"eval_loss": 0.3450472950935364, |
|
"eval_runtime": 217.8272, |
|
"eval_samples_per_second": 22.766, |
|
"eval_steps_per_second": 2.846, |
|
"eval_wer": 0.9776575076325285, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"grad_norm": 3.842231273651123, |
|
"learning_rate": 0.000131904, |
|
"loss": 0.1947, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"eval_cer": 0.1773037081652266, |
|
"eval_loss": 0.3442366421222687, |
|
"eval_runtime": 219.014, |
|
"eval_samples_per_second": 22.642, |
|
"eval_steps_per_second": 2.831, |
|
"eval_wer": 0.9680821537607549, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.893617021276595, |
|
"eval_cer": 0.17800344554227432, |
|
"eval_loss": 0.3346150517463684, |
|
"eval_runtime": 216.342, |
|
"eval_samples_per_second": 22.922, |
|
"eval_steps_per_second": 2.866, |
|
"eval_wer": 0.9858451290591174, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 15.159574468085106, |
|
"eval_cer": 0.17705569998095652, |
|
"eval_loss": 0.35239535570144653, |
|
"eval_runtime": 219.7446, |
|
"eval_samples_per_second": 22.567, |
|
"eval_steps_per_second": 2.821, |
|
"eval_wer": 0.9732167638079379, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 15.425531914893616, |
|
"eval_cer": 0.17744542712766664, |
|
"eval_loss": 0.3413674235343933, |
|
"eval_runtime": 216.7948, |
|
"eval_samples_per_second": 22.874, |
|
"eval_steps_per_second": 2.86, |
|
"eval_wer": 0.9782126006106022, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 15.691489361702128, |
|
"eval_cer": 0.1765641123299926, |
|
"eval_loss": 0.3437710702419281, |
|
"eval_runtime": 217.8111, |
|
"eval_samples_per_second": 22.767, |
|
"eval_steps_per_second": 2.847, |
|
"eval_wer": 1.0019428254232583, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"grad_norm": 1.6587754487991333, |
|
"learning_rate": 0.00014390399999999998, |
|
"loss": 0.1892, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"eval_cer": 0.18023551920070505, |
|
"eval_loss": 0.3391115367412567, |
|
"eval_runtime": 220.8887, |
|
"eval_samples_per_second": 22.45, |
|
"eval_steps_per_second": 2.807, |
|
"eval_wer": 0.9705800721620872, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.22340425531915, |
|
"eval_cer": 0.18025766278858632, |
|
"eval_loss": 0.35049712657928467, |
|
"eval_runtime": 218.4181, |
|
"eval_samples_per_second": 22.704, |
|
"eval_steps_per_second": 2.839, |
|
"eval_wer": 0.9782126006106022, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 16.48936170212766, |
|
"eval_cer": 0.17667040155182265, |
|
"eval_loss": 0.34669336676597595, |
|
"eval_runtime": 220.5862, |
|
"eval_samples_per_second": 22.481, |
|
"eval_steps_per_second": 2.811, |
|
"eval_wer": 0.9736330835414932, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 16.75531914893617, |
|
"eval_cer": 0.17923462902847223, |
|
"eval_loss": 0.36808422207832336, |
|
"eval_runtime": 219.3778, |
|
"eval_samples_per_second": 22.605, |
|
"eval_steps_per_second": 2.826, |
|
"eval_wer": 0.9945878434637802, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 17.02127659574468, |
|
"eval_cer": 0.17693612460639774, |
|
"eval_loss": 0.35571783781051636, |
|
"eval_runtime": 218.8646, |
|
"eval_samples_per_second": 22.658, |
|
"eval_steps_per_second": 2.833, |
|
"eval_wer": 1.0104079933388843, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"grad_norm": 1.837514877319336, |
|
"learning_rate": 0.000155904, |
|
"loss": 0.1749, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"eval_cer": 0.17874747009508457, |
|
"eval_loss": 0.3445756435394287, |
|
"eval_runtime": 220.0286, |
|
"eval_samples_per_second": 22.538, |
|
"eval_steps_per_second": 2.818, |
|
"eval_wer": 0.9769636414099362, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.5531914893617, |
|
"eval_cer": 0.18034180842253508, |
|
"eval_loss": 0.34961050748825073, |
|
"eval_runtime": 216.6863, |
|
"eval_samples_per_second": 22.886, |
|
"eval_steps_per_second": 2.861, |
|
"eval_wer": 0.983902303635859, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 17.819148936170212, |
|
"eval_cer": 0.18056324430134765, |
|
"eval_loss": 0.3584659695625305, |
|
"eval_runtime": 217.6982, |
|
"eval_samples_per_second": 22.779, |
|
"eval_steps_per_second": 2.848, |
|
"eval_wer": 1.0012489592006661, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 18.085106382978722, |
|
"eval_cer": 0.17987236435945245, |
|
"eval_loss": 0.35618793964385986, |
|
"eval_runtime": 217.2896, |
|
"eval_samples_per_second": 22.822, |
|
"eval_steps_per_second": 2.853, |
|
"eval_wer": 0.9716902581182348, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 18.351063829787233, |
|
"eval_cer": 0.18354819994774113, |
|
"eval_loss": 0.3722291588783264, |
|
"eval_runtime": 217.7923, |
|
"eval_samples_per_second": 22.769, |
|
"eval_steps_per_second": 2.847, |
|
"eval_wer": 1.0503746877601998, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"grad_norm": 5.107634544372559, |
|
"learning_rate": 0.00016790399999999997, |
|
"loss": 0.1717, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"eval_cer": 0.18089982683714276, |
|
"eval_loss": 0.3554377853870392, |
|
"eval_runtime": 218.5289, |
|
"eval_samples_per_second": 22.693, |
|
"eval_steps_per_second": 2.837, |
|
"eval_wer": 0.9772411878989731, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.882978723404257, |
|
"eval_cer": 0.1788094721411521, |
|
"eval_loss": 0.36780402064323425, |
|
"eval_runtime": 216.2085, |
|
"eval_samples_per_second": 22.936, |
|
"eval_steps_per_second": 2.868, |
|
"eval_wer": 0.9683597002497918, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 19.148936170212767, |
|
"eval_cer": 0.18539497517703799, |
|
"eval_loss": 0.49383628368377686, |
|
"eval_runtime": 219.4656, |
|
"eval_samples_per_second": 22.596, |
|
"eval_steps_per_second": 2.825, |
|
"eval_wer": 1.041909519844574, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 19.414893617021278, |
|
"eval_cer": 0.18054552943104266, |
|
"eval_loss": 0.3926149308681488, |
|
"eval_runtime": 216.1405, |
|
"eval_samples_per_second": 22.943, |
|
"eval_steps_per_second": 2.869, |
|
"eval_wer": 0.9826533444351929, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 19.680851063829788, |
|
"eval_cer": 0.1819007170093756, |
|
"eval_loss": 0.3581140339374542, |
|
"eval_runtime": 217.0297, |
|
"eval_samples_per_second": 22.849, |
|
"eval_steps_per_second": 2.857, |
|
"eval_wer": 1.0001387732445184, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"grad_norm": 1.7489858865737915, |
|
"learning_rate": 0.000179904, |
|
"loss": 0.1715, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"eval_cer": 0.18397778555263752, |
|
"eval_loss": 0.35685011744499207, |
|
"eval_runtime": 219.4728, |
|
"eval_samples_per_second": 22.595, |
|
"eval_steps_per_second": 2.825, |
|
"eval_wer": 0.9929225645295587, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 20.21276595744681, |
|
"eval_cer": 0.18136927090022542, |
|
"eval_loss": 0.3910757303237915, |
|
"eval_runtime": 217.6276, |
|
"eval_samples_per_second": 22.787, |
|
"eval_steps_per_second": 2.849, |
|
"eval_wer": 0.9969469886205939, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 20.47872340425532, |
|
"eval_cer": 0.18082896735592274, |
|
"eval_loss": 0.39733073115348816, |
|
"eval_runtime": 218.1385, |
|
"eval_samples_per_second": 22.733, |
|
"eval_steps_per_second": 2.842, |
|
"eval_wer": 1.0016652789342215, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 20.74468085106383, |
|
"eval_cer": 0.18392906965929876, |
|
"eval_loss": 0.3943112790584564, |
|
"eval_runtime": 217.3912, |
|
"eval_samples_per_second": 22.811, |
|
"eval_steps_per_second": 2.852, |
|
"eval_wer": 0.9723841243408271, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 21.01063829787234, |
|
"eval_cer": 0.18227715800335698, |
|
"eval_loss": 0.3984341025352478, |
|
"eval_runtime": 216.5247, |
|
"eval_samples_per_second": 22.903, |
|
"eval_steps_per_second": 2.863, |
|
"eval_wer": 0.9764085484318623, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"grad_norm": 1.9202663898468018, |
|
"learning_rate": 0.00019190399999999998, |
|
"loss": 0.1667, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"eval_cer": 0.18395121324718, |
|
"eval_loss": 0.4305620491504669, |
|
"eval_runtime": 219.6412, |
|
"eval_samples_per_second": 22.578, |
|
"eval_steps_per_second": 2.823, |
|
"eval_wer": 1.0499583680266444, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.54255319148936, |
|
"eval_cer": 0.1881762098149239, |
|
"eval_loss": 0.37944892048835754, |
|
"eval_runtime": 214.5629, |
|
"eval_samples_per_second": 23.112, |
|
"eval_steps_per_second": 2.89, |
|
"eval_wer": 0.9728004440743825, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 21.80851063829787, |
|
"eval_cer": 0.18339762355014858, |
|
"eval_loss": 0.3965916037559509, |
|
"eval_runtime": 214.8413, |
|
"eval_samples_per_second": 23.082, |
|
"eval_steps_per_second": 2.886, |
|
"eval_wer": 0.9912572855953372, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 22.074468085106382, |
|
"eval_cer": 0.18381392300231622, |
|
"eval_loss": 0.39811334013938904, |
|
"eval_runtime": 215.0561, |
|
"eval_samples_per_second": 23.059, |
|
"eval_steps_per_second": 2.883, |
|
"eval_wer": 0.9744657230086039, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 22.340425531914892, |
|
"eval_cer": 0.18262702669188083, |
|
"eval_loss": 0.4328286349773407, |
|
"eval_runtime": 214.1041, |
|
"eval_samples_per_second": 23.162, |
|
"eval_steps_per_second": 2.896, |
|
"eval_wer": 0.9926450180405217, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"grad_norm": 2.1760590076446533, |
|
"learning_rate": 0.00020387999999999998, |
|
"loss": 0.1625, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"eval_cer": 0.18350834148955486, |
|
"eval_loss": 0.4087058901786804, |
|
"eval_runtime": 215.1826, |
|
"eval_samples_per_second": 23.046, |
|
"eval_steps_per_second": 2.881, |
|
"eval_wer": 0.9709963918956426, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.872340425531917, |
|
"eval_cer": 0.18611685614196696, |
|
"eval_loss": 0.4149414300918579, |
|
"eval_runtime": 214.9057, |
|
"eval_samples_per_second": 23.075, |
|
"eval_steps_per_second": 2.885, |
|
"eval_wer": 1.0062447960033305, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 23.138297872340427, |
|
"eval_cer": 0.18750304474333368, |
|
"eval_loss": 0.4106564223766327, |
|
"eval_runtime": 214.2659, |
|
"eval_samples_per_second": 23.144, |
|
"eval_steps_per_second": 2.894, |
|
"eval_wer": 0.992089925062448, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 23.404255319148938, |
|
"eval_cer": 0.18692731145842098, |
|
"eval_loss": 0.41398727893829346, |
|
"eval_runtime": 214.1566, |
|
"eval_samples_per_second": 23.156, |
|
"eval_steps_per_second": 2.895, |
|
"eval_wer": 0.9834859839023037, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 23.670212765957448, |
|
"eval_cer": 0.18904423845986917, |
|
"eval_loss": 0.40873560309410095, |
|
"eval_runtime": 213.9242, |
|
"eval_samples_per_second": 23.181, |
|
"eval_steps_per_second": 2.898, |
|
"eval_wer": 0.9918123785734111, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"grad_norm": 1.4986417293548584, |
|
"learning_rate": 0.00021588, |
|
"loss": 0.1647, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"eval_cer": 0.1869538837638785, |
|
"eval_loss": 0.4083107113838196, |
|
"eval_runtime": 229.7373, |
|
"eval_samples_per_second": 21.586, |
|
"eval_steps_per_second": 2.699, |
|
"eval_wer": 0.984179850124896, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 24.20212765957447, |
|
"eval_cer": 0.18472181010544778, |
|
"eval_loss": 0.40058091282844543, |
|
"eval_runtime": 215.0023, |
|
"eval_samples_per_second": 23.065, |
|
"eval_steps_per_second": 2.884, |
|
"eval_wer": 0.9858451290591174, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 24.46808510638298, |
|
"eval_cer": 0.18496096085456534, |
|
"eval_loss": 0.4137335419654846, |
|
"eval_runtime": 233.293, |
|
"eval_samples_per_second": 21.257, |
|
"eval_steps_per_second": 2.658, |
|
"eval_wer": 1.001526505689703, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 24.73404255319149, |
|
"eval_cer": 0.1906208619170147, |
|
"eval_loss": 0.4106745719909668, |
|
"eval_runtime": 218.151, |
|
"eval_samples_per_second": 22.732, |
|
"eval_steps_per_second": 2.842, |
|
"eval_wer": 0.9994449070219261, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.19122759622496113, |
|
"eval_loss": 0.42086583375930786, |
|
"eval_runtime": 214.8363, |
|
"eval_samples_per_second": 23.083, |
|
"eval_steps_per_second": 2.886, |
|
"eval_wer": 0.9843186233694143, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 25.26595744680851, |
|
"grad_norm": 6.019426345825195, |
|
"learning_rate": 0.00022785599999999997, |
|
"loss": 0.1667, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 25.26595744680851, |
|
"eval_cer": 0.18928338920898674, |
|
"eval_loss": 0.4373093247413635, |
|
"eval_runtime": 214.1648, |
|
"eval_samples_per_second": 23.155, |
|
"eval_steps_per_second": 2.895, |
|
"eval_wer": 0.9956980294199278, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 25.53191489361702, |
|
"eval_cer": 0.18901766615441168, |
|
"eval_loss": 0.43900319933891296, |
|
"eval_runtime": 216.3618, |
|
"eval_samples_per_second": 22.92, |
|
"eval_steps_per_second": 2.866, |
|
"eval_wer": 0.9822370247016375, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 25.79787234042553, |
|
"eval_cer": 0.19637376604856532, |
|
"eval_loss": 0.4538724422454834, |
|
"eval_runtime": 220.7821, |
|
"eval_samples_per_second": 22.461, |
|
"eval_steps_per_second": 2.808, |
|
"eval_wer": 0.985706355814599, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 26.06382978723404, |
|
"eval_cer": 0.19334895194398558, |
|
"eval_loss": 0.43809235095977783, |
|
"eval_runtime": 213.8497, |
|
"eval_samples_per_second": 23.189, |
|
"eval_steps_per_second": 2.899, |
|
"eval_wer": 1.0037468776019984, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 26.329787234042552, |
|
"eval_cer": 0.18651101200625336, |
|
"eval_loss": 0.4227481782436371, |
|
"eval_runtime": 219.4755, |
|
"eval_samples_per_second": 22.595, |
|
"eval_steps_per_second": 2.825, |
|
"eval_wer": 0.9875104079933389, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 26.595744680851062, |
|
"grad_norm": 3.2743945121765137, |
|
"learning_rate": 0.000239856, |
|
"loss": 0.1644, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.595744680851062, |
|
"eval_cer": 0.18837107338827896, |
|
"eval_loss": 0.4802379012107849, |
|
"eval_runtime": 214.7366, |
|
"eval_samples_per_second": 23.093, |
|
"eval_steps_per_second": 2.887, |
|
"eval_wer": 1.0266444629475437, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.861702127659573, |
|
"eval_cer": 0.19581131891638137, |
|
"eval_loss": 0.4389278292655945, |
|
"eval_runtime": 217.9698, |
|
"eval_samples_per_second": 22.751, |
|
"eval_steps_per_second": 2.844, |
|
"eval_wer": 0.9950041631973355, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 27.127659574468087, |
|
"eval_cer": 0.19391139907616953, |
|
"eval_loss": 0.4744134247303009, |
|
"eval_runtime": 217.5599, |
|
"eval_samples_per_second": 22.794, |
|
"eval_steps_per_second": 2.85, |
|
"eval_wer": 0.9827921176797113, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 27.393617021276597, |
|
"eval_cer": 0.1983356879348447, |
|
"eval_loss": 0.4494173526763916, |
|
"eval_runtime": 214.0697, |
|
"eval_samples_per_second": 23.165, |
|
"eval_steps_per_second": 2.896, |
|
"eval_wer": 1.0005550929780738, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 27.659574468085108, |
|
"eval_cer": 0.19612132914671898, |
|
"eval_loss": 0.44136643409729004, |
|
"eval_runtime": 214.2461, |
|
"eval_samples_per_second": 23.146, |
|
"eval_steps_per_second": 2.894, |
|
"eval_wer": 0.9962531223980017, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 27.925531914893618, |
|
"grad_norm": 5.488828659057617, |
|
"learning_rate": 0.000251856, |
|
"loss": 0.1742, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 27.925531914893618, |
|
"eval_cer": 0.19315851708820678, |
|
"eval_loss": 0.46681222319602966, |
|
"eval_runtime": 214.6029, |
|
"eval_samples_per_second": 23.108, |
|
"eval_steps_per_second": 2.889, |
|
"eval_wer": 0.9764085484318623, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 28.19148936170213, |
|
"eval_cer": 0.18782634112640004, |
|
"eval_loss": 0.42840632796287537, |
|
"eval_runtime": 214.4792, |
|
"eval_samples_per_second": 23.121, |
|
"eval_steps_per_second": 2.891, |
|
"eval_wer": 0.9719678046072717, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 28.45744680851064, |
|
"eval_cer": 0.1943764144216759, |
|
"eval_loss": 0.4257587492465973, |
|
"eval_runtime": 214.0589, |
|
"eval_samples_per_second": 23.167, |
|
"eval_steps_per_second": 2.896, |
|
"eval_wer": 1.02789342214821, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 28.72340425531915, |
|
"eval_cer": 0.18916381383442796, |
|
"eval_loss": 0.4250844120979309, |
|
"eval_runtime": 228.7764, |
|
"eval_samples_per_second": 21.676, |
|
"eval_steps_per_second": 2.71, |
|
"eval_wer": 1.0023591451568137, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 28.98936170212766, |
|
"eval_cer": 0.19779538439054203, |
|
"eval_loss": 0.4596838653087616, |
|
"eval_runtime": 214.1002, |
|
"eval_samples_per_second": 23.162, |
|
"eval_steps_per_second": 2.896, |
|
"eval_wer": 1.0201221204551763, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 29.25531914893617, |
|
"grad_norm": 4.931338787078857, |
|
"learning_rate": 0.000263832, |
|
"loss": 0.1669, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 29.25531914893617, |
|
"eval_cer": 0.1918786177086701, |
|
"eval_loss": 0.4414420425891876, |
|
"eval_runtime": 214.9531, |
|
"eval_samples_per_second": 23.07, |
|
"eval_steps_per_second": 2.884, |
|
"eval_wer": 0.9879267277268943, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 29.52127659574468, |
|
"eval_cer": 0.19090429984189478, |
|
"eval_loss": 0.4472629725933075, |
|
"eval_runtime": 220.3712, |
|
"eval_samples_per_second": 22.503, |
|
"eval_steps_per_second": 2.813, |
|
"eval_wer": 0.9772411878989731, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 29.78723404255319, |
|
"eval_cer": 0.19330466476822306, |
|
"eval_loss": 0.45273834466934204, |
|
"eval_runtime": 214.7179, |
|
"eval_samples_per_second": 23.095, |
|
"eval_steps_per_second": 2.888, |
|
"eval_wer": 0.9944490702192618, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 11280, |
|
"total_flos": 1.5765326864839811e+19, |
|
"train_loss": 0.910362657587579, |
|
"train_runtime": 61619.102, |
|
"train_samples_per_second": 5.857, |
|
"train_steps_per_second": 0.183 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.5765326864839811e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|