{
  "best_metric": 60.21685813863431,
  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-1000",
  "epoch": 0.6451612903225806,
  "eval_steps": 1000,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.016129032258064516,
      "grad_norm": 241.39755249023438,
      "learning_rate": 5.376344086021506e-07,
      "loss": 8.0646,
      "step": 25
    },
    {
      "epoch": 0.03225806451612903,
      "grad_norm": 52.91600799560547,
      "learning_rate": 1.0752688172043011e-06,
      "loss": 5.6903,
      "step": 50
    },
    {
      "epoch": 0.04838709677419355,
      "grad_norm": 32.09747314453125,
      "learning_rate": 1.6129032258064516e-06,
      "loss": 3.6353,
      "step": 75
    },
    {
      "epoch": 0.06451612903225806,
      "grad_norm": 31.451000213623047,
      "learning_rate": 2.1505376344086023e-06,
      "loss": 2.6364,
      "step": 100
    },
    {
      "epoch": 0.08064516129032258,
      "grad_norm": 29.471986770629883,
      "learning_rate": 2.688172043010753e-06,
      "loss": 2.3125,
      "step": 125
    },
    {
      "epoch": 0.0967741935483871,
      "grad_norm": 28.64345932006836,
      "learning_rate": 3.225806451612903e-06,
      "loss": 2.1281,
      "step": 150
    },
    {
      "epoch": 0.11290322580645161,
      "grad_norm": 28.750173568725586,
      "learning_rate": 3.763440860215054e-06,
      "loss": 1.9073,
      "step": 175
    },
    {
      "epoch": 0.12903225806451613,
      "grad_norm": 23.051420211791992,
      "learning_rate": 4.3010752688172045e-06,
      "loss": 1.5977,
      "step": 200
    },
    {
      "epoch": 0.14516129032258066,
      "grad_norm": 18.67135238647461,
      "learning_rate": 4.838709677419355e-06,
      "loss": 1.5081,
      "step": 225
    },
    {
      "epoch": 0.16129032258064516,
      "grad_norm": 15.335652351379395,
      "learning_rate": 5.376344086021506e-06,
      "loss": 1.4169,
      "step": 250
    },
    {
      "epoch": 0.1774193548387097,
      "grad_norm": 16.2917537689209,
      "learning_rate": 5.9139784946236566e-06,
      "loss": 1.3469,
      "step": 275
    },
    {
      "epoch": 0.1935483870967742,
      "grad_norm": 15.212031364440918,
      "learning_rate": 6.451612903225806e-06,
      "loss": 1.4059,
      "step": 300
    },
    {
      "epoch": 0.20967741935483872,
      "grad_norm": 15.661399841308594,
      "learning_rate": 6.989247311827958e-06,
      "loss": 1.333,
      "step": 325
    },
    {
      "epoch": 0.22580645161290322,
      "grad_norm": 16.841798782348633,
      "learning_rate": 7.526881720430108e-06,
      "loss": 1.2252,
      "step": 350
    },
    {
      "epoch": 0.24193548387096775,
      "grad_norm": 17.468032836914062,
      "learning_rate": 8.064516129032258e-06,
      "loss": 1.2996,
      "step": 375
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 16.684844970703125,
      "learning_rate": 8.602150537634409e-06,
      "loss": 1.2653,
      "step": 400
    },
    {
      "epoch": 0.27419354838709675,
      "grad_norm": 14.749136924743652,
      "learning_rate": 9.13978494623656e-06,
      "loss": 1.1967,
      "step": 425
    },
    {
      "epoch": 0.2903225806451613,
      "grad_norm": 13.751141548156738,
      "learning_rate": 9.67741935483871e-06,
      "loss": 1.1865,
      "step": 450
    },
    {
      "epoch": 0.3064516129032258,
      "grad_norm": 16.48873519897461,
      "learning_rate": 9.97610513739546e-06,
      "loss": 1.1636,
      "step": 475
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 14.694608688354492,
      "learning_rate": 9.916367980884111e-06,
      "loss": 1.1796,
      "step": 500
    },
    {
      "epoch": 0.3387096774193548,
      "grad_norm": 15.619414329528809,
      "learning_rate": 9.856630824372761e-06,
      "loss": 1.1655,
      "step": 525
    },
    {
      "epoch": 0.3548387096774194,
      "grad_norm": 13.177242279052734,
      "learning_rate": 9.79689366786141e-06,
      "loss": 1.143,
      "step": 550
    },
    {
      "epoch": 0.3709677419354839,
      "grad_norm": 15.957605361938477,
      "learning_rate": 9.737156511350062e-06,
      "loss": 1.1414,
      "step": 575
    },
    {
      "epoch": 0.3870967741935484,
      "grad_norm": 12.467620849609375,
      "learning_rate": 9.67741935483871e-06,
      "loss": 1.0964,
      "step": 600
    },
    {
      "epoch": 0.4032258064516129,
      "grad_norm": 15.435978889465332,
      "learning_rate": 9.61768219832736e-06,
      "loss": 1.1512,
      "step": 625
    },
    {
      "epoch": 0.41935483870967744,
      "grad_norm": 13.087624549865723,
      "learning_rate": 9.557945041816011e-06,
      "loss": 1.1338,
      "step": 650
    },
    {
      "epoch": 0.43548387096774194,
      "grad_norm": 15.716456413269043,
      "learning_rate": 9.49820788530466e-06,
      "loss": 1.0783,
      "step": 675
    },
    {
      "epoch": 0.45161290322580644,
      "grad_norm": 14.517507553100586,
      "learning_rate": 9.43847072879331e-06,
      "loss": 1.0728,
      "step": 700
    },
    {
      "epoch": 0.46774193548387094,
      "grad_norm": 17.37009620666504,
      "learning_rate": 9.37873357228196e-06,
      "loss": 1.0317,
      "step": 725
    },
    {
      "epoch": 0.4838709677419355,
      "grad_norm": 14.03701400756836,
      "learning_rate": 9.31899641577061e-06,
      "loss": 1.0347,
      "step": 750
    },
    {
      "epoch": 0.5,
      "grad_norm": 12.431659698486328,
      "learning_rate": 9.25925925925926e-06,
      "loss": 1.0524,
      "step": 775
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 12.746413230895996,
      "learning_rate": 9.19952210274791e-06,
      "loss": 1.0826,
      "step": 800
    },
    {
      "epoch": 0.532258064516129,
      "grad_norm": 15.521408081054688,
      "learning_rate": 9.13978494623656e-06,
      "loss": 1.0377,
      "step": 825
    },
    {
      "epoch": 0.5483870967741935,
      "grad_norm": 15.342901229858398,
      "learning_rate": 9.08004778972521e-06,
      "loss": 0.9762,
      "step": 850
    },
    {
      "epoch": 0.5645161290322581,
      "grad_norm": 16.137371063232422,
      "learning_rate": 9.02031063321386e-06,
      "loss": 1.0725,
      "step": 875
    },
    {
      "epoch": 0.5806451612903226,
      "grad_norm": 14.61146068572998,
      "learning_rate": 8.96057347670251e-06,
      "loss": 0.9554,
      "step": 900
    },
    {
      "epoch": 0.5967741935483871,
      "grad_norm": 13.561723709106445,
      "learning_rate": 8.90083632019116e-06,
      "loss": 1.0127,
      "step": 925
    },
    {
      "epoch": 0.6129032258064516,
      "grad_norm": 16.037729263305664,
      "learning_rate": 8.84109916367981e-06,
      "loss": 0.9621,
      "step": 950
    },
    {
      "epoch": 0.6290322580645161,
      "grad_norm": 13.945268630981445,
      "learning_rate": 8.78136200716846e-06,
      "loss": 0.9479,
      "step": 975
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 15.826567649841309,
      "learning_rate": 8.72162485065711e-06,
      "loss": 0.9789,
      "step": 1000
    },
    {
      "epoch": 0.6451612903225806,
      "eval_cer": 60.21685813863431,
      "eval_loss": 0.9020848870277405,
      "eval_runtime": 953.7359,
      "eval_samples_per_second": 2.392,
      "eval_steps_per_second": 0.3,
      "step": 1000
    }
  ],
  "logging_steps": 25,
  "max_steps": 4650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "total_flos": 4.61736640512e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}