yuweiiizz's picture
Training in progress, step 1000, checkpoint
169a268 verified
raw
history blame
7.68 kB
{
"best_metric": 60.21685813863431,
"best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-1000",
"epoch": 0.6451612903225806,
"eval_steps": 1000,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016129032258064516,
"grad_norm": 241.39755249023438,
"learning_rate": 5.376344086021506e-07,
"loss": 8.0646,
"step": 25
},
{
"epoch": 0.03225806451612903,
"grad_norm": 52.91600799560547,
"learning_rate": 1.0752688172043011e-06,
"loss": 5.6903,
"step": 50
},
{
"epoch": 0.04838709677419355,
"grad_norm": 32.09747314453125,
"learning_rate": 1.6129032258064516e-06,
"loss": 3.6353,
"step": 75
},
{
"epoch": 0.06451612903225806,
"grad_norm": 31.451000213623047,
"learning_rate": 2.1505376344086023e-06,
"loss": 2.6364,
"step": 100
},
{
"epoch": 0.08064516129032258,
"grad_norm": 29.471986770629883,
"learning_rate": 2.688172043010753e-06,
"loss": 2.3125,
"step": 125
},
{
"epoch": 0.0967741935483871,
"grad_norm": 28.64345932006836,
"learning_rate": 3.225806451612903e-06,
"loss": 2.1281,
"step": 150
},
{
"epoch": 0.11290322580645161,
"grad_norm": 28.750173568725586,
"learning_rate": 3.763440860215054e-06,
"loss": 1.9073,
"step": 175
},
{
"epoch": 0.12903225806451613,
"grad_norm": 23.051420211791992,
"learning_rate": 4.3010752688172045e-06,
"loss": 1.5977,
"step": 200
},
{
"epoch": 0.14516129032258066,
"grad_norm": 18.67135238647461,
"learning_rate": 4.838709677419355e-06,
"loss": 1.5081,
"step": 225
},
{
"epoch": 0.16129032258064516,
"grad_norm": 15.335652351379395,
"learning_rate": 5.376344086021506e-06,
"loss": 1.4169,
"step": 250
},
{
"epoch": 0.1774193548387097,
"grad_norm": 16.2917537689209,
"learning_rate": 5.9139784946236566e-06,
"loss": 1.3469,
"step": 275
},
{
"epoch": 0.1935483870967742,
"grad_norm": 15.212031364440918,
"learning_rate": 6.451612903225806e-06,
"loss": 1.4059,
"step": 300
},
{
"epoch": 0.20967741935483872,
"grad_norm": 15.661399841308594,
"learning_rate": 6.989247311827958e-06,
"loss": 1.333,
"step": 325
},
{
"epoch": 0.22580645161290322,
"grad_norm": 16.841798782348633,
"learning_rate": 7.526881720430108e-06,
"loss": 1.2252,
"step": 350
},
{
"epoch": 0.24193548387096775,
"grad_norm": 17.468032836914062,
"learning_rate": 8.064516129032258e-06,
"loss": 1.2996,
"step": 375
},
{
"epoch": 0.25806451612903225,
"grad_norm": 16.684844970703125,
"learning_rate": 8.602150537634409e-06,
"loss": 1.2653,
"step": 400
},
{
"epoch": 0.27419354838709675,
"grad_norm": 14.749136924743652,
"learning_rate": 9.13978494623656e-06,
"loss": 1.1967,
"step": 425
},
{
"epoch": 0.2903225806451613,
"grad_norm": 13.751141548156738,
"learning_rate": 9.67741935483871e-06,
"loss": 1.1865,
"step": 450
},
{
"epoch": 0.3064516129032258,
"grad_norm": 16.48873519897461,
"learning_rate": 9.97610513739546e-06,
"loss": 1.1636,
"step": 475
},
{
"epoch": 0.3225806451612903,
"grad_norm": 14.694608688354492,
"learning_rate": 9.916367980884111e-06,
"loss": 1.1796,
"step": 500
},
{
"epoch": 0.3387096774193548,
"grad_norm": 15.619414329528809,
"learning_rate": 9.856630824372761e-06,
"loss": 1.1655,
"step": 525
},
{
"epoch": 0.3548387096774194,
"grad_norm": 13.177242279052734,
"learning_rate": 9.79689366786141e-06,
"loss": 1.143,
"step": 550
},
{
"epoch": 0.3709677419354839,
"grad_norm": 15.957605361938477,
"learning_rate": 9.737156511350062e-06,
"loss": 1.1414,
"step": 575
},
{
"epoch": 0.3870967741935484,
"grad_norm": 12.467620849609375,
"learning_rate": 9.67741935483871e-06,
"loss": 1.0964,
"step": 600
},
{
"epoch": 0.4032258064516129,
"grad_norm": 15.435978889465332,
"learning_rate": 9.61768219832736e-06,
"loss": 1.1512,
"step": 625
},
{
"epoch": 0.41935483870967744,
"grad_norm": 13.087624549865723,
"learning_rate": 9.557945041816011e-06,
"loss": 1.1338,
"step": 650
},
{
"epoch": 0.43548387096774194,
"grad_norm": 15.716456413269043,
"learning_rate": 9.49820788530466e-06,
"loss": 1.0783,
"step": 675
},
{
"epoch": 0.45161290322580644,
"grad_norm": 14.517507553100586,
"learning_rate": 9.43847072879331e-06,
"loss": 1.0728,
"step": 700
},
{
"epoch": 0.46774193548387094,
"grad_norm": 17.37009620666504,
"learning_rate": 9.37873357228196e-06,
"loss": 1.0317,
"step": 725
},
{
"epoch": 0.4838709677419355,
"grad_norm": 14.03701400756836,
"learning_rate": 9.31899641577061e-06,
"loss": 1.0347,
"step": 750
},
{
"epoch": 0.5,
"grad_norm": 12.431659698486328,
"learning_rate": 9.25925925925926e-06,
"loss": 1.0524,
"step": 775
},
{
"epoch": 0.5161290322580645,
"grad_norm": 12.746413230895996,
"learning_rate": 9.19952210274791e-06,
"loss": 1.0826,
"step": 800
},
{
"epoch": 0.532258064516129,
"grad_norm": 15.521408081054688,
"learning_rate": 9.13978494623656e-06,
"loss": 1.0377,
"step": 825
},
{
"epoch": 0.5483870967741935,
"grad_norm": 15.342901229858398,
"learning_rate": 9.08004778972521e-06,
"loss": 0.9762,
"step": 850
},
{
"epoch": 0.5645161290322581,
"grad_norm": 16.137371063232422,
"learning_rate": 9.02031063321386e-06,
"loss": 1.0725,
"step": 875
},
{
"epoch": 0.5806451612903226,
"grad_norm": 14.61146068572998,
"learning_rate": 8.96057347670251e-06,
"loss": 0.9554,
"step": 900
},
{
"epoch": 0.5967741935483871,
"grad_norm": 13.561723709106445,
"learning_rate": 8.90083632019116e-06,
"loss": 1.0127,
"step": 925
},
{
"epoch": 0.6129032258064516,
"grad_norm": 16.037729263305664,
"learning_rate": 8.84109916367981e-06,
"loss": 0.9621,
"step": 950
},
{
"epoch": 0.6290322580645161,
"grad_norm": 13.945268630981445,
"learning_rate": 8.78136200716846e-06,
"loss": 0.9479,
"step": 975
},
{
"epoch": 0.6451612903225806,
"grad_norm": 15.826567649841309,
"learning_rate": 8.72162485065711e-06,
"loss": 0.9789,
"step": 1000
},
{
"epoch": 0.6451612903225806,
"eval_cer": 60.21685813863431,
"eval_loss": 0.9020848870277405,
"eval_runtime": 953.7359,
"eval_samples_per_second": 2.392,
"eval_steps_per_second": 0.3,
"step": 1000
}
],
"logging_steps": 25,
"max_steps": 4650,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"total_flos": 4.61736640512e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}