pere's picture
End of training
62b0686
{
"best_metric": 7.411630558722919,
"best_model_checkpoint": "../whisper-NST2-unfreeze-constanti-low-lr/checkpoint-14000",
"epoch": 1.0001,
"global_step": 20002,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.47e-06,
"loss": 1.7243,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 4.970000000000001e-06,
"loss": 0.3597,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 7.4700000000000005e-06,
"loss": 0.2237,
"step": 750
},
{
"epoch": 0.05,
"learning_rate": 9.970000000000001e-06,
"loss": 0.1901,
"step": 1000
},
{
"epoch": 0.05,
"eval_loss": 0.30687370896339417,
"eval_runtime": 48.6375,
"eval_samples_per_second": 2.056,
"eval_steps_per_second": 0.144,
"eval_wer": 14.823261117445838,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 1e-05,
"loss": 0.1714,
"step": 1250
},
{
"epoch": 0.07,
"learning_rate": 1e-05,
"loss": 0.1581,
"step": 1500
},
{
"epoch": 0.09,
"learning_rate": 1e-05,
"loss": 0.1451,
"step": 1750
},
{
"epoch": 0.1,
"learning_rate": 1e-05,
"loss": 0.1323,
"step": 2000
},
{
"epoch": 0.1,
"eval_loss": 0.2687492072582245,
"eval_runtime": 59.4025,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.118,
"eval_wer": 11.2884834663626,
"step": 2000
},
{
"epoch": 0.11,
"learning_rate": 1e-05,
"loss": 0.1283,
"step": 2250
},
{
"epoch": 0.12,
"learning_rate": 1e-05,
"loss": 0.1227,
"step": 2500
},
{
"epoch": 0.14,
"learning_rate": 1e-05,
"loss": 0.1172,
"step": 2750
},
{
"epoch": 0.15,
"learning_rate": 1e-05,
"loss": 0.1137,
"step": 3000
},
{
"epoch": 0.15,
"eval_loss": 0.26204466819763184,
"eval_runtime": 53.0968,
"eval_samples_per_second": 1.883,
"eval_steps_per_second": 0.132,
"eval_wer": 10.832383124287343,
"step": 3000
},
{
"epoch": 0.16,
"learning_rate": 1e-05,
"loss": 0.1104,
"step": 3250
},
{
"epoch": 0.17,
"learning_rate": 1e-05,
"loss": 0.1071,
"step": 3500
},
{
"epoch": 0.19,
"learning_rate": 1e-05,
"loss": 0.1022,
"step": 3750
},
{
"epoch": 0.2,
"learning_rate": 1e-05,
"loss": 0.1022,
"step": 4000
},
{
"epoch": 0.2,
"eval_loss": 0.297575443983078,
"eval_runtime": 45.3592,
"eval_samples_per_second": 2.205,
"eval_steps_per_second": 0.154,
"eval_wer": 9.007981755986318,
"step": 4000
},
{
"epoch": 0.21,
"learning_rate": 1e-05,
"loss": 0.096,
"step": 4250
},
{
"epoch": 0.23,
"learning_rate": 1e-05,
"loss": 0.0939,
"step": 4500
},
{
"epoch": 0.24,
"learning_rate": 1e-05,
"loss": 0.0918,
"step": 4750
},
{
"epoch": 0.25,
"learning_rate": 1e-05,
"loss": 0.0937,
"step": 5000
},
{
"epoch": 0.25,
"eval_loss": 0.25842034816741943,
"eval_runtime": 48.6209,
"eval_samples_per_second": 2.057,
"eval_steps_per_second": 0.144,
"eval_wer": 9.578107183580387,
"step": 5000
},
{
"epoch": 0.26,
"learning_rate": 1e-05,
"loss": 0.0895,
"step": 5250
},
{
"epoch": 0.28,
"learning_rate": 1e-05,
"loss": 0.0874,
"step": 5500
},
{
"epoch": 0.29,
"learning_rate": 1e-05,
"loss": 0.0875,
"step": 5750
},
{
"epoch": 0.3,
"learning_rate": 1e-05,
"loss": 0.0875,
"step": 6000
},
{
"epoch": 0.3,
"eval_loss": 0.27036869525909424,
"eval_runtime": 76.4925,
"eval_samples_per_second": 1.307,
"eval_steps_per_second": 0.092,
"eval_wer": 20.296465222348917,
"step": 6000
},
{
"epoch": 1.01,
"learning_rate": 1e-05,
"loss": 0.0816,
"step": 6250
},
{
"epoch": 1.02,
"learning_rate": 1e-05,
"loss": 0.0748,
"step": 6500
},
{
"epoch": 1.04,
"learning_rate": 1e-05,
"loss": 0.0656,
"step": 6750
},
{
"epoch": 1.05,
"learning_rate": 1e-05,
"loss": 0.0592,
"step": 7000
},
{
"epoch": 1.05,
"eval_loss": 0.2750946283340454,
"eval_runtime": 45.0139,
"eval_samples_per_second": 2.222,
"eval_steps_per_second": 0.156,
"eval_wer": 9.007981755986318,
"step": 7000
},
{
"epoch": 1.06,
"learning_rate": 1e-05,
"loss": 0.0553,
"step": 7250
},
{
"epoch": 1.07,
"learning_rate": 1e-05,
"loss": 0.0542,
"step": 7500
},
{
"epoch": 1.09,
"learning_rate": 1e-05,
"loss": 0.0533,
"step": 7750
},
{
"epoch": 1.1,
"learning_rate": 1e-05,
"loss": 0.0488,
"step": 8000
},
{
"epoch": 1.1,
"eval_loss": 0.27778884768486023,
"eval_runtime": 49.8405,
"eval_samples_per_second": 2.006,
"eval_steps_per_second": 0.14,
"eval_wer": 8.665906499429875,
"step": 8000
},
{
"epoch": 1.11,
"learning_rate": 1e-05,
"loss": 0.0481,
"step": 8250
},
{
"epoch": 1.12,
"learning_rate": 1e-05,
"loss": 0.0477,
"step": 8500
},
{
"epoch": 1.14,
"learning_rate": 1e-05,
"loss": 0.047,
"step": 8750
},
{
"epoch": 1.15,
"learning_rate": 1e-05,
"loss": 0.0475,
"step": 9000
},
{
"epoch": 1.15,
"eval_loss": 0.2791878581047058,
"eval_runtime": 48.8356,
"eval_samples_per_second": 2.048,
"eval_steps_per_second": 0.143,
"eval_wer": 9.464082098061574,
"step": 9000
},
{
"epoch": 1.16,
"learning_rate": 1e-05,
"loss": 0.045,
"step": 9250
},
{
"epoch": 1.17,
"learning_rate": 1e-05,
"loss": 0.0464,
"step": 9500
},
{
"epoch": 1.19,
"learning_rate": 1e-05,
"loss": 0.0419,
"step": 9750
},
{
"epoch": 1.2,
"learning_rate": 1e-05,
"loss": 0.0439,
"step": 10000
},
{
"epoch": 1.2,
"eval_loss": 0.2879847586154938,
"eval_runtime": 49.0674,
"eval_samples_per_second": 2.038,
"eval_steps_per_second": 0.143,
"eval_wer": 8.323831242873432,
"step": 10000
},
{
"epoch": 1.21,
"learning_rate": 1e-05,
"loss": 0.0413,
"step": 10250
},
{
"epoch": 1.22,
"learning_rate": 1e-05,
"loss": 0.041,
"step": 10500
},
{
"epoch": 1.24,
"learning_rate": 1e-05,
"loss": 0.0413,
"step": 10750
},
{
"epoch": 1.25,
"learning_rate": 1e-05,
"loss": 0.0425,
"step": 11000
},
{
"epoch": 1.25,
"eval_loss": 0.2954462766647339,
"eval_runtime": 46.9453,
"eval_samples_per_second": 2.13,
"eval_steps_per_second": 0.149,
"eval_wer": 8.551881413911062,
"step": 11000
},
{
"epoch": 1.26,
"learning_rate": 1e-05,
"loss": 0.0409,
"step": 11250
},
{
"epoch": 1.27,
"learning_rate": 1e-05,
"loss": 0.0389,
"step": 11500
},
{
"epoch": 1.29,
"learning_rate": 1e-05,
"loss": 0.0403,
"step": 11750
},
{
"epoch": 1.3,
"learning_rate": 1e-05,
"loss": 0.0416,
"step": 12000
},
{
"epoch": 1.3,
"eval_loss": 0.2895849347114563,
"eval_runtime": 72.8965,
"eval_samples_per_second": 1.372,
"eval_steps_per_second": 0.096,
"eval_wer": 20.296465222348917,
"step": 12000
},
{
"epoch": 2.01,
"learning_rate": 1e-05,
"loss": 0.0384,
"step": 12250
},
{
"epoch": 2.02,
"learning_rate": 1e-05,
"loss": 0.0355,
"step": 12500
},
{
"epoch": 2.03,
"learning_rate": 1e-05,
"loss": 0.0323,
"step": 12750
},
{
"epoch": 2.05,
"learning_rate": 1e-05,
"loss": 0.0289,
"step": 13000
},
{
"epoch": 2.05,
"eval_loss": 0.29897502064704895,
"eval_runtime": 54.9916,
"eval_samples_per_second": 1.818,
"eval_steps_per_second": 0.127,
"eval_wer": 7.98175598631699,
"step": 13000
},
{
"epoch": 2.06,
"learning_rate": 1e-05,
"loss": 0.0266,
"step": 13250
},
{
"epoch": 2.07,
"learning_rate": 1e-05,
"loss": 0.0264,
"step": 13500
},
{
"epoch": 2.08,
"learning_rate": 1e-05,
"loss": 0.0267,
"step": 13750
},
{
"epoch": 2.1,
"learning_rate": 1e-05,
"loss": 0.0229,
"step": 14000
},
{
"epoch": 2.1,
"eval_loss": 0.3026929497718811,
"eval_runtime": 48.2743,
"eval_samples_per_second": 2.071,
"eval_steps_per_second": 0.145,
"eval_wer": 7.411630558722919,
"step": 14000
},
{
"epoch": 2.11,
"learning_rate": 1e-05,
"loss": 0.0228,
"step": 14250
},
{
"epoch": 2.12,
"learning_rate": 1e-05,
"loss": 0.024,
"step": 14500
},
{
"epoch": 2.13,
"learning_rate": 1e-05,
"loss": 0.0238,
"step": 14750
},
{
"epoch": 2.15,
"learning_rate": 1e-05,
"loss": 0.0248,
"step": 15000
},
{
"epoch": 2.15,
"eval_loss": 0.29682403802871704,
"eval_runtime": 47.3876,
"eval_samples_per_second": 2.11,
"eval_steps_per_second": 0.148,
"eval_wer": 8.665906499429875,
"step": 15000
},
{
"epoch": 2.16,
"learning_rate": 1e-05,
"loss": 0.022,
"step": 15250
},
{
"epoch": 2.17,
"learning_rate": 1e-05,
"loss": 0.0241,
"step": 15500
},
{
"epoch": 2.18,
"learning_rate": 1e-05,
"loss": 0.021,
"step": 15750
},
{
"epoch": 2.2,
"learning_rate": 1e-05,
"loss": 0.0225,
"step": 16000
},
{
"epoch": 2.2,
"eval_loss": 0.30999651551246643,
"eval_runtime": 56.9907,
"eval_samples_per_second": 1.755,
"eval_steps_per_second": 0.123,
"eval_wer": 8.551881413911062,
"step": 16000
},
{
"epoch": 2.21,
"learning_rate": 1e-05,
"loss": 0.0211,
"step": 16250
},
{
"epoch": 2.22,
"learning_rate": 1e-05,
"loss": 0.0214,
"step": 16500
},
{
"epoch": 2.23,
"learning_rate": 1e-05,
"loss": 0.0215,
"step": 16750
},
{
"epoch": 2.25,
"learning_rate": 1e-05,
"loss": 0.0222,
"step": 17000
},
{
"epoch": 2.25,
"eval_loss": 0.3132005035877228,
"eval_runtime": 54.3277,
"eval_samples_per_second": 1.841,
"eval_steps_per_second": 0.129,
"eval_wer": 9.35005701254276,
"step": 17000
},
{
"epoch": 2.26,
"learning_rate": 1e-05,
"loss": 0.0211,
"step": 17250
},
{
"epoch": 2.27,
"learning_rate": 1e-05,
"loss": 0.0199,
"step": 17500
},
{
"epoch": 2.28,
"learning_rate": 1e-05,
"loss": 0.021,
"step": 17750
},
{
"epoch": 2.3,
"learning_rate": 1e-05,
"loss": 0.0219,
"step": 18000
},
{
"epoch": 2.3,
"eval_loss": 0.3229832053184509,
"eval_runtime": 56.2842,
"eval_samples_per_second": 1.777,
"eval_steps_per_second": 0.124,
"eval_wer": 7.639680729760548,
"step": 18000
},
{
"epoch": 3.01,
"learning_rate": 1e-05,
"loss": 0.0205,
"step": 18250
},
{
"epoch": 3.02,
"learning_rate": 1e-05,
"loss": 0.0194,
"step": 18500
},
{
"epoch": 3.03,
"learning_rate": 1e-05,
"loss": 0.0178,
"step": 18750
},
{
"epoch": 3.04,
"learning_rate": 1e-05,
"loss": 0.0162,
"step": 19000
},
{
"epoch": 3.04,
"eval_loss": 0.33802878856658936,
"eval_runtime": 56.712,
"eval_samples_per_second": 1.763,
"eval_steps_per_second": 0.123,
"eval_wer": 9.806157354618016,
"step": 19000
},
{
"epoch": 3.06,
"learning_rate": 1e-05,
"loss": 0.0144,
"step": 19250
},
{
"epoch": 3.07,
"learning_rate": 1e-05,
"loss": 0.0151,
"step": 19500
},
{
"epoch": 3.08,
"learning_rate": 1e-05,
"loss": 0.0151,
"step": 19750
},
{
"epoch": 3.09,
"learning_rate": 1e-05,
"loss": 0.0132,
"step": 20000
},
{
"epoch": 3.09,
"eval_loss": 0.35617145895957947,
"eval_runtime": 42.4011,
"eval_samples_per_second": 2.358,
"eval_steps_per_second": 0.165,
"eval_wer": 8.551881413911062,
"step": 20000
},
{
"epoch": 3.09,
"step": 20000,
"total_flos": 2.7701254859636736e+20,
"train_loss": 0.0831078713297844,
"train_runtime": 247222.6725,
"train_samples_per_second": 7.766,
"train_steps_per_second": 0.081
},
{
"epoch": 1.0,
"step": 20001,
"total_flos": 2.7702640069558272e+20,
"train_loss": 1.7381801103855406e-06,
"train_runtime": 64.0295,
"train_samples_per_second": 29986.165,
"train_steps_per_second": 312.356
},
{
"epoch": 1.0,
"step": 20002,
"total_flos": 2.7704025279479808e+20,
"train_loss": 5.53145265653126e-07,
"train_runtime": 51.8572,
"train_samples_per_second": 37024.777,
"train_steps_per_second": 385.675
}
],
"max_steps": 20000,
"num_train_epochs": 9223372036854775807,
"total_flos": 2.7704025279479808e+20,
"trial_name": null,
"trial_params": null
}