{
  "best_metric": 7.411630558722919,
  "best_model_checkpoint": "../whisper-NST2-unfreeze-constanti-low-lr/checkpoint-14000",
  "epoch": 1.0001,
  "global_step": 20002,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.47e-06,
      "loss": 1.7243,
      "step": 250
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.970000000000001e-06,
      "loss": 0.3597,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 7.4700000000000005e-06,
      "loss": 0.2237,
      "step": 750
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.970000000000001e-06,
      "loss": 0.1901,
      "step": 1000
    },
    {
      "epoch": 0.05,
      "eval_loss": 0.30687370896339417,
      "eval_runtime": 48.6375,
      "eval_samples_per_second": 2.056,
      "eval_steps_per_second": 0.144,
      "eval_wer": 14.823261117445838,
      "step": 1000
    },
    {
      "epoch": 0.06,
      "learning_rate": 1e-05,
      "loss": 0.1714,
      "step": 1250
    },
    {
      "epoch": 0.07,
      "learning_rate": 1e-05,
      "loss": 0.1581,
      "step": 1500
    },
    {
      "epoch": 0.09,
      "learning_rate": 1e-05,
      "loss": 0.1451,
      "step": 1750
    },
    {
      "epoch": 0.1,
      "learning_rate": 1e-05,
      "loss": 0.1323,
      "step": 2000
    },
    {
      "epoch": 0.1,
      "eval_loss": 0.2687492072582245,
      "eval_runtime": 59.4025,
      "eval_samples_per_second": 1.683,
      "eval_steps_per_second": 0.118,
      "eval_wer": 11.2884834663626,
      "step": 2000
    },
    {
      "epoch": 0.11,
      "learning_rate": 1e-05,
      "loss": 0.1283,
      "step": 2250
    },
    {
      "epoch": 0.12,
      "learning_rate": 1e-05,
      "loss": 0.1227,
      "step": 2500
    },
    {
      "epoch": 0.14,
      "learning_rate": 1e-05,
      "loss": 0.1172,
      "step": 2750
    },
    {
      "epoch": 0.15,
      "learning_rate": 1e-05,
      "loss": 0.1137,
      "step": 3000
    },
    {
      "epoch": 0.15,
      "eval_loss": 0.26204466819763184,
      "eval_runtime": 53.0968,
      "eval_samples_per_second": 1.883,
      "eval_steps_per_second": 0.132,
      "eval_wer": 10.832383124287343,
      "step": 3000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1e-05,
      "loss": 0.1104,
      "step": 3250
    },
    {
      "epoch": 0.17,
      "learning_rate": 1e-05,
      "loss": 0.1071,
      "step": 3500
    },
    {
      "epoch": 0.19,
      "learning_rate": 1e-05,
      "loss": 0.1022,
      "step": 3750
    },
    {
      "epoch": 0.2,
      "learning_rate": 1e-05,
      "loss": 0.1022,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "eval_loss": 0.297575443983078,
      "eval_runtime": 45.3592,
      "eval_samples_per_second": 2.205,
      "eval_steps_per_second": 0.154,
      "eval_wer": 9.007981755986318,
      "step": 4000
    },
    {
      "epoch": 0.21,
      "learning_rate": 1e-05,
      "loss": 0.096,
      "step": 4250
    },
    {
      "epoch": 0.23,
      "learning_rate": 1e-05,
      "loss": 0.0939,
      "step": 4500
    },
    {
      "epoch": 0.24,
      "learning_rate": 1e-05,
      "loss": 0.0918,
      "step": 4750
    },
    {
      "epoch": 0.25,
      "learning_rate": 1e-05,
      "loss": 0.0937,
      "step": 5000
    },
    {
      "epoch": 0.25,
      "eval_loss": 0.25842034816741943,
      "eval_runtime": 48.6209,
      "eval_samples_per_second": 2.057,
      "eval_steps_per_second": 0.144,
      "eval_wer": 9.578107183580387,
      "step": 5000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1e-05,
      "loss": 0.0895,
      "step": 5250
    },
    {
      "epoch": 0.28,
      "learning_rate": 1e-05,
      "loss": 0.0874,
      "step": 5500
    },
    {
      "epoch": 0.29,
      "learning_rate": 1e-05,
      "loss": 0.0875,
      "step": 5750
    },
    {
      "epoch": 0.3,
      "learning_rate": 1e-05,
      "loss": 0.0875,
      "step": 6000
    },
    {
      "epoch": 0.3,
      "eval_loss": 0.27036869525909424,
      "eval_runtime": 76.4925,
      "eval_samples_per_second": 1.307,
      "eval_steps_per_second": 0.092,
      "eval_wer": 20.296465222348917,
      "step": 6000
    },
    {
      "epoch": 1.01,
      "learning_rate": 1e-05,
      "loss": 0.0816,
      "step": 6250
    },
    {
      "epoch": 1.02,
      "learning_rate": 1e-05,
      "loss": 0.0748,
      "step": 6500
    },
    {
      "epoch": 1.04,
      "learning_rate": 1e-05,
      "loss": 0.0656,
      "step": 6750
    },
    {
      "epoch": 1.05,
      "learning_rate": 1e-05,
      "loss": 0.0592,
      "step": 7000
    },
    {
      "epoch": 1.05,
      "eval_loss": 0.2750946283340454,
      "eval_runtime": 45.0139,
      "eval_samples_per_second": 2.222,
      "eval_steps_per_second": 0.156,
      "eval_wer": 9.007981755986318,
      "step": 7000
    },
    {
      "epoch": 1.06,
      "learning_rate": 1e-05,
      "loss": 0.0553,
      "step": 7250
    },
    {
      "epoch": 1.07,
      "learning_rate": 1e-05,
      "loss": 0.0542,
      "step": 7500
    },
    {
      "epoch": 1.09,
      "learning_rate": 1e-05,
      "loss": 0.0533,
      "step": 7750
    },
    {
      "epoch": 1.1,
      "learning_rate": 1e-05,
      "loss": 0.0488,
      "step": 8000
    },
    {
      "epoch": 1.1,
      "eval_loss": 0.27778884768486023,
      "eval_runtime": 49.8405,
      "eval_samples_per_second": 2.006,
      "eval_steps_per_second": 0.14,
      "eval_wer": 8.665906499429875,
      "step": 8000
    },
    {
      "epoch": 1.11,
      "learning_rate": 1e-05,
      "loss": 0.0481,
      "step": 8250
    },
    {
      "epoch": 1.12,
      "learning_rate": 1e-05,
      "loss": 0.0477,
      "step": 8500
    },
    {
      "epoch": 1.14,
      "learning_rate": 1e-05,
      "loss": 0.047,
      "step": 8750
    },
    {
      "epoch": 1.15,
      "learning_rate": 1e-05,
      "loss": 0.0475,
      "step": 9000
    },
    {
      "epoch": 1.15,
      "eval_loss": 0.2791878581047058,
      "eval_runtime": 48.8356,
      "eval_samples_per_second": 2.048,
      "eval_steps_per_second": 0.143,
      "eval_wer": 9.464082098061574,
      "step": 9000
    },
    {
      "epoch": 1.16,
      "learning_rate": 1e-05,
      "loss": 0.045,
      "step": 9250
    },
    {
      "epoch": 1.17,
      "learning_rate": 1e-05,
      "loss": 0.0464,
      "step": 9500
    },
    {
      "epoch": 1.19,
      "learning_rate": 1e-05,
      "loss": 0.0419,
      "step": 9750
    },
    {
      "epoch": 1.2,
      "learning_rate": 1e-05,
      "loss": 0.0439,
      "step": 10000
    },
    {
      "epoch": 1.2,
      "eval_loss": 0.2879847586154938,
      "eval_runtime": 49.0674,
      "eval_samples_per_second": 2.038,
      "eval_steps_per_second": 0.143,
      "eval_wer": 8.323831242873432,
      "step": 10000
    },
    {
      "epoch": 1.21,
      "learning_rate": 1e-05,
      "loss": 0.0413,
      "step": 10250
    },
    {
      "epoch": 1.22,
      "learning_rate": 1e-05,
      "loss": 0.041,
      "step": 10500
    },
    {
      "epoch": 1.24,
      "learning_rate": 1e-05,
      "loss": 0.0413,
      "step": 10750
    },
    {
      "epoch": 1.25,
      "learning_rate": 1e-05,
      "loss": 0.0425,
      "step": 11000
    },
    {
      "epoch": 1.25,
      "eval_loss": 0.2954462766647339,
      "eval_runtime": 46.9453,
      "eval_samples_per_second": 2.13,
      "eval_steps_per_second": 0.149,
      "eval_wer": 8.551881413911062,
      "step": 11000
    },
    {
      "epoch": 1.26,
      "learning_rate": 1e-05,
      "loss": 0.0409,
      "step": 11250
    },
    {
      "epoch": 1.27,
      "learning_rate": 1e-05,
      "loss": 0.0389,
      "step": 11500
    },
    {
      "epoch": 1.29,
      "learning_rate": 1e-05,
      "loss": 0.0403,
      "step": 11750
    },
    {
      "epoch": 1.3,
      "learning_rate": 1e-05,
      "loss": 0.0416,
      "step": 12000
    },
    {
      "epoch": 1.3,
      "eval_loss": 0.2895849347114563,
      "eval_runtime": 72.8965,
      "eval_samples_per_second": 1.372,
      "eval_steps_per_second": 0.096,
      "eval_wer": 20.296465222348917,
      "step": 12000
    },
    {
      "epoch": 2.01,
      "learning_rate": 1e-05,
      "loss": 0.0384,
      "step": 12250
    },
    {
      "epoch": 2.02,
      "learning_rate": 1e-05,
      "loss": 0.0355,
      "step": 12500
    },
    {
      "epoch": 2.03,
      "learning_rate": 1e-05,
      "loss": 0.0323,
      "step": 12750
    },
    {
      "epoch": 2.05,
      "learning_rate": 1e-05,
      "loss": 0.0289,
      "step": 13000
    },
    {
      "epoch": 2.05,
      "eval_loss": 0.29897502064704895,
      "eval_runtime": 54.9916,
      "eval_samples_per_second": 1.818,
      "eval_steps_per_second": 0.127,
      "eval_wer": 7.98175598631699,
      "step": 13000
    },
    {
      "epoch": 2.06,
      "learning_rate": 1e-05,
      "loss": 0.0266,
      "step": 13250
    },
    {
      "epoch": 2.07,
      "learning_rate": 1e-05,
      "loss": 0.0264,
      "step": 13500
    },
    {
      "epoch": 2.08,
      "learning_rate": 1e-05,
      "loss": 0.0267,
      "step": 13750
    },
    {
      "epoch": 2.1,
      "learning_rate": 1e-05,
      "loss": 0.0229,
      "step": 14000
    },
    {
      "epoch": 2.1,
      "eval_loss": 0.3026929497718811,
      "eval_runtime": 48.2743,
      "eval_samples_per_second": 2.071,
      "eval_steps_per_second": 0.145,
      "eval_wer": 7.411630558722919,
      "step": 14000
    },
    {
      "epoch": 2.11,
      "learning_rate": 1e-05,
      "loss": 0.0228,
      "step": 14250
    },
    {
      "epoch": 2.12,
      "learning_rate": 1e-05,
      "loss": 0.024,
      "step": 14500
    },
    {
      "epoch": 2.13,
      "learning_rate": 1e-05,
      "loss": 0.0238,
      "step": 14750
    },
    {
      "epoch": 2.15,
      "learning_rate": 1e-05,
      "loss": 0.0248,
      "step": 15000
    },
    {
      "epoch": 2.15,
      "eval_loss": 0.29682403802871704,
      "eval_runtime": 47.3876,
      "eval_samples_per_second": 2.11,
      "eval_steps_per_second": 0.148,
      "eval_wer": 8.665906499429875,
      "step": 15000
    },
    {
      "epoch": 2.16,
      "learning_rate": 1e-05,
      "loss": 0.022,
      "step": 15250
    },
    {
      "epoch": 2.17,
      "learning_rate": 1e-05,
      "loss": 0.0241,
      "step": 15500
    },
    {
      "epoch": 2.18,
      "learning_rate": 1e-05,
      "loss": 0.021,
      "step": 15750
    },
    {
      "epoch": 2.2,
      "learning_rate": 1e-05,
      "loss": 0.0225,
      "step": 16000
    },
    {
      "epoch": 2.2,
      "eval_loss": 0.30999651551246643,
      "eval_runtime": 56.9907,
      "eval_samples_per_second": 1.755,
      "eval_steps_per_second": 0.123,
      "eval_wer": 8.551881413911062,
      "step": 16000
    },
    {
      "epoch": 2.21,
      "learning_rate": 1e-05,
      "loss": 0.0211,
      "step": 16250
    },
    {
      "epoch": 2.22,
      "learning_rate": 1e-05,
      "loss": 0.0214,
      "step": 16500
    },
    {
      "epoch": 2.23,
      "learning_rate": 1e-05,
      "loss": 0.0215,
      "step": 16750
    },
    {
      "epoch": 2.25,
      "learning_rate": 1e-05,
      "loss": 0.0222,
      "step": 17000
    },
    {
      "epoch": 2.25,
      "eval_loss": 0.3132005035877228,
      "eval_runtime": 54.3277,
      "eval_samples_per_second": 1.841,
      "eval_steps_per_second": 0.129,
      "eval_wer": 9.35005701254276,
      "step": 17000
    },
    {
      "epoch": 2.26,
      "learning_rate": 1e-05,
      "loss": 0.0211,
      "step": 17250
    },
    {
      "epoch": 2.27,
      "learning_rate": 1e-05,
      "loss": 0.0199,
      "step": 17500
    },
    {
      "epoch": 2.28,
      "learning_rate": 1e-05,
      "loss": 0.021,
      "step": 17750
    },
    {
      "epoch": 2.3,
      "learning_rate": 1e-05,
      "loss": 0.0219,
      "step": 18000
    },
    {
      "epoch": 2.3,
      "eval_loss": 0.3229832053184509,
      "eval_runtime": 56.2842,
      "eval_samples_per_second": 1.777,
      "eval_steps_per_second": 0.124,
      "eval_wer": 7.639680729760548,
      "step": 18000
    },
    {
      "epoch": 3.01,
      "learning_rate": 1e-05,
      "loss": 0.0205,
      "step": 18250
    },
    {
      "epoch": 3.02,
      "learning_rate": 1e-05,
      "loss": 0.0194,
      "step": 18500
    },
    {
      "epoch": 3.03,
      "learning_rate": 1e-05,
      "loss": 0.0178,
      "step": 18750
    },
    {
      "epoch": 3.04,
      "learning_rate": 1e-05,
      "loss": 0.0162,
      "step": 19000
    },
    {
      "epoch": 3.04,
      "eval_loss": 0.33802878856658936,
      "eval_runtime": 56.712,
      "eval_samples_per_second": 1.763,
      "eval_steps_per_second": 0.123,
      "eval_wer": 9.806157354618016,
      "step": 19000
    },
    {
      "epoch": 3.06,
      "learning_rate": 1e-05,
      "loss": 0.0144,
      "step": 19250
    },
    {
      "epoch": 3.07,
      "learning_rate": 1e-05,
      "loss": 0.0151,
      "step": 19500
    },
    {
      "epoch": 3.08,
      "learning_rate": 1e-05,
      "loss": 0.0151,
      "step": 19750
    },
    {
      "epoch": 3.09,
      "learning_rate": 1e-05,
      "loss": 0.0132,
      "step": 20000
    },
    {
      "epoch": 3.09,
      "eval_loss": 0.35617145895957947,
      "eval_runtime": 42.4011,
      "eval_samples_per_second": 2.358,
      "eval_steps_per_second": 0.165,
      "eval_wer": 8.551881413911062,
      "step": 20000
    },
    {
      "epoch": 3.09,
      "step": 20000,
      "total_flos": 2.7701254859636736e+20,
      "train_loss": 0.0831078713297844,
      "train_runtime": 247222.6725,
      "train_samples_per_second": 7.766,
      "train_steps_per_second": 0.081
    },
    {
      "epoch": 1.0,
      "step": 20001,
      "total_flos": 2.7702640069558272e+20,
      "train_loss": 1.7381801103855406e-06,
      "train_runtime": 64.0295,
      "train_samples_per_second": 29986.165,
      "train_steps_per_second": 312.356
    },
    {
      "epoch": 1.0,
      "step": 20002,
      "total_flos": 2.7704025279479808e+20,
      "train_loss": 5.53145265653126e-07,
      "train_runtime": 51.8572,
      "train_samples_per_second": 37024.777,
      "train_steps_per_second": 385.675
    }
  ],
  "max_steps": 20000,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 2.7704025279479808e+20,
  "trial_name": null,
  "trial_params": null
}