|
{ |
|
"best_metric": 0.9720466022505865, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-English-EWT/checkpoint-3500", |
|
"epoch": 15.306122448979592, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 1.4315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.966778523489933e-05, |
|
"loss": 0.1414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.933221476510068e-05, |
|
"loss": 0.1147, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8996644295302016e-05, |
|
"loss": 0.0869, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.8664429530201344e-05, |
|
"loss": 0.0599, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.9675931448566544, |
|
"eval_loss": 0.12289105355739594, |
|
"eval_runtime": 10.3063, |
|
"eval_samples_per_second": 194.153, |
|
"eval_steps_per_second": 24.354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.8328859060402684e-05, |
|
"loss": 0.0573, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.799328859060403e-05, |
|
"loss": 0.0606, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.7657718120805376e-05, |
|
"loss": 0.0503, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.7322147651006715e-05, |
|
"loss": 0.0279, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.698657718120806e-05, |
|
"loss": 0.0285, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_accuracy": 0.9700584516282954, |
|
"eval_loss": 0.12456455826759338, |
|
"eval_runtime": 10.3011, |
|
"eval_samples_per_second": 194.252, |
|
"eval_steps_per_second": 24.366, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.66510067114094e-05, |
|
"loss": 0.0309, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.631543624161074e-05, |
|
"loss": 0.0263, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.597986577181208e-05, |
|
"loss": 0.0162, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 4.5644295302013425e-05, |
|
"loss": 0.0175, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.5308724832214764e-05, |
|
"loss": 0.0165, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_accuracy": 0.9707741858523202, |
|
"eval_loss": 0.14595353603363037, |
|
"eval_runtime": 10.2794, |
|
"eval_samples_per_second": 194.66, |
|
"eval_steps_per_second": 24.418, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.497315436241611e-05, |
|
"loss": 0.016, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 4.463758389261745e-05, |
|
"loss": 0.0107, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 4.4302013422818796e-05, |
|
"loss": 0.0102, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 4.3966442953020135e-05, |
|
"loss": 0.0137, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 4.363087248322148e-05, |
|
"loss": 0.0084, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_accuracy": 0.970575370790091, |
|
"eval_loss": 0.16072939336299896, |
|
"eval_runtime": 10.2179, |
|
"eval_samples_per_second": 195.833, |
|
"eval_steps_per_second": 24.565, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 4.329530201342282e-05, |
|
"loss": 0.0061, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 4.2959731543624166e-05, |
|
"loss": 0.0076, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 4.2624161073825505e-05, |
|
"loss": 0.009, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 4.2288590604026845e-05, |
|
"loss": 0.0076, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 4.195302013422819e-05, |
|
"loss": 0.005, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_accuracy": 0.9695812954789454, |
|
"eval_loss": 0.18822818994522095, |
|
"eval_runtime": 10.2296, |
|
"eval_samples_per_second": 195.609, |
|
"eval_steps_per_second": 24.537, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 4.161744966442953e-05, |
|
"loss": 0.0065, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 4.1281879194630876e-05, |
|
"loss": 0.0056, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 4.0946308724832215e-05, |
|
"loss": 0.0058, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 4.061073825503356e-05, |
|
"loss": 0.0045, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 4.02751677852349e-05, |
|
"loss": 0.0054, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_accuracy": 0.9698993995785121, |
|
"eval_loss": 0.17441630363464355, |
|
"eval_runtime": 10.2421, |
|
"eval_samples_per_second": 195.37, |
|
"eval_steps_per_second": 24.507, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 3.993959731543625e-05, |
|
"loss": 0.0046, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 3.9604026845637586e-05, |
|
"loss": 0.0056, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 3.926845637583893e-05, |
|
"loss": 0.0039, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 3.893288590604027e-05, |
|
"loss": 0.004, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 3.859731543624161e-05, |
|
"loss": 0.0058, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_accuracy": 0.9720466022505865, |
|
"eval_loss": 0.16418756544589996, |
|
"eval_runtime": 10.248, |
|
"eval_samples_per_second": 195.258, |
|
"eval_steps_per_second": 24.493, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 3.826174496644295e-05, |
|
"loss": 0.0039, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 3.7926174496644296e-05, |
|
"loss": 0.0037, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 3.7590604026845635e-05, |
|
"loss": 0.0026, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 3.725503355704698e-05, |
|
"loss": 0.0034, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 3.691946308724833e-05, |
|
"loss": 0.0034, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"eval_accuracy": 0.9707344228398743, |
|
"eval_loss": 0.1923176646232605, |
|
"eval_runtime": 10.2481, |
|
"eval_samples_per_second": 195.256, |
|
"eval_steps_per_second": 24.492, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 3.6583892617449666e-05, |
|
"loss": 0.0033, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 3.624832214765101e-05, |
|
"loss": 0.0029, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 3.591275167785235e-05, |
|
"loss": 0.0046, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 3.55771812080537e-05, |
|
"loss": 0.0037, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 3.524161073825504e-05, |
|
"loss": 0.0032, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"eval_accuracy": 0.9701777406656328, |
|
"eval_loss": 0.19770818948745728, |
|
"eval_runtime": 10.2544, |
|
"eval_samples_per_second": 195.135, |
|
"eval_steps_per_second": 24.477, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 3.4906040268456376e-05, |
|
"loss": 0.0034, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 3.4570469798657716e-05, |
|
"loss": 0.0038, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 3.423489932885906e-05, |
|
"loss": 0.0027, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 3.38993288590604e-05, |
|
"loss": 0.0031, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 3.356375838926175e-05, |
|
"loss": 0.0023, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"eval_accuracy": 0.9706548968149827, |
|
"eval_loss": 0.1978425234556198, |
|
"eval_runtime": 10.2317, |
|
"eval_samples_per_second": 195.569, |
|
"eval_steps_per_second": 24.532, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 3.3228187919463086e-05, |
|
"loss": 0.0034, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 3.289261744966443e-05, |
|
"loss": 0.0027, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 3.255704697986577e-05, |
|
"loss": 0.0036, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 3.222147651006712e-05, |
|
"loss": 0.0034, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 3.188590604026846e-05, |
|
"loss": 0.0034, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"eval_accuracy": 0.9698596365660662, |
|
"eval_loss": 0.2152303010225296, |
|
"eval_runtime": 10.2495, |
|
"eval_samples_per_second": 195.228, |
|
"eval_steps_per_second": 24.489, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 3.1553691275167785e-05, |
|
"loss": 0.0029, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 3.1218120805369124e-05, |
|
"loss": 0.0023, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 3.088255033557047e-05, |
|
"loss": 0.0029, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 3.0546979865771816e-05, |
|
"loss": 0.0033, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 3.0211409395973156e-05, |
|
"loss": 0.0028, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"eval_accuracy": 0.9705356077776452, |
|
"eval_loss": 0.20026059448719025, |
|
"eval_runtime": 10.2479, |
|
"eval_samples_per_second": 195.26, |
|
"eval_steps_per_second": 24.493, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"step": 6000, |
|
"total_flos": 2.508593046513408e+16, |
|
"train_loss": 0.03989025120437145, |
|
"train_runtime": 1640.9678, |
|
"train_samples_per_second": 292.51, |
|
"train_steps_per_second": 9.141 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 39, |
|
"total_flos": 2.508593046513408e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|