|
{ |
|
"best_metric": 0.9700717616323291, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-pos-ud-Arabic-PADT/checkpoint-2500", |
|
"epoch": 26.31578947368421, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.840000000000001e-05, |
|
"loss": 0.9117, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.947382550335571e-05, |
|
"loss": 0.154, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 7.893691275167786e-05, |
|
"loss": 0.1201, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 7.840000000000001e-05, |
|
"loss": 0.1031, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.786308724832216e-05, |
|
"loss": 0.0781, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_accuracy": 0.9692119448394457, |
|
"eval_loss": 0.11606918275356293, |
|
"eval_runtime": 4.6573, |
|
"eval_samples_per_second": 195.177, |
|
"eval_steps_per_second": 24.478, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.73261744966443e-05, |
|
"loss": 0.0712, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 7.678926174496645e-05, |
|
"loss": 0.0585, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.62523489932886e-05, |
|
"loss": 0.053, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 7.571543624161075e-05, |
|
"loss": 0.0439, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.51785234899329e-05, |
|
"loss": 0.0393, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_accuracy": 0.9695426436059393, |
|
"eval_loss": 0.12902098894119263, |
|
"eval_runtime": 4.6945, |
|
"eval_samples_per_second": 193.631, |
|
"eval_steps_per_second": 24.284, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 7.464161073825505e-05, |
|
"loss": 0.0338, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 7.410469798657718e-05, |
|
"loss": 0.03, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 7.356778523489933e-05, |
|
"loss": 0.0272, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 7.303087248322148e-05, |
|
"loss": 0.023, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 7.249395973154363e-05, |
|
"loss": 0.0233, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_accuracy": 0.9680544991567182, |
|
"eval_loss": 0.1491442620754242, |
|
"eval_runtime": 4.6841, |
|
"eval_samples_per_second": 194.061, |
|
"eval_steps_per_second": 24.338, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 7.195704697986577e-05, |
|
"loss": 0.0196, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 7.142013422818792e-05, |
|
"loss": 0.019, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 7.088322147651007e-05, |
|
"loss": 0.0148, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 7.034630872483222e-05, |
|
"loss": 0.0165, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 6.980939597315437e-05, |
|
"loss": 0.0143, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"eval_accuracy": 0.9679222196501207, |
|
"eval_loss": 0.17728525400161743, |
|
"eval_runtime": 4.6951, |
|
"eval_samples_per_second": 193.605, |
|
"eval_steps_per_second": 24.28, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 6.927248322147651e-05, |
|
"loss": 0.0135, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 6.873557046979866e-05, |
|
"loss": 0.0115, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 6.819865771812081e-05, |
|
"loss": 0.0127, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 6.766174496644296e-05, |
|
"loss": 0.0109, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 6.712483221476511e-05, |
|
"loss": 0.0122, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"eval_accuracy": 0.9700717616323291, |
|
"eval_loss": 0.1844806671142578, |
|
"eval_runtime": 4.7037, |
|
"eval_samples_per_second": 193.251, |
|
"eval_steps_per_second": 24.236, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 6.658791946308726e-05, |
|
"loss": 0.0098, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 6.60510067114094e-05, |
|
"loss": 0.0104, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 6.551409395973155e-05, |
|
"loss": 0.0089, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 6.49771812080537e-05, |
|
"loss": 0.0092, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 6.444026845637585e-05, |
|
"loss": 0.0086, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"eval_accuracy": 0.9694103640993419, |
|
"eval_loss": 0.18017974495887756, |
|
"eval_runtime": 4.6947, |
|
"eval_samples_per_second": 193.623, |
|
"eval_steps_per_second": 24.283, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 6.3903355704698e-05, |
|
"loss": 0.0084, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 6.336644295302015e-05, |
|
"loss": 0.0082, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 6.28295302013423e-05, |
|
"loss": 0.0068, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"learning_rate": 6.229261744966444e-05, |
|
"loss": 0.0063, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"learning_rate": 6.175570469798658e-05, |
|
"loss": 0.0059, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"eval_accuracy": 0.9698402724957836, |
|
"eval_loss": 0.20168748497962952, |
|
"eval_runtime": 4.6845, |
|
"eval_samples_per_second": 194.044, |
|
"eval_steps_per_second": 24.336, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"learning_rate": 6.121879194630873e-05, |
|
"loss": 0.0067, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 6.068187919463087e-05, |
|
"loss": 0.0068, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 6.014496644295302e-05, |
|
"loss": 0.0074, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"learning_rate": 5.960805369127517e-05, |
|
"loss": 0.006, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"learning_rate": 5.907114093959732e-05, |
|
"loss": 0.0061, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"eval_accuracy": 0.969873342372433, |
|
"eval_loss": 0.1942814141511917, |
|
"eval_runtime": 4.6772, |
|
"eval_samples_per_second": 194.346, |
|
"eval_steps_per_second": 24.373, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.58, |
|
"learning_rate": 5.8534228187919466e-05, |
|
"loss": 0.0064, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"learning_rate": 5.7997315436241614e-05, |
|
"loss": 0.0053, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.63, |
|
"learning_rate": 5.746040268456376e-05, |
|
"loss": 0.0048, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"learning_rate": 5.692348993288591e-05, |
|
"loss": 0.0061, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 5.638657718120806e-05, |
|
"loss": 0.0052, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"eval_accuracy": 0.9685174774298092, |
|
"eval_loss": 0.2084885686635971, |
|
"eval_runtime": 4.6977, |
|
"eval_samples_per_second": 193.498, |
|
"eval_steps_per_second": 24.267, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 5.58496644295302e-05, |
|
"loss": 0.0049, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 24.74, |
|
"learning_rate": 5.531275167785235e-05, |
|
"loss": 0.0041, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 25.26, |
|
"learning_rate": 5.47758389261745e-05, |
|
"loss": 0.0054, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 25.79, |
|
"learning_rate": 5.4238926174496645e-05, |
|
"loss": 0.0049, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 5.370738255033558e-05, |
|
"loss": 0.0045, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"eval_accuracy": 0.9700056218790304, |
|
"eval_loss": 0.2086580991744995, |
|
"eval_runtime": 4.685, |
|
"eval_samples_per_second": 194.024, |
|
"eval_steps_per_second": 24.333, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"step": 5000, |
|
"total_flos": 2.088958878798336e+16, |
|
"train_loss": 0.0416443316757679, |
|
"train_runtime": 1404.0367, |
|
"train_samples_per_second": 341.871, |
|
"train_steps_per_second": 10.683 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 79, |
|
"total_flos": 2.088958878798336e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|