bert_base_lda_5_v1_rte / trainer_state.json
gokulsrinivasagan's picture
End of training
555763f verified
raw
history blame
7.5 kB
{
"best_metric": 0.691815197467804,
"best_model_checkpoint": "bert_base_lda_5_v1_rte/checkpoint-110",
"epoch": 16.0,
"eval_steps": 500,
"global_step": 160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.441200315952301,
"learning_rate": 0.00098,
"loss": 1.0506,
"step": 10
},
{
"epoch": 1.0,
"eval_accuracy": 0.5270758122743683,
"eval_loss": 0.7786834239959717,
"eval_runtime": 0.1826,
"eval_samples_per_second": 1517.252,
"eval_steps_per_second": 10.955,
"step": 10
},
{
"epoch": 2.0,
"grad_norm": 2.4710278511047363,
"learning_rate": 0.00096,
"loss": 0.7306,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.8054137229919434,
"eval_runtime": 0.1909,
"eval_samples_per_second": 1450.764,
"eval_steps_per_second": 10.475,
"step": 20
},
{
"epoch": 3.0,
"grad_norm": 0.8589054942131042,
"learning_rate": 0.00094,
"loss": 0.7253,
"step": 30
},
{
"epoch": 3.0,
"eval_accuracy": 0.5234657039711191,
"eval_loss": 0.6984149217605591,
"eval_runtime": 0.1857,
"eval_samples_per_second": 1492.0,
"eval_steps_per_second": 10.773,
"step": 30
},
{
"epoch": 4.0,
"grad_norm": 0.399823933839798,
"learning_rate": 0.00092,
"loss": 0.7014,
"step": 40
},
{
"epoch": 4.0,
"eval_accuracy": 0.5270758122743683,
"eval_loss": 0.6923157572746277,
"eval_runtime": 0.1846,
"eval_samples_per_second": 1500.535,
"eval_steps_per_second": 10.834,
"step": 40
},
{
"epoch": 5.0,
"grad_norm": 0.2132512331008911,
"learning_rate": 0.0009000000000000001,
"loss": 0.7016,
"step": 50
},
{
"epoch": 5.0,
"eval_accuracy": 0.51985559566787,
"eval_loss": 0.7043237090110779,
"eval_runtime": 0.1943,
"eval_samples_per_second": 1425.341,
"eval_steps_per_second": 10.291,
"step": 50
},
{
"epoch": 6.0,
"grad_norm": 0.2804683446884155,
"learning_rate": 0.00088,
"loss": 0.6998,
"step": 60
},
{
"epoch": 6.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6968496441841125,
"eval_runtime": 0.187,
"eval_samples_per_second": 1481.54,
"eval_steps_per_second": 10.697,
"step": 60
},
{
"epoch": 7.0,
"grad_norm": 0.1722436547279358,
"learning_rate": 0.00086,
"loss": 0.6943,
"step": 70
},
{
"epoch": 7.0,
"eval_accuracy": 0.51985559566787,
"eval_loss": 0.6937753558158875,
"eval_runtime": 0.1875,
"eval_samples_per_second": 1477.581,
"eval_steps_per_second": 10.668,
"step": 70
},
{
"epoch": 8.0,
"grad_norm": 0.15857921540737152,
"learning_rate": 0.00084,
"loss": 0.6937,
"step": 80
},
{
"epoch": 8.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.695806086063385,
"eval_runtime": 0.1858,
"eval_samples_per_second": 1490.666,
"eval_steps_per_second": 10.763,
"step": 80
},
{
"epoch": 9.0,
"grad_norm": 0.1902921497821808,
"learning_rate": 0.00082,
"loss": 0.6962,
"step": 90
},
{
"epoch": 9.0,
"eval_accuracy": 0.48014440433212996,
"eval_loss": 0.6922100782394409,
"eval_runtime": 0.1902,
"eval_samples_per_second": 1456.246,
"eval_steps_per_second": 10.514,
"step": 90
},
{
"epoch": 10.0,
"grad_norm": 0.2601146101951599,
"learning_rate": 0.0008,
"loss": 0.6955,
"step": 100
},
{
"epoch": 10.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6962996125221252,
"eval_runtime": 0.1893,
"eval_samples_per_second": 1463.542,
"eval_steps_per_second": 10.567,
"step": 100
},
{
"epoch": 11.0,
"grad_norm": 0.403894305229187,
"learning_rate": 0.0007800000000000001,
"loss": 0.6948,
"step": 110
},
{
"epoch": 11.0,
"eval_accuracy": 0.5270758122743683,
"eval_loss": 0.691815197467804,
"eval_runtime": 0.1877,
"eval_samples_per_second": 1475.946,
"eval_steps_per_second": 10.657,
"step": 110
},
{
"epoch": 12.0,
"grad_norm": 0.07746425271034241,
"learning_rate": 0.00076,
"loss": 0.6931,
"step": 120
},
{
"epoch": 12.0,
"eval_accuracy": 0.51985559566787,
"eval_loss": 0.6937612891197205,
"eval_runtime": 0.1885,
"eval_samples_per_second": 1469.253,
"eval_steps_per_second": 10.608,
"step": 120
},
{
"epoch": 13.0,
"grad_norm": 0.0822499617934227,
"learning_rate": 0.00074,
"loss": 0.6932,
"step": 130
},
{
"epoch": 13.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6948471069335938,
"eval_runtime": 0.1857,
"eval_samples_per_second": 1491.974,
"eval_steps_per_second": 10.772,
"step": 130
},
{
"epoch": 14.0,
"grad_norm": 0.06322963535785675,
"learning_rate": 0.0007199999999999999,
"loss": 0.6934,
"step": 140
},
{
"epoch": 14.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6921818852424622,
"eval_runtime": 0.1873,
"eval_samples_per_second": 1478.54,
"eval_steps_per_second": 10.675,
"step": 140
},
{
"epoch": 15.0,
"grad_norm": 0.08335640281438828,
"learning_rate": 0.0007,
"loss": 0.6933,
"step": 150
},
{
"epoch": 15.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6936061382293701,
"eval_runtime": 0.1851,
"eval_samples_per_second": 1496.787,
"eval_steps_per_second": 10.807,
"step": 150
},
{
"epoch": 16.0,
"grad_norm": 0.07828615605831146,
"learning_rate": 0.00068,
"loss": 0.6933,
"step": 160
},
{
"epoch": 16.0,
"eval_accuracy": 0.51985559566787,
"eval_loss": 0.6943253874778748,
"eval_runtime": 0.1872,
"eval_samples_per_second": 1479.887,
"eval_steps_per_second": 10.685,
"step": 160
},
{
"epoch": 16.0,
"step": 160,
"total_flos": 5241172222771200.0,
"train_loss": 0.7218841880559921,
"train_runtime": 103.9484,
"train_samples_per_second": 1197.71,
"train_steps_per_second": 4.81
}
],
"logging_steps": 1,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5241172222771200.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}