|
{ |
|
"best_metric": 1.6190818548202515, |
|
"best_model_checkpoint": "bert_base_lda_5_v1_stsb/checkpoint-138", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 253, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.5698792934417725, |
|
"learning_rate": 4.9e-05, |
|
"loss": 2.6199, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.0762828613089408, |
|
"eval_loss": 2.5334348678588867, |
|
"eval_pearson": 0.08810304441387712, |
|
"eval_runtime": 0.9594, |
|
"eval_samples_per_second": 1563.484, |
|
"eval_spearmanr": 0.06446267820400446, |
|
"eval_steps_per_second": 6.254, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 14.151108741760254, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.9109, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.36476464733810526, |
|
"eval_loss": 2.047811985015869, |
|
"eval_pearson": 0.36724434323508826, |
|
"eval_runtime": 0.9781, |
|
"eval_samples_per_second": 1533.516, |
|
"eval_spearmanr": 0.36228495144112227, |
|
"eval_steps_per_second": 6.134, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 22.73438835144043, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.4094, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.5038154114449356, |
|
"eval_loss": 1.7320016622543335, |
|
"eval_pearson": 0.5080314849305678, |
|
"eval_runtime": 0.9668, |
|
"eval_samples_per_second": 1551.517, |
|
"eval_spearmanr": 0.4995993379593033, |
|
"eval_steps_per_second": 6.206, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 10.041772842407227, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.9385, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.5257169620696511, |
|
"eval_loss": 1.9693756103515625, |
|
"eval_pearson": 0.5238214152085215, |
|
"eval_runtime": 0.9678, |
|
"eval_samples_per_second": 1549.828, |
|
"eval_spearmanr": 0.5276125089307807, |
|
"eval_steps_per_second": 6.199, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 17.2652645111084, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6759, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.5430029550606046, |
|
"eval_loss": 1.6463857889175415, |
|
"eval_pearson": 0.5449225085129881, |
|
"eval_runtime": 0.9788, |
|
"eval_samples_per_second": 1532.5, |
|
"eval_spearmanr": 0.5410834016082212, |
|
"eval_steps_per_second": 6.13, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 11.10145092010498, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.5133, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.562871620868381, |
|
"eval_loss": 1.6190818548202515, |
|
"eval_pearson": 0.5646223727019971, |
|
"eval_runtime": 0.9701, |
|
"eval_samples_per_second": 1546.162, |
|
"eval_spearmanr": 0.5611208690347649, |
|
"eval_steps_per_second": 6.185, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 7.592547416687012, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.377, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.5338739723007462, |
|
"eval_loss": 1.7542551755905151, |
|
"eval_pearson": 0.5361279903927652, |
|
"eval_runtime": 0.9773, |
|
"eval_samples_per_second": 1534.871, |
|
"eval_spearmanr": 0.5316199542087273, |
|
"eval_steps_per_second": 6.139, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 7.5484232902526855, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.3223, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.5719622639217419, |
|
"eval_loss": 1.6248753070831299, |
|
"eval_pearson": 0.5752071229769086, |
|
"eval_runtime": 0.9811, |
|
"eval_samples_per_second": 1528.848, |
|
"eval_spearmanr": 0.5687174048665751, |
|
"eval_steps_per_second": 6.115, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 11.02824592590332, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.2679, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_combined_score": 0.5653106471913698, |
|
"eval_loss": 1.679746150970459, |
|
"eval_pearson": 0.5684789908543003, |
|
"eval_runtime": 0.9683, |
|
"eval_samples_per_second": 1549.087, |
|
"eval_spearmanr": 0.5621423035284394, |
|
"eval_steps_per_second": 6.196, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.702976703643799, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2156, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_combined_score": 0.5539869393273291, |
|
"eval_loss": 1.6664037704467773, |
|
"eval_pearson": 0.559161559056532, |
|
"eval_runtime": 0.9742, |
|
"eval_samples_per_second": 1539.752, |
|
"eval_spearmanr": 0.5488123195981262, |
|
"eval_steps_per_second": 6.159, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.3981852531433105, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.1962, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_combined_score": 0.5756187387849405, |
|
"eval_loss": 1.6273460388183594, |
|
"eval_pearson": 0.5784070931166707, |
|
"eval_runtime": 0.9851, |
|
"eval_samples_per_second": 1522.642, |
|
"eval_spearmanr": 0.5728303844532103, |
|
"eval_steps_per_second": 6.091, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 253, |
|
"total_flos": 8319365318060544.0, |
|
"train_loss": 0.8587995838270828, |
|
"train_runtime": 139.3814, |
|
"train_samples_per_second": 2062.327, |
|
"train_steps_per_second": 8.251 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8319365318060544.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|