{ "best_metric": 1.6190818548202515, "best_model_checkpoint": "bert_base_lda_5_v1_stsb/checkpoint-138", "epoch": 11.0, "eval_steps": 500, "global_step": 253, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.5698792934417725, "learning_rate": 4.9e-05, "loss": 2.6199, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.0762828613089408, "eval_loss": 2.5334348678588867, "eval_pearson": 0.08810304441387712, "eval_runtime": 0.9594, "eval_samples_per_second": 1563.484, "eval_spearmanr": 0.06446267820400446, "eval_steps_per_second": 6.254, "step": 23 }, { "epoch": 2.0, "grad_norm": 14.151108741760254, "learning_rate": 4.8e-05, "loss": 1.9109, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.36476464733810526, "eval_loss": 2.047811985015869, "eval_pearson": 0.36724434323508826, "eval_runtime": 0.9781, "eval_samples_per_second": 1533.516, "eval_spearmanr": 0.36228495144112227, "eval_steps_per_second": 6.134, "step": 46 }, { "epoch": 3.0, "grad_norm": 22.73438835144043, "learning_rate": 4.7e-05, "loss": 1.4094, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.5038154114449356, "eval_loss": 1.7320016622543335, "eval_pearson": 0.5080314849305678, "eval_runtime": 0.9668, "eval_samples_per_second": 1551.517, "eval_spearmanr": 0.4995993379593033, "eval_steps_per_second": 6.206, "step": 69 }, { "epoch": 4.0, "grad_norm": 10.041772842407227, "learning_rate": 4.600000000000001e-05, "loss": 0.9385, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.5257169620696511, "eval_loss": 1.9693756103515625, "eval_pearson": 0.5238214152085215, "eval_runtime": 0.9678, "eval_samples_per_second": 1549.828, "eval_spearmanr": 0.5276125089307807, "eval_steps_per_second": 6.199, "step": 92 }, { "epoch": 5.0, "grad_norm": 17.2652645111084, "learning_rate": 4.5e-05, "loss": 0.6759, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.5430029550606046, "eval_loss": 1.6463857889175415, "eval_pearson": 0.5449225085129881, "eval_runtime": 0.9788, "eval_samples_per_second": 1532.5, "eval_spearmanr": 0.5410834016082212, "eval_steps_per_second": 6.13, "step": 115 }, { "epoch": 6.0, "grad_norm": 11.10145092010498, "learning_rate": 4.4000000000000006e-05, "loss": 0.5133, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.562871620868381, "eval_loss": 1.6190818548202515, "eval_pearson": 0.5646223727019971, "eval_runtime": 0.9701, "eval_samples_per_second": 1546.162, "eval_spearmanr": 0.5611208690347649, "eval_steps_per_second": 6.185, "step": 138 }, { "epoch": 7.0, "grad_norm": 7.592547416687012, "learning_rate": 4.3e-05, "loss": 0.377, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.5338739723007462, "eval_loss": 1.7542551755905151, "eval_pearson": 0.5361279903927652, "eval_runtime": 0.9773, "eval_samples_per_second": 1534.871, "eval_spearmanr": 0.5316199542087273, "eval_steps_per_second": 6.139, "step": 161 }, { "epoch": 8.0, "grad_norm": 7.5484232902526855, "learning_rate": 4.2e-05, "loss": 0.3223, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.5719622639217419, "eval_loss": 1.6248753070831299, "eval_pearson": 0.5752071229769086, "eval_runtime": 0.9811, "eval_samples_per_second": 1528.848, "eval_spearmanr": 0.5687174048665751, "eval_steps_per_second": 6.115, "step": 184 }, { "epoch": 9.0, "grad_norm": 11.02824592590332, "learning_rate": 4.1e-05, "loss": 0.2679, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.5653106471913698, "eval_loss": 1.679746150970459, "eval_pearson": 0.5684789908543003, "eval_runtime": 0.9683, "eval_samples_per_second": 1549.087, "eval_spearmanr": 0.5621423035284394, "eval_steps_per_second": 6.196, "step": 207 }, { "epoch": 10.0, "grad_norm": 5.702976703643799, "learning_rate": 4e-05, "loss": 0.2156, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.5539869393273291, "eval_loss": 1.6664037704467773, "eval_pearson": 0.559161559056532, "eval_runtime": 0.9742, "eval_samples_per_second": 1539.752, "eval_spearmanr": 0.5488123195981262, "eval_steps_per_second": 6.159, "step": 230 }, { "epoch": 11.0, "grad_norm": 4.3981852531433105, "learning_rate": 3.9000000000000006e-05, "loss": 0.1962, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.5756187387849405, "eval_loss": 1.6273460388183594, "eval_pearson": 0.5784070931166707, "eval_runtime": 0.9851, "eval_samples_per_second": 1522.642, "eval_spearmanr": 0.5728303844532103, "eval_steps_per_second": 6.091, "step": 253 }, { "epoch": 11.0, "step": 253, "total_flos": 8319365318060544.0, "train_loss": 0.8587995838270828, "train_runtime": 139.3814, "train_samples_per_second": 2062.327, "train_steps_per_second": 8.251 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8319365318060544.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }