{ "best_metric": 0.4396927058696747, "best_model_checkpoint": "bert_tiny_lda_5_v1_sst2/checkpoint-264", "epoch": 6.0, "eval_steps": 500, "global_step": 1584, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 9.570955276489258, "learning_rate": 4.9e-05, "loss": 0.402, "step": 264 }, { "epoch": 1.0, "eval_accuracy": 0.8153669724770642, "eval_loss": 0.4396927058696747, "eval_runtime": 0.2768, "eval_samples_per_second": 3150.401, "eval_steps_per_second": 14.451, "step": 264 }, { "epoch": 2.0, "grad_norm": 10.17089557647705, "learning_rate": 4.8e-05, "loss": 0.2385, "step": 528 }, { "epoch": 2.0, "eval_accuracy": 0.8038990825688074, "eval_loss": 0.49887505173683167, "eval_runtime": 0.2789, "eval_samples_per_second": 3126.276, "eval_steps_per_second": 14.341, "step": 528 }, { "epoch": 3.0, "grad_norm": 6.174217224121094, "learning_rate": 4.7e-05, "loss": 0.1813, "step": 792 }, { "epoch": 3.0, "eval_accuracy": 0.7970183486238532, "eval_loss": 0.5259420871734619, "eval_runtime": 0.2888, "eval_samples_per_second": 3019.151, "eval_steps_per_second": 13.849, "step": 792 }, { "epoch": 4.0, "grad_norm": 6.113431930541992, "learning_rate": 4.600000000000001e-05, "loss": 0.1449, "step": 1056 }, { "epoch": 4.0, "eval_accuracy": 0.8004587155963303, "eval_loss": 0.5909135341644287, "eval_runtime": 0.2819, "eval_samples_per_second": 3093.767, "eval_steps_per_second": 14.192, "step": 1056 }, { "epoch": 5.0, "grad_norm": 3.880650043487549, "learning_rate": 4.5e-05, "loss": 0.1159, "step": 1320 }, { "epoch": 5.0, "eval_accuracy": 0.8027522935779816, "eval_loss": 0.6297979950904846, "eval_runtime": 0.2757, "eval_samples_per_second": 3162.953, "eval_steps_per_second": 14.509, "step": 1320 }, { "epoch": 6.0, "grad_norm": 4.169188976287842, "learning_rate": 4.4000000000000006e-05, "loss": 0.0955, "step": 1584 }, { "epoch": 6.0, "eval_accuracy": 0.7935779816513762, "eval_loss": 0.6731319427490234, "eval_runtime": 0.2813, "eval_samples_per_second": 3100.384, "eval_steps_per_second": 14.222, "step": 1584 }, { "epoch": 6.0, "step": 1584, "total_flos": 1.0596790573590528e+16, "train_loss": 0.19634188305247913, "train_runtime": 225.2591, "train_samples_per_second": 14949.233, "train_steps_per_second": 58.599 } ], "logging_steps": 1, "max_steps": 13200, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0596790573590528e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }