{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2938090241343127, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "eval_accuracy": 0.7687216869069928, "eval_f1": 0.7665424923390772, "eval_loss": 0.9364227652549744, "eval_runtime": 684.6631, "eval_samples_per_second": 197.963, "eval_steps_per_second": 0.774, "step": 400 }, { "epoch": 0.05, "learning_rate": 9.994774396642183e-06, "loss": 1.2511, "step": 500 }, { "epoch": 0.08, "eval_accuracy": 0.8296492496569228, "eval_f1": 0.8293241244922317, "eval_loss": 0.8166272044181824, "eval_runtime": 684.0816, "eval_samples_per_second": 198.131, "eval_steps_per_second": 0.775, "step": 800 }, { "epoch": 0.1, "learning_rate": 9.989538300104932e-06, "loss": 0.8696, "step": 1000 }, { "epoch": 0.13, "eval_accuracy": 0.846795732562086, "eval_f1": 0.8476031571339556, "eval_loss": 0.7674374580383301, "eval_runtime": 684.9072, "eval_samples_per_second": 197.892, "eval_steps_per_second": 0.774, "step": 1200 }, { "epoch": 0.16, "learning_rate": 9.98429171038825e-06, "loss": 0.8001, "step": 1500 }, { "epoch": 0.17, "eval_accuracy": 0.8562469565730644, "eval_f1": 0.8555885462291756, "eval_loss": 0.7412897348403931, "eval_runtime": 685.4288, "eval_samples_per_second": 197.742, "eval_steps_per_second": 0.773, "step": 1600 }, { "epoch": 0.21, "learning_rate": 9.97906610703043e-06, "loss": 0.7653, "step": 2000 }, { "epoch": 0.21, "eval_accuracy": 0.8584087119479408, "eval_f1": 0.8589656756888883, "eval_loss": 0.7346311807632446, "eval_runtime": 1253.8639, "eval_samples_per_second": 108.096, "eval_steps_per_second": 0.423, "step": 2000 }, { "epoch": 0.25, "eval_accuracy": 0.8606590033791262, "eval_f1": 0.8613764873641668, "eval_loss": 0.7210016846656799, "eval_runtime": 749.1336, "eval_samples_per_second": 180.926, "eval_steps_per_second": 0.707, "step": 2400 }, { "epoch": 0.26, "learning_rate": 9.973840503672613e-06, "loss": 0.7517, "step": 2500 }, { "epoch": 0.29, "eval_accuracy": 0.8668491493160589, "eval_f1": 0.8662439909072627, "eval_loss": 0.7115087509155273, "eval_runtime": 748.5848, "eval_samples_per_second": 181.059, "eval_steps_per_second": 0.708, "step": 2800 } ], "max_steps": 953000, "num_train_epochs": 100, "total_flos": 1.1748750372701952e+17, "trial_name": null, "trial_params": null }