{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.997442455242968, "global_step": 3900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.9274965524673462, "eval_loss": 0.23106738924980164, "eval_runtime": 10.0851, "eval_samples_per_second": 72.483, "eval_steps_per_second": 9.122, "step": 195 }, { "epoch": 2.0, "eval_accuracy": 0.9165526628494263, "eval_loss": 0.3205892741680145, "eval_runtime": 10.086, "eval_samples_per_second": 72.477, "eval_steps_per_second": 9.122, "step": 390 }, { "epoch": 2.56, "learning_rate": 1.2754443842729812e-05, "loss": 0.1729, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.9274965524673462, "eval_loss": 0.360751748085022, "eval_runtime": 9.9443, "eval_samples_per_second": 73.509, "eval_steps_per_second": 9.251, "step": 585 }, { "epoch": 4.0, "eval_accuracy": 0.9233925938606262, "eval_loss": 0.4383067190647125, "eval_runtime": 10.0596, "eval_samples_per_second": 72.667, "eval_steps_per_second": 9.145, "step": 780 }, { "epoch": 5.0, "eval_accuracy": 0.9206566214561462, "eval_loss": 0.5189540982246399, "eval_runtime": 10.0775, "eval_samples_per_second": 72.538, "eval_steps_per_second": 9.129, "step": 975 }, { "epoch": 5.13, "learning_rate": 1.0878790336446016e-05, "loss": 0.0589, "step": 1000 }, { "epoch": 6.0, "eval_accuracy": 0.9233925938606262, "eval_loss": 0.5728073716163635, "eval_runtime": 9.9637, "eval_samples_per_second": 73.366, "eval_steps_per_second": 9.233, "step": 1170 }, { "epoch": 7.0, "eval_accuracy": 0.9274965524673462, "eval_loss": 0.5684310793876648, "eval_runtime": 10.2744, "eval_samples_per_second": 71.148, "eval_steps_per_second": 8.954, "step": 1365 }, { "epoch": 7.69, "learning_rate": 9.003136830162221e-06, "loss": 0.0171, "step": 1500 }, { "epoch": 8.0, "eval_accuracy": 0.9261285662651062, "eval_loss": 0.5847771167755127, "eval_runtime": 10.1261, "eval_samples_per_second": 72.189, "eval_steps_per_second": 9.085, "step": 1560 }, { "epoch": 9.0, "eval_accuracy": 0.928864598274231, "eval_loss": 0.5692142248153687, "eval_runtime": 10.1226, "eval_samples_per_second": 72.214, "eval_steps_per_second": 9.089, "step": 1755 }, { "epoch": 10.0, "eval_accuracy": 0.9261285662651062, "eval_loss": 0.6423346400260925, "eval_runtime": 10.1278, "eval_samples_per_second": 72.178, "eval_steps_per_second": 9.084, "step": 1950 }, { "epoch": 10.26, "learning_rate": 7.1274833238784245e-06, "loss": 0.0094, "step": 2000 }, { "epoch": 11.0, "eval_accuracy": 0.9274965524673462, "eval_loss": 0.6332233548164368, "eval_runtime": 10.2523, "eval_samples_per_second": 71.301, "eval_steps_per_second": 8.974, "step": 2145 }, { "epoch": 12.0, "eval_accuracy": 0.9316005706787109, "eval_loss": 0.6691190600395203, "eval_runtime": 10.205, "eval_samples_per_second": 71.631, "eval_steps_per_second": 9.015, "step": 2340 }, { "epoch": 12.82, "learning_rate": 5.2518298175946285e-06, "loss": 0.0015, "step": 2500 }, { "epoch": 13.0, "eval_accuracy": 0.930232584476471, "eval_loss": 0.6589073538780212, "eval_runtime": 10.1851, "eval_samples_per_second": 71.771, "eval_steps_per_second": 9.033, "step": 2535 }, { "epoch": 14.0, "eval_accuracy": 0.928864598274231, "eval_loss": 0.6745939254760742, "eval_runtime": 10.1553, "eval_samples_per_second": 71.982, "eval_steps_per_second": 9.059, "step": 2730 }, { "epoch": 15.0, "eval_accuracy": 0.9357045292854309, "eval_loss": 0.6391911506652832, "eval_runtime": 10.1864, "eval_samples_per_second": 71.762, "eval_steps_per_second": 9.032, "step": 2925 }, { "epoch": 15.38, "learning_rate": 3.376176311310833e-06, "loss": 0.0002, "step": 3000 }, { "epoch": 16.0, "eval_accuracy": 0.928864598274231, "eval_loss": 0.6711709499359131, "eval_runtime": 10.0882, "eval_samples_per_second": 72.461, "eval_steps_per_second": 9.12, "step": 3120 }, { "epoch": 17.0, "eval_accuracy": 0.9343365430831909, "eval_loss": 0.6648762226104736, "eval_runtime": 9.9723, "eval_samples_per_second": 73.303, "eval_steps_per_second": 9.226, "step": 3315 }, { "epoch": 17.95, "learning_rate": 1.5005228050270366e-06, "loss": 0.0, "step": 3500 }, { "epoch": 18.0, "eval_accuracy": 0.930232584476471, "eval_loss": 0.6720365881919861, "eval_runtime": 10.1302, "eval_samples_per_second": 72.16, "eval_steps_per_second": 9.082, "step": 3510 }, { "epoch": 19.0, "eval_accuracy": 0.9316005706787109, "eval_loss": 0.6750090718269348, "eval_runtime": 10.1546, "eval_samples_per_second": 71.987, "eval_steps_per_second": 9.06, "step": 3705 }, { "epoch": 20.0, "eval_accuracy": 0.9316005706787109, "eval_loss": 0.6705575585365295, "eval_runtime": 10.275, "eval_samples_per_second": 71.144, "eval_steps_per_second": 8.954, "step": 3900 }, { "epoch": 20.0, "step": 3900, "total_flos": 1.4573542659723264e+16, "train_loss": 0.03345794048399115, "train_runtime": 4631.396, "train_samples_per_second": 13.508, "train_steps_per_second": 0.842 } ], "max_steps": 3900, "num_train_epochs": 20, "total_flos": 1.4573542659723264e+16, "trial_name": null, "trial_params": null }