{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 0.0005, "loss": 2.3712, "step": 100 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 1.8732, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.593873417721519, "eval_loss": 2.011091947555542, "eval_runtime": 5.3852, "eval_samples_per_second": 92.847, "eval_steps_per_second": 11.699, "step": 250 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 1.7329, "step": 300 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.6233, "step": 400 }, { "epoch": 2.0, "learning_rate": 0.0005, "loss": 1.6142, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6051139240506329, "eval_loss": 1.844284176826477, "eval_runtime": 5.3165, "eval_samples_per_second": 94.047, "eval_steps_per_second": 11.85, "step": 500 }, { "epoch": 2.4, "learning_rate": 0.0005, "loss": 1.1625, "step": 600 }, { "epoch": 2.8, "learning_rate": 0.0005, "loss": 1.206, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.6007341772151898, "eval_loss": 1.981817603111267, "eval_runtime": 5.3466, "eval_samples_per_second": 93.517, "eval_steps_per_second": 11.783, "step": 750 }, { "epoch": 3.2, "learning_rate": 0.0005, "loss": 1.0064, "step": 800 }, { "epoch": 3.6, "learning_rate": 0.0005, "loss": 0.8141, "step": 900 }, { "epoch": 4.0, "learning_rate": 0.0005, "loss": 0.8693, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.594126582278481, "eval_loss": 2.2100281715393066, "eval_runtime": 4.739, "eval_samples_per_second": 105.508, "eval_steps_per_second": 13.294, "step": 1000 }, { "epoch": 4.4, "learning_rate": 0.0005, "loss": 0.5477, "step": 1100 }, { "epoch": 4.8, "learning_rate": 0.0005, "loss": 0.6023, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.5910126582278481, "eval_loss": 2.375615119934082, "eval_runtime": 4.6068, "eval_samples_per_second": 108.536, "eval_steps_per_second": 13.676, "step": 1250 }, { "epoch": 5.2, "learning_rate": 0.0005, "loss": 0.514, "step": 1300 }, { "epoch": 5.6, "learning_rate": 0.0005, "loss": 0.4479, "step": 1400 }, { "epoch": 6.0, "learning_rate": 0.0005, "loss": 0.4717, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.5895696202531645, "eval_loss": 2.542104482650757, "eval_runtime": 4.662, "eval_samples_per_second": 107.251, "eval_steps_per_second": 13.514, "step": 1500 }, { "epoch": 6.4, "learning_rate": 0.0005, "loss": 0.3723, "step": 1600 }, { "epoch": 6.8, "learning_rate": 0.0005, "loss": 0.3938, "step": 1700 }, { "epoch": 7.0, "eval_accuracy": 0.5891139240506329, "eval_loss": 2.658656597137451, "eval_runtime": 4.9363, "eval_samples_per_second": 101.29, "eval_steps_per_second": 12.763, "step": 1750 }, { "epoch": 7.2, "learning_rate": 0.0005, "loss": 0.3727, "step": 1800 }, { "epoch": 7.6, "learning_rate": 0.0005, "loss": 0.3538, "step": 1900 }, { "epoch": 8.0, "learning_rate": 0.0005, "loss": 0.3697, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.5872911392405064, "eval_loss": 2.7531518936157227, "eval_runtime": 4.7177, "eval_samples_per_second": 105.985, "eval_steps_per_second": 13.354, "step": 2000 }, { "epoch": 8.4, "learning_rate": 0.0005, "loss": 0.3431, "step": 2100 }, { "epoch": 8.8, "learning_rate": 0.0005, "loss": 0.3617, "step": 2200 }, { "epoch": 9.0, "eval_accuracy": 0.5870126582278481, "eval_loss": 2.7664403915405273, "eval_runtime": 4.7999, "eval_samples_per_second": 104.169, "eval_steps_per_second": 13.125, "step": 2250 }, { "epoch": 9.2, "learning_rate": 0.0005, "loss": 0.3424, "step": 2300 }, { "epoch": 9.6, "learning_rate": 0.0005, "loss": 0.3418, "step": 2400 }, { "epoch": 10.0, "learning_rate": 0.0005, "loss": 0.3607, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.5866835443037974, "eval_loss": 2.8513779640197754, "eval_runtime": 4.6699, "eval_samples_per_second": 107.069, "eval_steps_per_second": 13.491, "step": 2500 }, { "epoch": 10.4, "learning_rate": 0.0005, "loss": 0.339, "step": 2600 }, { "epoch": 10.8, "learning_rate": 0.0005, "loss": 0.3414, "step": 2700 }, { "epoch": 11.0, "eval_accuracy": 0.5860759493670886, "eval_loss": 2.8931641578674316, "eval_runtime": 4.8648, "eval_samples_per_second": 102.78, "eval_steps_per_second": 12.95, "step": 2750 }, { "epoch": 11.2, "learning_rate": 0.0005, "loss": 0.3302, "step": 2800 }, { "epoch": 11.6, "learning_rate": 0.0005, "loss": 0.329, "step": 2900 }, { "epoch": 12.0, "learning_rate": 0.0005, "loss": 0.3439, "step": 3000 }, { "epoch": 12.0, "eval_accuracy": 0.5854683544303797, "eval_loss": 2.9544754028320312, "eval_runtime": 5.1699, "eval_samples_per_second": 96.713, "eval_steps_per_second": 12.186, "step": 3000 }, { "epoch": 12.4, "learning_rate": 0.0005, "loss": 0.3182, "step": 3100 }, { "epoch": 12.8, "learning_rate": 0.0005, "loss": 0.335, "step": 3200 }, { "epoch": 13.0, "eval_accuracy": 0.5842784810126582, "eval_loss": 2.899094343185425, "eval_runtime": 4.8433, "eval_samples_per_second": 103.236, "eval_steps_per_second": 13.008, "step": 3250 }, { "epoch": 13.2, "learning_rate": 0.0005, "loss": 0.327, "step": 3300 }, { "epoch": 13.6, "learning_rate": 0.0005, "loss": 0.3223, "step": 3400 }, { "epoch": 14.0, "learning_rate": 0.0005, "loss": 0.3391, "step": 3500 }, { "epoch": 14.0, "eval_accuracy": 0.5839746835443038, "eval_loss": 2.879316806793213, "eval_runtime": 4.9294, "eval_samples_per_second": 101.433, "eval_steps_per_second": 12.781, "step": 3500 }, { "epoch": 14.4, "learning_rate": 0.0005, "loss": 0.3128, "step": 3600 }, { "epoch": 14.8, "learning_rate": 0.0005, "loss": 0.328, "step": 3700 }, { "epoch": 15.0, "eval_accuracy": 0.5851392405063292, "eval_loss": 2.8953680992126465, "eval_runtime": 4.6969, "eval_samples_per_second": 106.452, "eval_steps_per_second": 13.413, "step": 3750 }, { "epoch": 15.2, "learning_rate": 0.0005, "loss": 0.3233, "step": 3800 }, { "epoch": 15.6, "learning_rate": 0.0005, "loss": 0.3216, "step": 3900 }, { "epoch": 16.0, "learning_rate": 0.0005, "loss": 0.3351, "step": 4000 }, { "epoch": 16.0, "eval_accuracy": 0.5838481012658228, "eval_loss": 2.913999080657959, "eval_runtime": 4.7522, "eval_samples_per_second": 105.214, "eval_steps_per_second": 13.257, "step": 4000 }, { "epoch": 16.4, "learning_rate": 0.0005, "loss": 0.3087, "step": 4100 }, { "epoch": 16.8, "learning_rate": 0.0005, "loss": 0.3252, "step": 4200 }, { "epoch": 17.0, "eval_accuracy": 0.5825063291139241, "eval_loss": 2.929701089859009, "eval_runtime": 4.8518, "eval_samples_per_second": 103.055, "eval_steps_per_second": 12.985, "step": 4250 }, { "epoch": 17.2, "learning_rate": 0.0005, "loss": 0.3152, "step": 4300 }, { "epoch": 17.6, "learning_rate": 0.0005, "loss": 0.3161, "step": 4400 }, { "epoch": 18.0, "learning_rate": 0.0005, "loss": 0.332, "step": 4500 }, { "epoch": 18.0, "eval_accuracy": 0.5833924050632912, "eval_loss": 2.9811997413635254, "eval_runtime": 5.341, "eval_samples_per_second": 93.615, "eval_steps_per_second": 11.796, "step": 4500 }, { "epoch": 18.4, "learning_rate": 0.0005, "loss": 0.3089, "step": 4600 }, { "epoch": 18.8, "learning_rate": 0.0005, "loss": 0.324, "step": 4700 }, { "epoch": 19.0, "eval_accuracy": 0.5807848101265822, "eval_loss": 2.982343912124634, "eval_runtime": 4.9018, "eval_samples_per_second": 102.002, "eval_steps_per_second": 12.852, "step": 4750 }, { "epoch": 19.2, "learning_rate": 0.0005, "loss": 0.3169, "step": 4800 }, { "epoch": 19.6, "learning_rate": 0.0005, "loss": 0.3116, "step": 4900 }, { "epoch": 20.0, "learning_rate": 0.0005, "loss": 0.3329, "step": 5000 }, { "epoch": 20.0, "eval_accuracy": 0.5813164556962025, "eval_loss": 2.9419918060302734, "eval_runtime": 4.5961, "eval_samples_per_second": 108.787, "eval_steps_per_second": 13.707, "step": 5000 }, { "epoch": 20.0, "step": 5000, "total_flos": 3.1967425075347456e+17, "train_loss": 0.5721206008911133, "train_runtime": 3939.3187, "train_samples_per_second": 40.616, "train_steps_per_second": 1.269 } ], "logging_steps": 100, "max_steps": 5000, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.1967425075347456e+17, "trial_name": null, "trial_params": null }