{ "best_metric": 0.10869565217391304, "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-3", "epoch": 34.285714285714285, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.86678314208984, "eval_runtime": 0.6737, "eval_samples_per_second": 68.283, "eval_steps_per_second": 4.453, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.8652114868164, "eval_runtime": 0.6176, "eval_samples_per_second": 74.476, "eval_steps_per_second": 4.857, "step": 7 }, { "epoch": 2.86, "learning_rate": 2.2916666666666667e-05, "loss": 114.139, "step": 10 }, { "epoch": 2.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.86360168457031, "eval_runtime": 0.6252, "eval_samples_per_second": 73.581, "eval_steps_per_second": 4.799, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.86296081542969, "eval_runtime": 0.6111, "eval_samples_per_second": 75.27, "eval_steps_per_second": 4.909, "step": 14 }, { "epoch": 4.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.86167907714844, "eval_runtime": 0.6116, "eval_samples_per_second": 75.206, "eval_steps_per_second": 4.905, "step": 17 }, { "epoch": 5.71, "learning_rate": 2.0833333333333336e-05, "loss": 113.4957, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.85923767089844, "eval_runtime": 0.6187, "eval_samples_per_second": 74.355, "eval_steps_per_second": 4.849, "step": 21 }, { "epoch": 6.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.8578872680664, "eval_runtime": 0.6091, "eval_samples_per_second": 75.517, "eval_steps_per_second": 4.925, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.8578109741211, "eval_runtime": 0.6207, "eval_samples_per_second": 74.114, "eval_steps_per_second": 4.834, "step": 28 }, { "epoch": 8.57, "learning_rate": 1.8750000000000002e-05, "loss": 111.7345, "step": 30 }, { "epoch": 8.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.8550033569336, "eval_runtime": 0.6176, "eval_samples_per_second": 74.476, "eval_steps_per_second": 4.857, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.85313415527344, "eval_runtime": 0.5966, "eval_samples_per_second": 77.098, "eval_steps_per_second": 5.028, "step": 35 }, { "epoch": 10.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.85196685791016, "eval_runtime": 0.6032, "eval_samples_per_second": 76.266, "eval_steps_per_second": 4.974, "step": 38 }, { "epoch": 11.43, "learning_rate": 1.6666666666666667e-05, "loss": 115.9214, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.84968566894531, "eval_runtime": 0.5951, "eval_samples_per_second": 77.294, "eval_steps_per_second": 5.041, "step": 42 }, { "epoch": 12.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.8483657836914, "eval_runtime": 0.6171, "eval_samples_per_second": 74.538, "eval_steps_per_second": 4.861, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.84552001953125, "eval_runtime": 0.6081, "eval_samples_per_second": 75.641, "eval_steps_per_second": 4.933, "step": 49 }, { "epoch": 14.29, "learning_rate": 1.4583333333333335e-05, "loss": 112.3215, "step": 50 }, { "epoch": 14.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.83916473388672, "eval_runtime": 0.7172, "eval_samples_per_second": 64.142, "eval_steps_per_second": 4.183, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.83506774902344, "eval_runtime": 0.6271, "eval_samples_per_second": 73.349, "eval_steps_per_second": 4.784, "step": 56 }, { "epoch": 16.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.83540344238281, "eval_runtime": 0.6112, "eval_samples_per_second": 75.268, "eval_steps_per_second": 4.909, "step": 59 }, { "epoch": 17.14, "learning_rate": 1.25e-05, "loss": 113.1908, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.83159637451172, "eval_runtime": 0.6036, "eval_samples_per_second": 76.204, "eval_steps_per_second": 4.97, "step": 63 }, { "epoch": 18.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.82946014404297, "eval_runtime": 0.6282, "eval_samples_per_second": 73.23, "eval_steps_per_second": 4.776, "step": 66 }, { "epoch": 20.0, "learning_rate": 1.0416666666666668e-05, "loss": 114.062, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.82839965820312, "eval_runtime": 0.5991, "eval_samples_per_second": 76.778, "eval_steps_per_second": 5.007, "step": 70 }, { "epoch": 20.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.8253402709961, "eval_runtime": 0.5961, "eval_samples_per_second": 77.162, "eval_steps_per_second": 5.032, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.82349395751953, "eval_runtime": 0.7817, "eval_samples_per_second": 58.847, "eval_steps_per_second": 3.838, "step": 77 }, { "epoch": 22.86, "learning_rate": 8.333333333333334e-06, "loss": 114.5312, "step": 80 }, { "epoch": 22.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.82068634033203, "eval_runtime": 0.6071, "eval_samples_per_second": 75.766, "eval_steps_per_second": 4.941, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.81257629394531, "eval_runtime": 0.6061, "eval_samples_per_second": 75.891, "eval_steps_per_second": 4.949, "step": 84 }, { "epoch": 24.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.81000518798828, "eval_runtime": 0.6011, "eval_samples_per_second": 76.522, "eval_steps_per_second": 4.991, "step": 87 }, { "epoch": 25.71, "learning_rate": 6.25e-06, "loss": 114.5216, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.80531311035156, "eval_runtime": 0.5966, "eval_samples_per_second": 77.098, "eval_steps_per_second": 5.028, "step": 91 }, { "epoch": 26.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.80321502685547, "eval_runtime": 0.6162, "eval_samples_per_second": 74.657, "eval_steps_per_second": 4.869, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.803466796875, "eval_runtime": 0.5991, "eval_samples_per_second": 76.777, "eval_steps_per_second": 5.007, "step": 98 }, { "epoch": 28.57, "learning_rate": 4.166666666666667e-06, "loss": 112.7612, "step": 100 }, { "epoch": 28.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.7991714477539, "eval_runtime": 0.6142, "eval_samples_per_second": 74.9, "eval_steps_per_second": 4.885, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.7939224243164, "eval_runtime": 0.6141, "eval_samples_per_second": 74.902, "eval_steps_per_second": 4.885, "step": 105 }, { "epoch": 30.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.79671478271484, "eval_runtime": 0.6337, "eval_samples_per_second": 72.594, "eval_steps_per_second": 4.734, "step": 108 }, { "epoch": 31.43, "learning_rate": 2.0833333333333334e-06, "loss": 114.2748, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.79725646972656, "eval_runtime": 0.6392, "eval_samples_per_second": 71.97, "eval_steps_per_second": 4.694, "step": 112 }, { "epoch": 32.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.79711151123047, "eval_runtime": 0.5956, "eval_samples_per_second": 77.228, "eval_steps_per_second": 5.037, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.7907943725586, "eval_runtime": 0.5981, "eval_samples_per_second": 76.906, "eval_steps_per_second": 5.016, "step": 119 }, { "epoch": 34.29, "learning_rate": 0.0, "loss": 114.0708, "step": 120 }, { "epoch": 34.29, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.79315948486328, "eval_runtime": 0.5946, "eval_samples_per_second": 77.358, "eval_steps_per_second": 5.045, "step": 120 }, { "epoch": 34.29, "step": 120, "total_flos": 2.0027429927092224e+16, "train_loss": 113.75203348795573, "train_runtime": 131.4365, "train_samples_per_second": 64.822, "train_steps_per_second": 0.913 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.0027429927092224e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }