{ "best_metric": 0.717391304347826, "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-66", "epoch": 34.285714285714285, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.86, "eval_accuracy": 0.5217391304347826, "eval_loss": 1.370601773262024, "eval_runtime": 0.6692, "eval_samples_per_second": 68.744, "eval_steps_per_second": 4.483, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 1.1519584655761719, "eval_runtime": 0.6777, "eval_samples_per_second": 67.88, "eval_steps_per_second": 4.427, "step": 7 }, { "epoch": 2.86, "learning_rate": 0.004166666666666667, "loss": 1.2948, "step": 10 }, { "epoch": 2.86, "eval_accuracy": 0.6086956521739131, "eval_loss": 1.293420433998108, "eval_runtime": 0.6372, "eval_samples_per_second": 72.195, "eval_steps_per_second": 4.708, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 1.213881254196167, "eval_runtime": 0.8102, "eval_samples_per_second": 56.775, "eval_steps_per_second": 3.703, "step": 14 }, { "epoch": 4.86, "eval_accuracy": 0.5217391304347826, "eval_loss": 2.594619035720825, "eval_runtime": 0.6367, "eval_samples_per_second": 72.253, "eval_steps_per_second": 4.712, "step": 17 }, { "epoch": 5.71, "learning_rate": 0.004629629629629629, "loss": 1.1888, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.4782608695652174, "eval_loss": 1.121720314025879, "eval_runtime": 0.6392, "eval_samples_per_second": 71.969, "eval_steps_per_second": 4.694, "step": 21 }, { "epoch": 6.86, "eval_accuracy": 0.5869565217391305, "eval_loss": 1.1050187349319458, "eval_runtime": 0.6016, "eval_samples_per_second": 76.458, "eval_steps_per_second": 4.986, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.391304347826087, "eval_loss": 1.6684364080429077, "eval_runtime": 0.6072, "eval_samples_per_second": 75.764, "eval_steps_per_second": 4.941, "step": 28 }, { "epoch": 8.57, "learning_rate": 0.004166666666666667, "loss": 1.1236, "step": 30 }, { "epoch": 8.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8967575430870056, "eval_runtime": 0.6271, "eval_samples_per_second": 73.349, "eval_steps_per_second": 4.784, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.5652173913043478, "eval_loss": 1.2403887510299683, "eval_runtime": 0.6136, "eval_samples_per_second": 74.962, "eval_steps_per_second": 4.889, "step": 35 }, { "epoch": 10.86, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9956691861152649, "eval_runtime": 0.6206, "eval_samples_per_second": 74.116, "eval_steps_per_second": 4.834, "step": 38 }, { "epoch": 11.43, "learning_rate": 0.0037037037037037034, "loss": 1.0477, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.5, "eval_loss": 1.169378399848938, "eval_runtime": 0.6266, "eval_samples_per_second": 73.407, "eval_steps_per_second": 4.787, "step": 42 }, { "epoch": 12.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9246761202812195, "eval_runtime": 0.6251, "eval_samples_per_second": 73.583, "eval_steps_per_second": 4.799, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.9464444518089294, "eval_runtime": 0.6056, "eval_samples_per_second": 75.952, "eval_steps_per_second": 4.953, "step": 49 }, { "epoch": 14.29, "learning_rate": 0.0032407407407407406, "loss": 1.0264, "step": 50 }, { "epoch": 14.86, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.8716233968734741, "eval_runtime": 0.6542, "eval_samples_per_second": 70.319, "eval_steps_per_second": 4.586, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8908547759056091, "eval_runtime": 0.5941, "eval_samples_per_second": 77.424, "eval_steps_per_second": 5.049, "step": 56 }, { "epoch": 16.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8653961420059204, "eval_runtime": 0.6262, "eval_samples_per_second": 73.464, "eval_steps_per_second": 4.791, "step": 59 }, { "epoch": 17.14, "learning_rate": 0.002777777777777778, "loss": 0.909, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9091392755508423, "eval_runtime": 0.5961, "eval_samples_per_second": 77.164, "eval_steps_per_second": 5.032, "step": 63 }, { "epoch": 18.86, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8398311734199524, "eval_runtime": 0.6447, "eval_samples_per_second": 71.356, "eval_steps_per_second": 4.654, "step": 66 }, { "epoch": 20.0, "learning_rate": 0.0023148148148148147, "loss": 0.8914, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.9392647743225098, "eval_runtime": 0.5991, "eval_samples_per_second": 76.776, "eval_steps_per_second": 5.007, "step": 70 }, { "epoch": 20.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9566595554351807, "eval_runtime": 0.6272, "eval_samples_per_second": 73.347, "eval_steps_per_second": 4.783, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9479435086250305, "eval_runtime": 0.6122, "eval_samples_per_second": 75.145, "eval_steps_per_second": 4.901, "step": 77 }, { "epoch": 22.86, "learning_rate": 0.0018518518518518517, "loss": 0.8573, "step": 80 }, { "epoch": 22.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9308140873908997, "eval_runtime": 0.6176, "eval_samples_per_second": 74.477, "eval_steps_per_second": 4.857, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8986259698867798, "eval_runtime": 0.6021, "eval_samples_per_second": 76.395, "eval_steps_per_second": 4.982, "step": 84 }, { "epoch": 24.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9345232844352722, "eval_runtime": 0.6651, "eval_samples_per_second": 69.157, "eval_steps_per_second": 4.51, "step": 87 }, { "epoch": 25.71, "learning_rate": 0.001388888888888889, "loss": 0.8249, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9843014478683472, "eval_runtime": 0.8742, "eval_samples_per_second": 52.618, "eval_steps_per_second": 3.432, "step": 91 }, { "epoch": 26.86, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9945157766342163, "eval_runtime": 0.9362, "eval_samples_per_second": 49.133, "eval_steps_per_second": 3.204, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.9493198394775391, "eval_runtime": 0.6161, "eval_samples_per_second": 74.659, "eval_steps_per_second": 4.869, "step": 98 }, { "epoch": 28.57, "learning_rate": 0.0009259259259259259, "loss": 0.8307, "step": 100 }, { "epoch": 28.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9074158072471619, "eval_runtime": 0.9137, "eval_samples_per_second": 50.344, "eval_steps_per_second": 3.283, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.949679970741272, "eval_runtime": 0.5932, "eval_samples_per_second": 77.552, "eval_steps_per_second": 5.058, "step": 105 }, { "epoch": 30.86, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9572489261627197, "eval_runtime": 0.6501, "eval_samples_per_second": 70.753, "eval_steps_per_second": 4.614, "step": 108 }, { "epoch": 31.43, "learning_rate": 0.0004629629629629629, "loss": 0.7781, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9732490181922913, "eval_runtime": 0.6066, "eval_samples_per_second": 75.827, "eval_steps_per_second": 4.945, "step": 112 }, { "epoch": 32.86, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.9417593479156494, "eval_runtime": 0.6372, "eval_samples_per_second": 72.195, "eval_steps_per_second": 4.708, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9532932043075562, "eval_runtime": 0.6031, "eval_samples_per_second": 76.268, "eval_steps_per_second": 4.974, "step": 119 }, { "epoch": 34.29, "learning_rate": 0.0, "loss": 0.8104, "step": 120 }, { "epoch": 34.29, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9481474757194519, "eval_runtime": 0.6322, "eval_samples_per_second": 72.766, "eval_steps_per_second": 4.746, "step": 120 }, { "epoch": 34.29, "step": 120, "total_flos": 2.0027429927092224e+16, "train_loss": 0.965270467599233, "train_runtime": 137.441, "train_samples_per_second": 61.99, "train_steps_per_second": 0.873 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.0027429927092224e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }