{ "best_metric": 0.10869565217391304, "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-3", "epoch": 34.285714285714285, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.97916412353516, "eval_runtime": 0.6657, "eval_samples_per_second": 69.104, "eval_steps_per_second": 4.507, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.9181137084961, "eval_runtime": 0.5771, "eval_samples_per_second": 79.702, "eval_steps_per_second": 5.198, "step": 7 }, { "epoch": 2.86, "learning_rate": 0.00027499999999999996, "loss": 114.2395, "step": 10 }, { "epoch": 2.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.88134765625, "eval_runtime": 0.5806, "eval_samples_per_second": 79.223, "eval_steps_per_second": 5.167, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.81156921386719, "eval_runtime": 0.6172, "eval_samples_per_second": 74.535, "eval_steps_per_second": 4.861, "step": 14 }, { "epoch": 4.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.69862365722656, "eval_runtime": 0.6011, "eval_samples_per_second": 76.52, "eval_steps_per_second": 4.99, "step": 17 }, { "epoch": 5.71, "learning_rate": 0.00025, "loss": 113.364, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.47984313964844, "eval_runtime": 0.5961, "eval_samples_per_second": 77.162, "eval_steps_per_second": 5.032, "step": 21 }, { "epoch": 6.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 113.23371887207031, "eval_runtime": 0.5811, "eval_samples_per_second": 79.156, "eval_steps_per_second": 5.162, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 112.82231140136719, "eval_runtime": 0.6192, "eval_samples_per_second": 74.295, "eval_steps_per_second": 4.845, "step": 28 }, { "epoch": 8.57, "learning_rate": 0.000225, "loss": 111.1062, "step": 30 }, { "epoch": 8.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 112.57879638671875, "eval_runtime": 0.6086, "eval_samples_per_second": 75.578, "eval_steps_per_second": 4.929, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 112.4156723022461, "eval_runtime": 0.7142, "eval_samples_per_second": 64.411, "eval_steps_per_second": 4.201, "step": 35 }, { "epoch": 10.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 112.55355072021484, "eval_runtime": 0.6382, "eval_samples_per_second": 72.082, "eval_steps_per_second": 4.701, "step": 38 }, { "epoch": 11.43, "learning_rate": 0.00019999999999999998, "loss": 114.7018, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.70433807373047, "eval_runtime": 0.5881, "eval_samples_per_second": 78.214, "eval_steps_per_second": 5.101, "step": 42 }, { "epoch": 12.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.65534973144531, "eval_runtime": 0.5961, "eval_samples_per_second": 77.162, "eval_steps_per_second": 5.032, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.61336517333984, "eval_runtime": 0.6682, "eval_samples_per_second": 68.845, "eval_steps_per_second": 4.49, "step": 49 }, { "epoch": 14.29, "learning_rate": 0.000175, "loss": 110.4914, "step": 50 }, { "epoch": 14.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.59801483154297, "eval_runtime": 0.5861, "eval_samples_per_second": 78.481, "eval_steps_per_second": 5.118, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.76158142089844, "eval_runtime": 0.6762, "eval_samples_per_second": 68.03, "eval_steps_per_second": 4.437, "step": 56 }, { "epoch": 16.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.15796661376953, "eval_runtime": 0.5936, "eval_samples_per_second": 77.488, "eval_steps_per_second": 5.054, "step": 59 }, { "epoch": 17.14, "learning_rate": 0.00015, "loss": 110.734, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.71080780029297, "eval_runtime": 0.6126, "eval_samples_per_second": 75.084, "eval_steps_per_second": 4.897, "step": 63 }, { "epoch": 18.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 111.52064514160156, "eval_runtime": 0.5926, "eval_samples_per_second": 77.619, "eval_steps_per_second": 5.062, "step": 66 }, { "epoch": 20.0, "learning_rate": 0.000125, "loss": 111.3772, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 110.88188171386719, "eval_runtime": 0.6091, "eval_samples_per_second": 75.517, "eval_steps_per_second": 4.925, "step": 70 }, { "epoch": 20.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 110.802490234375, "eval_runtime": 0.6172, "eval_samples_per_second": 74.536, "eval_steps_per_second": 4.861, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 110.3514633178711, "eval_runtime": 0.5981, "eval_samples_per_second": 76.906, "eval_steps_per_second": 5.016, "step": 77 }, { "epoch": 22.86, "learning_rate": 9.999999999999999e-05, "loss": 111.2973, "step": 80 }, { "epoch": 22.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 110.23866271972656, "eval_runtime": 0.6086, "eval_samples_per_second": 75.578, "eval_steps_per_second": 4.929, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.80725860595703, "eval_runtime": 0.6562, "eval_samples_per_second": 70.104, "eval_steps_per_second": 4.572, "step": 84 }, { "epoch": 24.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.82267761230469, "eval_runtime": 0.5876, "eval_samples_per_second": 78.279, "eval_steps_per_second": 5.105, "step": 87 }, { "epoch": 25.71, "learning_rate": 7.5e-05, "loss": 110.7135, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.60858154296875, "eval_runtime": 0.6076, "eval_samples_per_second": 75.702, "eval_steps_per_second": 4.937, "step": 91 }, { "epoch": 26.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.5834732055664, "eval_runtime": 0.5922, "eval_samples_per_second": 77.683, "eval_steps_per_second": 5.066, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.5829849243164, "eval_runtime": 0.6041, "eval_samples_per_second": 76.142, "eval_steps_per_second": 4.966, "step": 98 }, { "epoch": 28.57, "learning_rate": 4.9999999999999996e-05, "loss": 109.1264, "step": 100 }, { "epoch": 28.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.16484832763672, "eval_runtime": 0.5956, "eval_samples_per_second": 77.228, "eval_steps_per_second": 5.037, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.06578063964844, "eval_runtime": 0.6146, "eval_samples_per_second": 74.84, "eval_steps_per_second": 4.881, "step": 105 }, { "epoch": 30.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.2845687866211, "eval_runtime": 0.6191, "eval_samples_per_second": 74.297, "eval_steps_per_second": 4.845, "step": 108 }, { "epoch": 31.43, "learning_rate": 2.4999999999999998e-05, "loss": 110.2885, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.02674865722656, "eval_runtime": 0.5961, "eval_samples_per_second": 77.162, "eval_steps_per_second": 5.032, "step": 112 }, { "epoch": 32.86, "eval_accuracy": 0.10869565217391304, "eval_loss": 109.09725189208984, "eval_runtime": 0.6096, "eval_samples_per_second": 75.454, "eval_steps_per_second": 4.921, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 108.87687683105469, "eval_runtime": 0.6301, "eval_samples_per_second": 73.0, "eval_steps_per_second": 4.761, "step": 119 }, { "epoch": 34.29, "learning_rate": 0.0, "loss": 110.1122, "step": 120 }, { "epoch": 34.29, "eval_accuracy": 0.10869565217391304, "eval_loss": 108.8197250366211, "eval_runtime": 0.7547, "eval_samples_per_second": 60.953, "eval_steps_per_second": 3.975, "step": 120 }, { "epoch": 34.29, "step": 120, "total_flos": 2.0027429927092224e+16, "train_loss": 111.46265258789063, "train_runtime": 130.8746, "train_samples_per_second": 65.1, "train_steps_per_second": 0.917 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.0027429927092224e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }