{ "best_metric": 0.7391304347826086, "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-45", "epoch": 34.285714285714285, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.86, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.3835715055465698, "eval_runtime": 0.849, "eval_samples_per_second": 54.181, "eval_steps_per_second": 3.534, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.3326613903045654, "eval_runtime": 0.6171, "eval_samples_per_second": 74.538, "eval_steps_per_second": 4.861, "step": 7 }, { "epoch": 2.86, "learning_rate": 0.00125, "loss": 1.3567, "step": 10 }, { "epoch": 2.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.168088436126709, "eval_runtime": 0.6072, "eval_samples_per_second": 75.764, "eval_steps_per_second": 4.941, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.5652173913043478, "eval_loss": 1.044042944908142, "eval_runtime": 0.6302, "eval_samples_per_second": 72.998, "eval_steps_per_second": 4.761, "step": 14 }, { "epoch": 4.86, "eval_accuracy": 0.6304347826086957, "eval_loss": 1.0461686849594116, "eval_runtime": 0.6512, "eval_samples_per_second": 70.643, "eval_steps_per_second": 4.607, "step": 17 }, { "epoch": 5.71, "learning_rate": 0.001388888888888889, "loss": 1.0903, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.9293990731239319, "eval_runtime": 0.6116, "eval_samples_per_second": 75.207, "eval_steps_per_second": 4.905, "step": 21 }, { "epoch": 6.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.9571677446365356, "eval_runtime": 0.6031, "eval_samples_per_second": 76.268, "eval_steps_per_second": 4.974, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9286340475082397, "eval_runtime": 0.6212, "eval_samples_per_second": 74.056, "eval_steps_per_second": 4.83, "step": 28 }, { "epoch": 8.57, "learning_rate": 0.00125, "loss": 1.0969, "step": 30 }, { "epoch": 8.86, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.9229152798652649, "eval_runtime": 0.6537, "eval_samples_per_second": 70.374, "eval_steps_per_second": 4.59, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.9061374664306641, "eval_runtime": 0.6311, "eval_samples_per_second": 72.884, "eval_steps_per_second": 4.753, "step": 35 }, { "epoch": 10.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8341346383094788, "eval_runtime": 0.6232, "eval_samples_per_second": 73.818, "eval_steps_per_second": 4.814, "step": 38 }, { "epoch": 11.43, "learning_rate": 0.0011111111111111111, "loss": 0.8923, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.878592848777771, "eval_runtime": 0.6071, "eval_samples_per_second": 75.766, "eval_steps_per_second": 4.941, "step": 42 }, { "epoch": 12.86, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8596428632736206, "eval_runtime": 0.6192, "eval_samples_per_second": 74.295, "eval_steps_per_second": 4.845, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8902113437652588, "eval_runtime": 0.6051, "eval_samples_per_second": 76.016, "eval_steps_per_second": 4.958, "step": 49 }, { "epoch": 14.29, "learning_rate": 0.0009722222222222222, "loss": 0.7289, "step": 50 }, { "epoch": 14.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8024305105209351, "eval_runtime": 0.6026, "eval_samples_per_second": 76.33, "eval_steps_per_second": 4.978, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9340682625770569, "eval_runtime": 0.6272, "eval_samples_per_second": 73.347, "eval_steps_per_second": 4.783, "step": 56 }, { "epoch": 16.86, "eval_accuracy": 0.717391304347826, "eval_loss": 1.0463521480560303, "eval_runtime": 0.6191, "eval_samples_per_second": 74.297, "eval_steps_per_second": 4.845, "step": 59 }, { "epoch": 17.14, "learning_rate": 0.0008333333333333334, "loss": 0.6609, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9922565817832947, "eval_runtime": 0.5991, "eval_samples_per_second": 76.776, "eval_steps_per_second": 5.007, "step": 63 }, { "epoch": 18.86, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8224782347679138, "eval_runtime": 0.6182, "eval_samples_per_second": 74.415, "eval_steps_per_second": 4.853, "step": 66 }, { "epoch": 20.0, "learning_rate": 0.0006944444444444445, "loss": 0.6527, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.874836802482605, "eval_runtime": 0.6096, "eval_samples_per_second": 75.454, "eval_steps_per_second": 4.921, "step": 70 }, { "epoch": 20.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8051551580429077, "eval_runtime": 0.5866, "eval_samples_per_second": 78.412, "eval_steps_per_second": 5.114, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8861276507377625, "eval_runtime": 0.6181, "eval_samples_per_second": 74.417, "eval_steps_per_second": 4.853, "step": 77 }, { "epoch": 22.86, "learning_rate": 0.0005555555555555556, "loss": 0.493, "step": 80 }, { "epoch": 22.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9554585218429565, "eval_runtime": 0.6196, "eval_samples_per_second": 74.236, "eval_steps_per_second": 4.841, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.0335818529129028, "eval_runtime": 0.6146, "eval_samples_per_second": 74.84, "eval_steps_per_second": 4.881, "step": 84 }, { "epoch": 24.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9960870146751404, "eval_runtime": 0.6046, "eval_samples_per_second": 76.078, "eval_steps_per_second": 4.962, "step": 87 }, { "epoch": 25.71, "learning_rate": 0.0004166666666666667, "loss": 0.4088, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.0399607419967651, "eval_runtime": 0.6222, "eval_samples_per_second": 73.937, "eval_steps_per_second": 4.822, "step": 91 }, { "epoch": 26.86, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.053550362586975, "eval_runtime": 0.6206, "eval_samples_per_second": 74.116, "eval_steps_per_second": 4.834, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.1388055086135864, "eval_runtime": 0.6152, "eval_samples_per_second": 74.778, "eval_steps_per_second": 4.877, "step": 98 }, { "epoch": 28.57, "learning_rate": 0.0002777777777777778, "loss": 0.4047, "step": 100 }, { "epoch": 28.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.2294652462005615, "eval_runtime": 0.6572, "eval_samples_per_second": 69.998, "eval_steps_per_second": 4.565, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.2627309560775757, "eval_runtime": 0.6271, "eval_samples_per_second": 73.349, "eval_steps_per_second": 4.784, "step": 105 }, { "epoch": 30.86, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.2371925115585327, "eval_runtime": 0.6617, "eval_samples_per_second": 69.522, "eval_steps_per_second": 4.534, "step": 108 }, { "epoch": 31.43, "learning_rate": 0.0001388888888888889, "loss": 0.3681, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.2918990850448608, "eval_runtime": 0.6482, "eval_samples_per_second": 70.97, "eval_steps_per_second": 4.628, "step": 112 }, { "epoch": 32.86, "eval_accuracy": 0.6521739130434783, "eval_loss": 1.2453422546386719, "eval_runtime": 0.6331, "eval_samples_per_second": 72.653, "eval_steps_per_second": 4.738, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.2612279653549194, "eval_runtime": 0.6181, "eval_samples_per_second": 74.417, "eval_steps_per_second": 4.853, "step": 119 }, { "epoch": 34.29, "learning_rate": 0.0, "loss": 0.353, "step": 120 }, { "epoch": 34.29, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.2610585689544678, "eval_runtime": 0.6256, "eval_samples_per_second": 73.524, "eval_steps_per_second": 4.795, "step": 120 }, { "epoch": 34.29, "step": 120, "total_flos": 2.0027429927092224e+16, "train_loss": 0.7088598291079203, "train_runtime": 131.3917, "train_samples_per_second": 64.844, "train_steps_per_second": 0.913 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.0027429927092224e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }