{ "best_metric": 0.9064, "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuning/checkpoint-351", "epoch": 0.997867803837953, "eval_steps": 500, "global_step": 351, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 12.392043113708496, "learning_rate": 1.388888888888889e-05, "loss": 2.38, "step": 10 }, { "epoch": 0.06, "grad_norm": 9.366357803344727, "learning_rate": 2.777777777777778e-05, "loss": 2.0468, "step": 20 }, { "epoch": 0.09, "grad_norm": 10.620584487915039, "learning_rate": 4.166666666666667e-05, "loss": 1.5116, "step": 30 }, { "epoch": 0.11, "grad_norm": 9.78640079498291, "learning_rate": 4.936507936507937e-05, "loss": 1.0056, "step": 40 }, { "epoch": 0.14, "grad_norm": 7.943943977355957, "learning_rate": 4.7777777777777784e-05, "loss": 0.7888, "step": 50 }, { "epoch": 0.17, "grad_norm": 9.149613380432129, "learning_rate": 4.6190476190476194e-05, "loss": 0.6486, "step": 60 }, { "epoch": 0.2, "grad_norm": 7.764841079711914, "learning_rate": 4.460317460317461e-05, "loss": 0.6232, "step": 70 }, { "epoch": 0.23, "grad_norm": 8.434979438781738, "learning_rate": 4.301587301587302e-05, "loss": 0.5555, "step": 80 }, { "epoch": 0.26, "grad_norm": 8.733748435974121, "learning_rate": 4.1428571428571437e-05, "loss": 0.4872, "step": 90 }, { "epoch": 0.28, "grad_norm": 7.663728713989258, "learning_rate": 3.984126984126984e-05, "loss": 0.5385, "step": 100 }, { "epoch": 0.31, "grad_norm": 6.9408698081970215, "learning_rate": 3.8253968253968256e-05, "loss": 0.4591, "step": 110 }, { "epoch": 0.34, "grad_norm": 6.367518424987793, "learning_rate": 3.6666666666666666e-05, "loss": 0.4449, "step": 120 }, { "epoch": 0.37, "grad_norm": 6.589195728302002, "learning_rate": 3.5079365079365075e-05, "loss": 0.4365, "step": 130 }, { "epoch": 0.4, "grad_norm": 8.5034818649292, "learning_rate": 3.349206349206349e-05, "loss": 0.4538, "step": 140 }, { "epoch": 0.43, "grad_norm": 6.432806491851807, "learning_rate": 3.19047619047619e-05, "loss": 0.4182, "step": 150 }, { "epoch": 0.45, "grad_norm": 5.926751613616943, "learning_rate": 3.0317460317460318e-05, "loss": 0.3683, "step": 160 }, { "epoch": 0.48, "grad_norm": 7.516120433807373, "learning_rate": 2.8730158730158728e-05, "loss": 0.4315, "step": 170 }, { "epoch": 0.51, "grad_norm": 7.042306423187256, "learning_rate": 2.714285714285714e-05, "loss": 0.3883, "step": 180 }, { "epoch": 0.54, "grad_norm": 6.7353620529174805, "learning_rate": 2.5555555555555554e-05, "loss": 0.4158, "step": 190 }, { "epoch": 0.57, "grad_norm": 6.9553914070129395, "learning_rate": 2.396825396825397e-05, "loss": 0.3893, "step": 200 }, { "epoch": 0.6, "grad_norm": 6.948991298675537, "learning_rate": 2.2380952380952384e-05, "loss": 0.4288, "step": 210 }, { "epoch": 0.63, "grad_norm": 6.90835428237915, "learning_rate": 2.0793650793650797e-05, "loss": 0.368, "step": 220 }, { "epoch": 0.65, "grad_norm": 7.47681999206543, "learning_rate": 1.920634920634921e-05, "loss": 0.4212, "step": 230 }, { "epoch": 0.68, "grad_norm": 5.4326887130737305, "learning_rate": 1.761904761904762e-05, "loss": 0.4101, "step": 240 }, { "epoch": 0.71, "grad_norm": 5.068437576293945, "learning_rate": 1.6031746031746033e-05, "loss": 0.3544, "step": 250 }, { "epoch": 0.74, "grad_norm": 5.686431884765625, "learning_rate": 1.4444444444444444e-05, "loss": 0.3252, "step": 260 }, { "epoch": 0.77, "grad_norm": 5.882190227508545, "learning_rate": 1.2857142857142857e-05, "loss": 0.3475, "step": 270 }, { "epoch": 0.8, 
"grad_norm": 6.3953447341918945, "learning_rate": 1.126984126984127e-05, "loss": 0.3164, "step": 280 }, { "epoch": 0.82, "grad_norm": 7.278485298156738, "learning_rate": 9.682539682539683e-06, "loss": 0.328, "step": 290 }, { "epoch": 0.85, "grad_norm": 6.7763166427612305, "learning_rate": 8.095238095238097e-06, "loss": 0.3463, "step": 300 }, { "epoch": 0.88, "grad_norm": 7.21019172668457, "learning_rate": 6.507936507936509e-06, "loss": 0.3762, "step": 310 }, { "epoch": 0.91, "grad_norm": 8.0834379196167, "learning_rate": 4.920634920634921e-06, "loss": 0.3538, "step": 320 }, { "epoch": 0.94, "grad_norm": 5.969318389892578, "learning_rate": 3.3333333333333333e-06, "loss": 0.3159, "step": 330 }, { "epoch": 0.97, "grad_norm": 7.134821891784668, "learning_rate": 1.7460317460317462e-06, "loss": 0.3528, "step": 340 }, { "epoch": 1.0, "grad_norm": 6.126035213470459, "learning_rate": 1.5873015873015874e-07, "loss": 0.2928, "step": 350 }, { "epoch": 1.0, "eval_accuracy": 0.9064, "eval_loss": 0.27925798296928406, "eval_runtime": 77.4382, "eval_samples_per_second": 64.568, "eval_steps_per_second": 2.027, "step": 351 }, { "epoch": 1.0, "step": 351, "total_flos": 3.4803216831306793e+18, "train_loss": 0.5742807560666674, "train_runtime": 2256.218, "train_samples_per_second": 19.945, "train_steps_per_second": 0.156 } ], "logging_steps": 10, "max_steps": 351, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 3.4803216831306793e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }