{ "best_metric": 0.6818181818181818, "best_model_checkpoint": "videomae-base-finetuned-Custom_Dataset_Finetune/checkpoint-126", "epoch": 3.2439999999999998, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 4.941892147064209, "learning_rate": 2.4e-05, "loss": 1.8353, "step": 10 }, { "epoch": 0.08, "grad_norm": 6.511570453643799, "learning_rate": 4.8e-05, "loss": 1.7037, "step": 20 }, { "epoch": 0.12, "grad_norm": 5.7441301345825195, "learning_rate": 5.8666666666666665e-05, "loss": 1.4799, "step": 30 }, { "epoch": 0.16, "grad_norm": 6.4645514488220215, "learning_rate": 5.6e-05, "loss": 1.2797, "step": 40 }, { "epoch": 0.2, "grad_norm": 6.132994651794434, "learning_rate": 5.333333333333333e-05, "loss": 1.1603, "step": 50 }, { "epoch": 0.24, "grad_norm": 4.506332874298096, "learning_rate": 5.066666666666667e-05, "loss": 0.9284, "step": 60 }, { "epoch": 0.252, "eval_accuracy": 0.5151515151515151, "eval_loss": 0.9844135046005249, "eval_runtime": 60.7277, "eval_samples_per_second": 1.087, "eval_steps_per_second": 0.082, "step": 63 }, { "epoch": 1.028, "grad_norm": 6.4644880294799805, "learning_rate": 4.8e-05, "loss": 0.918, "step": 70 }, { "epoch": 1.068, "grad_norm": 10.729351997375488, "learning_rate": 4.5333333333333335e-05, "loss": 0.9294, "step": 80 }, { "epoch": 1.108, "grad_norm": 8.79098129272461, "learning_rate": 4.266666666666667e-05, "loss": 0.9386, "step": 90 }, { "epoch": 1.148, "grad_norm": 4.564964294433594, "learning_rate": 3.9999999999999996e-05, "loss": 1.0353, "step": 100 }, { "epoch": 1.188, "grad_norm": 6.161099433898926, "learning_rate": 3.733333333333334e-05, "loss": 0.9968, "step": 110 }, { "epoch": 1.228, "grad_norm": 4.650356292724609, "learning_rate": 3.4666666666666665e-05, "loss": 0.9402, "step": 120 }, { "epoch": 1.252, "eval_accuracy": 0.6818181818181818, "eval_loss": 0.9521052241325378, "eval_runtime": 62.8381, "eval_samples_per_second": 1.05, "eval_steps_per_second": 0.08, "step": 126 }, { "epoch": 2.016, "grad_norm": 3.565009117126465, "learning_rate": 3.2e-05, "loss": 0.8584, "step": 130 }, { "epoch": 2.056, "grad_norm": 5.3584489822387695, "learning_rate": 2.9333333333333333e-05, "loss": 0.8446, "step": 140 }, { "epoch": 2.096, "grad_norm": 9.390802383422852, "learning_rate": 2.6666666666666667e-05, "loss": 0.8279, "step": 150 }, { "epoch": 2.136, "grad_norm": 7.512342929840088, "learning_rate": 2.4e-05, "loss": 0.8049, "step": 160 }, { "epoch": 2.176, "grad_norm": 4.40623664855957, "learning_rate": 2.1333333333333335e-05, "loss": 0.8291, "step": 170 }, { "epoch": 2.216, "grad_norm": 6.4893059730529785, "learning_rate": 1.866666666666667e-05, "loss": 0.8639, "step": 180 }, { "epoch": 2.252, "eval_accuracy": 0.6060606060606061, "eval_loss": 0.7931472063064575, "eval_runtime": 63.7339, "eval_samples_per_second": 1.036, "eval_steps_per_second": 0.078, "step": 189 }, { "epoch": 3.004, "grad_norm": 5.078976154327393, "learning_rate": 1.6e-05, "loss": 0.7345, "step": 190 }, { "epoch": 3.044, "grad_norm": 2.724031925201416, "learning_rate": 1.3333333333333333e-05, "loss": 0.7462, "step": 200 }, { "epoch": 3.084, "grad_norm": 3.8141984939575195, "learning_rate": 1.0666666666666667e-05, "loss": 0.7004, "step": 210 }, { "epoch": 3.124, "grad_norm": 3.8294148445129395, "learning_rate": 8e-06, "loss": 0.7095, "step": 220 }, { "epoch": 3.164, "grad_norm": 5.064866065979004, "learning_rate": 5.333333333333334e-06, "loss": 0.7274, "step": 230 }, { "epoch": 3.204, "grad_norm": 3.320218801498413, "learning_rate": 2.666666666666667e-06, "loss": 0.6674, "step": 240 }, { "epoch": 3.2439999999999998, "grad_norm": 3.968177080154419, "learning_rate": 0.0, "loss": 0.6195, "step": 250 }, { "epoch": 3.2439999999999998, "eval_accuracy": 0.6363636363636364, "eval_loss": 0.910934329032898, "eval_runtime": 73.6738, "eval_samples_per_second": 0.896, "eval_steps_per_second": 0.068, "step": 250 }, { "epoch": 3.2439999999999998, "step": 250, "total_flos": 4.9769595049134e+18, "train_loss": 0.9631701583862304, "train_runtime": 5330.9549, "train_samples_per_second": 0.75, "train_steps_per_second": 0.047 }, { "epoch": 3.2439999999999998, "eval_accuracy": 0.7586206896551724, "eval_loss": 0.8059645295143127, "eval_runtime": 63.0154, "eval_samples_per_second": 0.92, "eval_steps_per_second": 0.063, "step": 250 }, { "epoch": 3.2439999999999998, "eval_accuracy": 0.7586206896551724, "eval_loss": 0.8059644103050232, "eval_runtime": 62.6184, "eval_samples_per_second": 0.926, "eval_steps_per_second": 0.064, "step": 250 } ], "logging_steps": 10, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.9769595049134e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }