{ "best_metric": 1.3801825046539307, "best_model_checkpoint": "./vit-emotion-classification/checkpoint-300", "epoch": 10.0, "eval_steps": 100, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "grad_norm": 2.134533643722534, "learning_rate": 0.000195, "loss": 2.0279, "step": 10 }, { "epoch": 0.5, "grad_norm": 1.939251184463501, "learning_rate": 0.00019, "loss": 1.7986, "step": 20 }, { "epoch": 0.75, "grad_norm": 1.8353347778320312, "learning_rate": 0.00018500000000000002, "loss": 1.6841, "step": 30 }, { "epoch": 1.0, "grad_norm": 2.401373863220215, "learning_rate": 0.00018, "loss": 1.5968, "step": 40 }, { "epoch": 1.25, "grad_norm": 2.080655813217163, "learning_rate": 0.000175, "loss": 1.3349, "step": 50 }, { "epoch": 1.5, "grad_norm": 2.146280527114868, "learning_rate": 0.00017, "loss": 1.2608, "step": 60 }, { "epoch": 1.75, "grad_norm": 2.722163438796997, "learning_rate": 0.000165, "loss": 1.2728, "step": 70 }, { "epoch": 2.0, "grad_norm": 1.9687062501907349, "learning_rate": 0.00016, "loss": 1.1254, "step": 80 }, { "epoch": 2.25, "grad_norm": 2.55076265335083, "learning_rate": 0.000155, "loss": 0.8877, "step": 90 }, { "epoch": 2.5, "grad_norm": 2.256789445877075, "learning_rate": 0.00015000000000000001, "loss": 0.8454, "step": 100 }, { "epoch": 2.5, "eval_accuracy": 0.48125, "eval_loss": 1.4373115301132202, "eval_runtime": 0.8376, "eval_samples_per_second": 191.019, "eval_steps_per_second": 23.877, "step": 100 }, { "epoch": 2.75, "grad_norm": 2.4422082901000977, "learning_rate": 0.000145, "loss": 0.8337, "step": 110 }, { "epoch": 3.0, "grad_norm": 3.179633140563965, "learning_rate": 0.00014, "loss": 0.882, "step": 120 }, { "epoch": 3.25, "grad_norm": 1.4560832977294922, "learning_rate": 0.00013500000000000003, "loss": 0.5198, "step": 130 }, { "epoch": 3.5, "grad_norm": 2.0190017223358154, "learning_rate": 0.00013000000000000002, "loss": 0.4344, "step": 140 }, { "epoch": 3.75, "grad_norm": 3.2591023445129395, "learning_rate": 0.000125, "loss": 0.4698, "step": 150 }, { "epoch": 4.0, "grad_norm": 2.652550220489502, "learning_rate": 0.00012, "loss": 0.54, "step": 160 }, { "epoch": 4.25, "grad_norm": 0.5937463641166687, "learning_rate": 0.00011499999999999999, "loss": 0.2737, "step": 170 }, { "epoch": 4.5, "grad_norm": 1.0296827554702759, "learning_rate": 0.00011000000000000002, "loss": 0.2402, "step": 180 }, { "epoch": 4.75, "grad_norm": 3.007828712463379, "learning_rate": 0.000105, "loss": 0.1952, "step": 190 }, { "epoch": 5.0, "grad_norm": 1.936133623123169, "learning_rate": 0.0001, "loss": 0.2022, "step": 200 }, { "epoch": 5.0, "eval_accuracy": 0.55, "eval_loss": 1.406724214553833, "eval_runtime": 0.8264, "eval_samples_per_second": 193.601, "eval_steps_per_second": 24.2, "step": 200 }, { "epoch": 5.25, "grad_norm": 0.3376343250274658, "learning_rate": 9.5e-05, "loss": 0.1086, "step": 210 }, { "epoch": 5.5, "grad_norm": 2.637477397918701, "learning_rate": 9e-05, "loss": 0.109, "step": 220 }, { "epoch": 5.75, "grad_norm": 3.05291485786438, "learning_rate": 8.5e-05, "loss": 0.0957, "step": 230 }, { "epoch": 6.0, "grad_norm": 3.055624485015869, "learning_rate": 8e-05, "loss": 0.136, "step": 240 }, { "epoch": 6.25, "grad_norm": 0.30109065771102905, "learning_rate": 7.500000000000001e-05, "loss": 0.0741, "step": 250 }, { "epoch": 6.5, "grad_norm": 0.19268257915973663, "learning_rate": 7e-05, "loss": 0.0664, "step": 260 }, { "epoch": 6.75, "grad_norm": 0.15611745417118073, "learning_rate": 
6.500000000000001e-05, "loss": 0.0559, "step": 270 }, { "epoch": 7.0, "grad_norm": 0.21113821864128113, "learning_rate": 6e-05, "loss": 0.0599, "step": 280 }, { "epoch": 7.25, "grad_norm": 0.160753071308136, "learning_rate": 5.500000000000001e-05, "loss": 0.049, "step": 290 }, { "epoch": 7.5, "grad_norm": 0.13460688292980194, "learning_rate": 5e-05, "loss": 0.0474, "step": 300 }, { "epoch": 7.5, "eval_accuracy": 0.6125, "eval_loss": 1.3801825046539307, "eval_runtime": 0.8118, "eval_samples_per_second": 197.084, "eval_steps_per_second": 24.635, "step": 300 }, { "epoch": 7.75, "grad_norm": 0.13902044296264648, "learning_rate": 4.5e-05, "loss": 0.0452, "step": 310 }, { "epoch": 8.0, "grad_norm": 0.12478330731391907, "learning_rate": 4e-05, "loss": 0.0424, "step": 320 }, { "epoch": 8.25, "grad_norm": 0.11532563716173172, "learning_rate": 3.5e-05, "loss": 0.0409, "step": 330 }, { "epoch": 8.5, "grad_norm": 0.11974634230136871, "learning_rate": 3e-05, "loss": 0.0395, "step": 340 }, { "epoch": 8.75, "grad_norm": 0.1203409880399704, "learning_rate": 2.5e-05, "loss": 0.0393, "step": 350 }, { "epoch": 9.0, "grad_norm": 0.13047201931476593, "learning_rate": 2e-05, "loss": 0.0386, "step": 360 }, { "epoch": 9.25, "grad_norm": 0.10817253589630127, "learning_rate": 1.5e-05, "loss": 0.0376, "step": 370 }, { "epoch": 9.5, "grad_norm": 0.12842506170272827, "learning_rate": 1e-05, "loss": 0.037, "step": 380 }, { "epoch": 9.75, "grad_norm": 0.12066592276096344, "learning_rate": 5e-06, "loss": 0.0371, "step": 390 }, { "epoch": 10.0, "grad_norm": 0.13028773665428162, "learning_rate": 0.0, "loss": 0.0368, "step": 400 }, { "epoch": 10.0, "eval_accuracy": 0.59375, "eval_loss": 1.4388375282287598, "eval_runtime": 0.8121, "eval_samples_per_second": 197.013, "eval_steps_per_second": 24.627, "step": 400 }, { "epoch": 10.0, "step": 400, "total_flos": 4.959754037231616e+17, "train_loss": 0.4905405020713806, "train_runtime": 117.1653, "train_samples_per_second": 54.624, "train_steps_per_second": 3.414 } ], "logging_steps": 10, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.959754037231616e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }