{ "best_metric": 1.0, "best_model_checkpoint": "YAHIA/ViViT-b-16x2-ShopLifting-Dataset\\checkpoint-596", "epoch": 1.4983221476510067, "eval_steps": 500, "global_step": 596, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 10.692367553710938, "learning_rate": 8.333333333333334e-06, "loss": 0.7508, "step": 10 }, { "epoch": 0.03, "grad_norm": 15.156712532043457, "learning_rate": 1.6666666666666667e-05, "loss": 0.5978, "step": 20 }, { "epoch": 0.05, "grad_norm": 1.6303211450576782, "learning_rate": 2.5e-05, "loss": 0.6351, "step": 30 }, { "epoch": 0.07, "grad_norm": 41.47138214111328, "learning_rate": 3.3333333333333335e-05, "loss": 0.9622, "step": 40 }, { "epoch": 0.08, "grad_norm": 1.7870268821716309, "learning_rate": 4.166666666666667e-05, "loss": 0.3437, "step": 50 }, { "epoch": 0.1, "grad_norm": 0.17213623225688934, "learning_rate": 5e-05, "loss": 0.4405, "step": 60 }, { "epoch": 0.12, "grad_norm": 0.06276705861091614, "learning_rate": 4.906716417910448e-05, "loss": 0.2694, "step": 70 }, { "epoch": 0.13, "grad_norm": 1.269716501235962, "learning_rate": 4.813432835820896e-05, "loss": 0.6896, "step": 80 }, { "epoch": 0.15, "grad_norm": 30.071969985961914, "learning_rate": 4.7201492537313436e-05, "loss": 1.224, "step": 90 }, { "epoch": 0.17, "grad_norm": 11.760859489440918, "learning_rate": 4.626865671641791e-05, "loss": 0.6247, "step": 100 }, { "epoch": 0.18, "grad_norm": 0.22317390143871307, "learning_rate": 4.5335820895522394e-05, "loss": 0.2921, "step": 110 }, { "epoch": 0.2, "grad_norm": 27.252477645874023, "learning_rate": 4.440298507462687e-05, "loss": 0.6916, "step": 120 }, { "epoch": 0.22, "grad_norm": 0.2607596814632416, "learning_rate": 4.3470149253731345e-05, "loss": 0.2637, "step": 130 }, { "epoch": 0.23, "grad_norm": 25.883403778076172, "learning_rate": 4.253731343283582e-05, "loss": 0.3661, "step": 140 }, { "epoch": 0.25, "grad_norm": 0.009978611953556538, "learning_rate": 4.16044776119403e-05, "loss": 0.2078, "step": 150 }, { "epoch": 0.27, "grad_norm": 28.141712188720703, "learning_rate": 4.067164179104478e-05, "loss": 0.4631, "step": 160 }, { "epoch": 0.29, "grad_norm": 0.08477218449115753, "learning_rate": 3.9738805970149254e-05, "loss": 0.7177, "step": 170 }, { "epoch": 0.3, "grad_norm": 0.6206006407737732, "learning_rate": 3.8805970149253736e-05, "loss": 0.0064, "step": 180 }, { "epoch": 0.32, "grad_norm": 22.071073532104492, "learning_rate": 3.787313432835821e-05, "loss": 0.7343, "step": 190 }, { "epoch": 0.34, "grad_norm": 134.8112335205078, "learning_rate": 3.694029850746269e-05, "loss": 0.1803, "step": 200 }, { "epoch": 0.35, "grad_norm": 54.530120849609375, "learning_rate": 3.600746268656717e-05, "loss": 0.135, "step": 210 }, { "epoch": 0.37, "grad_norm": 0.017338775098323822, "learning_rate": 3.5074626865671645e-05, "loss": 0.0485, "step": 220 }, { "epoch": 0.39, "grad_norm": 0.008222455158829689, "learning_rate": 3.414179104477612e-05, "loss": 0.311, "step": 230 }, { "epoch": 0.4, "grad_norm": 0.012648209929466248, "learning_rate": 3.32089552238806e-05, "loss": 0.253, "step": 240 }, { "epoch": 0.42, "grad_norm": 0.00926213525235653, "learning_rate": 3.227611940298508e-05, "loss": 0.0245, "step": 250 }, { "epoch": 0.44, "grad_norm": 1.1042240858078003, "learning_rate": 3.1343283582089554e-05, "loss": 0.2106, "step": 260 }, { "epoch": 0.45, "grad_norm": 0.034143492579460144, "learning_rate": 3.0410447761194033e-05, "loss": 0.268, "step": 270 }, { "epoch": 0.47, "grad_norm": 0.12180909514427185, "learning_rate": 2.9477611940298512e-05, "loss": 0.0042, "step": 280 }, { "epoch": 0.49, "grad_norm": 0.00459245964884758, "learning_rate": 2.8544776119402988e-05, "loss": 0.0004, "step": 290 }, { "epoch": 0.5, "eval_accuracy": 0.952755905511811, "eval_loss": 0.26606377959251404, "eval_runtime": 510.7369, "eval_samples_per_second": 0.249, "eval_steps_per_second": 0.125, "step": 299 }, { "epoch": 1.0, "grad_norm": 0.006104098167270422, "learning_rate": 2.7611940298507467e-05, "loss": 0.0002, "step": 300 }, { "epoch": 1.02, "grad_norm": 37.364593505859375, "learning_rate": 2.6679104477611942e-05, "loss": 0.046, "step": 310 }, { "epoch": 1.04, "grad_norm": 0.030606430023908615, "learning_rate": 2.574626865671642e-05, "loss": 0.0002, "step": 320 }, { "epoch": 1.05, "grad_norm": 0.3013696074485779, "learning_rate": 2.4813432835820897e-05, "loss": 0.5804, "step": 330 }, { "epoch": 1.07, "grad_norm": 0.003663327544927597, "learning_rate": 2.3880597014925373e-05, "loss": 0.0001, "step": 340 }, { "epoch": 1.09, "grad_norm": 0.004311152268201113, "learning_rate": 2.294776119402985e-05, "loss": 0.0004, "step": 350 }, { "epoch": 1.1, "grad_norm": 0.04659305512905121, "learning_rate": 2.201492537313433e-05, "loss": 0.0002, "step": 360 }, { "epoch": 1.12, "grad_norm": 0.037149786949157715, "learning_rate": 2.1082089552238806e-05, "loss": 0.001, "step": 370 }, { "epoch": 1.14, "grad_norm": 0.03413153439760208, "learning_rate": 2.0149253731343285e-05, "loss": 0.0004, "step": 380 }, { "epoch": 1.15, "grad_norm": 0.041676830500364304, "learning_rate": 1.921641791044776e-05, "loss": 0.0056, "step": 390 }, { "epoch": 1.17, "grad_norm": 0.0016505724051967263, "learning_rate": 1.828358208955224e-05, "loss": 0.0002, "step": 400 }, { "epoch": 1.19, "grad_norm": 0.0031964355148375034, "learning_rate": 1.735074626865672e-05, "loss": 0.0001, "step": 410 }, { "epoch": 1.2, "grad_norm": 0.004493652377277613, "learning_rate": 1.6417910447761194e-05, "loss": 0.0001, "step": 420 }, { "epoch": 1.22, "grad_norm": 0.0042636813595891, "learning_rate": 1.5485074626865673e-05, "loss": 0.0001, "step": 430 }, { "epoch": 1.24, "grad_norm": 0.028110649436712265, "learning_rate": 1.455223880597015e-05, "loss": 0.0003, "step": 440 }, { "epoch": 1.25, "grad_norm": 0.0027514537796378136, "learning_rate": 1.3619402985074628e-05, "loss": 0.0001, "step": 450 }, { "epoch": 1.27, "grad_norm": 0.003923814278095961, "learning_rate": 1.2686567164179105e-05, "loss": 0.0013, "step": 460 }, { "epoch": 1.29, "grad_norm": 0.001681660651229322, "learning_rate": 1.1753731343283584e-05, "loss": 0.1662, "step": 470 }, { "epoch": 1.3, "grad_norm": 0.00280452030710876, "learning_rate": 1.082089552238806e-05, "loss": 0.0037, "step": 480 }, { "epoch": 1.32, "grad_norm": 0.03961377218365669, "learning_rate": 9.888059701492537e-06, "loss": 0.0002, "step": 490 }, { "epoch": 1.34, "grad_norm": 0.0012254673056304455, "learning_rate": 8.955223880597016e-06, "loss": 0.0076, "step": 500 }, { "epoch": 1.35, "grad_norm": 0.0026522520929574966, "learning_rate": 8.022388059701493e-06, "loss": 0.0003, "step": 510 }, { "epoch": 1.37, "grad_norm": 0.017898578196763992, "learning_rate": 7.08955223880597e-06, "loss": 0.0005, "step": 520 }, { "epoch": 1.39, "grad_norm": 0.001244415994733572, "learning_rate": 6.156716417910448e-06, "loss": 0.0002, "step": 530 }, { "epoch": 1.4, "grad_norm": 0.0038874938618391752, "learning_rate": 5.2238805970149255e-06, "loss": 0.0001, "step": 540 }, { "epoch": 1.42, "grad_norm": 0.0015441217692568898, "learning_rate": 4.291044776119403e-06, "loss": 0.0001, "step": 550 }, { "epoch": 1.44, "grad_norm": 0.002049211412668228, "learning_rate": 3.358208955223881e-06, "loss": 0.0001, "step": 560 }, { "epoch": 1.45, "grad_norm": 0.09079094976186752, "learning_rate": 2.4253731343283585e-06, "loss": 0.0004, "step": 570 }, { "epoch": 1.47, "grad_norm": 0.00330289825797081, "learning_rate": 1.4925373134328358e-06, "loss": 0.0001, "step": 580 }, { "epoch": 1.49, "grad_norm": 0.002893247874453664, "learning_rate": 5.597014925373135e-07, "loss": 0.0001, "step": 590 }, { "epoch": 1.5, "eval_accuracy": 1.0, "eval_loss": 0.00015069438086356968, "eval_runtime": 601.9366, "eval_samples_per_second": 0.211, "eval_steps_per_second": 0.106, "step": 596 }, { "epoch": 1.5, "step": 596, "total_flos": 3.0513984388253614e+18, "train_loss": 0.21028166316409014, "train_runtime": 8990.1785, "train_samples_per_second": 0.133, "train_steps_per_second": 0.066 }, { "epoch": 1.5, "eval_accuracy": 1.0, "eval_loss": 0.0027257483452558517, "eval_runtime": 678.7878, "eval_samples_per_second": 0.206, "eval_steps_per_second": 0.103, "step": 596 }, { "epoch": 1.5, "eval_accuracy": 1.0, "eval_loss": 0.002725747646763921, "eval_runtime": 663.5855, "eval_samples_per_second": 0.211, "eval_steps_per_second": 0.105, "step": 596 } ], "logging_steps": 10, "max_steps": 596, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 3.0513984388253614e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }