|
{ |
|
"best_metric": 0.5263157894736842, |
|
"best_model_checkpoint": "videomae-base-sarco-aumentado/checkpoint-33", |
|
"epoch": 9.071875, |
|
"eval_steps": 500, |
|
"global_step": 320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 7.657312870025635, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 1.414, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 10.452422142028809, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.2125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 6.368149757385254, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.7603, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 1.5046850442886353, |
|
"eval_runtime": 9.1382, |
|
"eval_samples_per_second": 2.079, |
|
"eval_steps_per_second": 0.328, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.021875, |
|
"grad_norm": 17.073139190673828, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.4153, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.053125, |
|
"grad_norm": 8.040921211242676, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.1479, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.084375, |
|
"grad_norm": 0.2993019223213196, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 0.0509, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 2.6908650398254395, |
|
"eval_runtime": 5.6622, |
|
"eval_samples_per_second": 3.356, |
|
"eval_steps_per_second": 0.53, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 2.0125, |
|
"grad_norm": 10.6167573928833, |
|
"learning_rate": 4.340277777777778e-05, |
|
"loss": 0.0109, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.04375, |
|
"grad_norm": 1.5939055681228638, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0043, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.075, |
|
"grad_norm": 0.037472404539585114, |
|
"learning_rate": 3.993055555555556e-05, |
|
"loss": 0.0422, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.123065948486328, |
|
"eval_runtime": 5.6279, |
|
"eval_samples_per_second": 3.376, |
|
"eval_steps_per_second": 0.533, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 3.003125, |
|
"grad_norm": 0.02211487852036953, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 0.0025, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.034375, |
|
"grad_norm": 0.01639764942228794, |
|
"learning_rate": 3.6458333333333336e-05, |
|
"loss": 0.0016, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.065625, |
|
"grad_norm": 0.01938652992248535, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.0159, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.096875, |
|
"grad_norm": 0.017730316147208214, |
|
"learning_rate": 3.2986111111111115e-05, |
|
"loss": 0.0085, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.3051815032958984, |
|
"eval_runtime": 5.7488, |
|
"eval_samples_per_second": 3.305, |
|
"eval_steps_per_second": 0.522, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 4.025, |
|
"grad_norm": 0.017928821966052055, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.0096, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.05625, |
|
"grad_norm": 0.0493478998541832, |
|
"learning_rate": 2.951388888888889e-05, |
|
"loss": 0.0013, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.0875, |
|
"grad_norm": 0.02256612479686737, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0016, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.3200395107269287, |
|
"eval_runtime": 5.4706, |
|
"eval_samples_per_second": 3.473, |
|
"eval_steps_per_second": 0.548, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 5.015625, |
|
"grad_norm": 0.017535550519824028, |
|
"learning_rate": 2.604166666666667e-05, |
|
"loss": 0.0016, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.046875, |
|
"grad_norm": 0.012387892231345177, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 0.001, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.078125, |
|
"grad_norm": 0.014036540873348713, |
|
"learning_rate": 2.2569444444444447e-05, |
|
"loss": 0.0011, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.3733017444610596, |
|
"eval_runtime": 5.6974, |
|
"eval_samples_per_second": 3.335, |
|
"eval_steps_per_second": 0.527, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 6.00625, |
|
"grad_norm": 0.011159941554069519, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0009, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.0375, |
|
"grad_norm": 0.02007906697690487, |
|
"learning_rate": 1.9097222222222222e-05, |
|
"loss": 0.0009, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 6.06875, |
|
"grad_norm": 0.01185558270663023, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 0.0009, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.012541444040834904, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.0008, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.3552279472351074, |
|
"eval_runtime": 5.687, |
|
"eval_samples_per_second": 3.341, |
|
"eval_steps_per_second": 0.528, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 7.028125, |
|
"grad_norm": 0.008836383931338787, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0007, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.059375, |
|
"grad_norm": 0.010615321807563305, |
|
"learning_rate": 1.2152777777777779e-05, |
|
"loss": 0.0008, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.090625, |
|
"grad_norm": 0.010104900225996971, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.0007, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.392703056335449, |
|
"eval_runtime": 5.807, |
|
"eval_samples_per_second": 3.272, |
|
"eval_steps_per_second": 0.517, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 8.01875, |
|
"grad_norm": 0.0091309929266572, |
|
"learning_rate": 8.680555555555556e-06, |
|
"loss": 0.0007, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.009017580188810825, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.0007, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.08125, |
|
"grad_norm": 0.009177467785775661, |
|
"learning_rate": 5.208333333333334e-06, |
|
"loss": 0.0007, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.103125, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.4122025966644287, |
|
"eval_runtime": 5.7143, |
|
"eval_samples_per_second": 3.325, |
|
"eval_steps_per_second": 0.525, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 9.009375, |
|
"grad_norm": 0.009887206368148327, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.0007, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.040625, |
|
"grad_norm": 0.35840049386024475, |
|
"learning_rate": 1.7361111111111112e-06, |
|
"loss": 0.0009, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 9.071875, |
|
"grad_norm": 0.01002708449959755, |
|
"learning_rate": 0.0, |
|
"loss": 0.0007, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.071875, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 3.4094326496124268, |
|
"eval_runtime": 6.4245, |
|
"eval_samples_per_second": 2.957, |
|
"eval_steps_per_second": 0.467, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.071875, |
|
"step": 320, |
|
"total_flos": 3.1451231201258373e+18, |
|
"train_loss": 0.12853713909717043, |
|
"train_runtime": 960.1181, |
|
"train_samples_per_second": 2.666, |
|
"train_steps_per_second": 0.333 |
|
}, |
|
{ |
|
"epoch": 9.071875, |
|
"eval_accuracy": 0.3958333333333333, |
|
"eval_loss": 1.3436609506607056, |
|
"eval_runtime": 22.1033, |
|
"eval_samples_per_second": 2.172, |
|
"eval_steps_per_second": 0.271, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.071875, |
|
"eval_accuracy": 0.3958333333333333, |
|
"eval_loss": 1.3436609506607056, |
|
"eval_runtime": 14.4365, |
|
"eval_samples_per_second": 3.325, |
|
"eval_steps_per_second": 0.416, |
|
"step": 320 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 320, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.1451231201258373e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|