|
{ |
|
"best_metric": 0.32018494606018066, |
|
"best_model_checkpoint": "facial_emotions_image_detection/checkpoint-8260", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 8260, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 13.829694747924805, |
|
"learning_rate": 2.856230031948882e-06, |
|
"loss": 0.399, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 10.43336296081543, |
|
"learning_rate": 2.696485623003195e-06, |
|
"loss": 0.3907, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8980532379817243, |
|
"eval_loss": 0.331790030002594, |
|
"eval_runtime": 228.9339, |
|
"eval_samples_per_second": 109.944, |
|
"eval_steps_per_second": 13.746, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 13.529434204101562, |
|
"learning_rate": 2.536741214057508e-06, |
|
"loss": 0.3646, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 13.766672134399414, |
|
"learning_rate": 2.376996805111821e-06, |
|
"loss": 0.3674, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8970997218911403, |
|
"eval_loss": 0.33308491110801697, |
|
"eval_runtime": 226.1763, |
|
"eval_samples_per_second": 111.285, |
|
"eval_steps_per_second": 13.914, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 15.129395484924316, |
|
"learning_rate": 2.217252396166134e-06, |
|
"loss": 0.3738, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 15.538688659667969, |
|
"learning_rate": 2.057507987220447e-06, |
|
"loss": 0.3534, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 15.868049621582031, |
|
"learning_rate": 1.8977635782747604e-06, |
|
"loss": 0.3467, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8961064759634485, |
|
"eval_loss": 0.33278992772102356, |
|
"eval_runtime": 228.0713, |
|
"eval_samples_per_second": 110.36, |
|
"eval_steps_per_second": 13.798, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"grad_norm": 12.208673477172852, |
|
"learning_rate": 1.7380191693290735e-06, |
|
"loss": 0.3321, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 12.523385047912598, |
|
"learning_rate": 1.5782747603833867e-06, |
|
"loss": 0.322, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8984505363528009, |
|
"eval_loss": 0.3272715210914612, |
|
"eval_runtime": 226.7446, |
|
"eval_samples_per_second": 111.006, |
|
"eval_steps_per_second": 13.879, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 15.311544418334961, |
|
"learning_rate": 1.4185303514376998e-06, |
|
"loss": 0.3288, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 13.10958194732666, |
|
"learning_rate": 1.2587859424920128e-06, |
|
"loss": 0.3182, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9007548669050457, |
|
"eval_loss": 0.3223397731781006, |
|
"eval_runtime": 225.1518, |
|
"eval_samples_per_second": 111.791, |
|
"eval_steps_per_second": 13.977, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 10.398481369018555, |
|
"learning_rate": 1.0990415335463259e-06, |
|
"loss": 0.2982, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"grad_norm": 20.114734649658203, |
|
"learning_rate": 9.39297124600639e-07, |
|
"loss": 0.2946, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"grad_norm": 20.724897384643555, |
|
"learning_rate": 7.795527156549522e-07, |
|
"loss": 0.3081, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9009137862534764, |
|
"eval_loss": 0.3215692341327667, |
|
"eval_runtime": 224.6793, |
|
"eval_samples_per_second": 112.026, |
|
"eval_steps_per_second": 14.007, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"grad_norm": 13.87393856048584, |
|
"learning_rate": 6.198083067092652e-07, |
|
"loss": 0.296, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"grad_norm": 8.323355674743652, |
|
"learning_rate": 4.600638977635783e-07, |
|
"loss": 0.2816, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9007945967421533, |
|
"eval_loss": 0.32018494606018066, |
|
"eval_runtime": 228.3984, |
|
"eval_samples_per_second": 110.202, |
|
"eval_steps_per_second": 13.779, |
|
"step": 8260 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 2.047980331630782e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|