|
{ |
|
"best_metric": 1.143545389175415, |
|
"best_model_checkpoint": "facial_age_image_detection/checkpoint-7200", |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 7200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6146026781350796, |
|
"eval_loss": 1.1694062948226929, |
|
"eval_runtime": 105.8046, |
|
"eval_samples_per_second": 96.697, |
|
"eval_steps_per_second": 3.024, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6155800996969993, |
|
"eval_loss": 1.168785572052002, |
|
"eval_runtime": 110.1471, |
|
"eval_samples_per_second": 92.885, |
|
"eval_steps_per_second": 2.905, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 8.204376220703125, |
|
"learning_rate": 4.685314685314685e-07, |
|
"loss": 0.7138, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6099110546378653, |
|
"eval_loss": 1.1679891347885132, |
|
"eval_runtime": 102.9363, |
|
"eval_samples_per_second": 99.392, |
|
"eval_steps_per_second": 3.109, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6177304271332226, |
|
"eval_loss": 1.1608326435089111, |
|
"eval_runtime": 101.8768, |
|
"eval_samples_per_second": 100.425, |
|
"eval_steps_per_second": 3.141, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"grad_norm": 14.190340042114258, |
|
"learning_rate": 4.335664335664335e-07, |
|
"loss": 0.7034, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6098133124816734, |
|
"eval_loss": 1.1647840738296509, |
|
"eval_runtime": 103.2275, |
|
"eval_samples_per_second": 99.111, |
|
"eval_steps_per_second": 3.1, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6123546085426644, |
|
"eval_loss": 1.1614784002304077, |
|
"eval_runtime": 106.5014, |
|
"eval_samples_per_second": 96.064, |
|
"eval_steps_per_second": 3.005, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 18.558853149414062, |
|
"learning_rate": 3.9860139860139855e-07, |
|
"loss": 0.6951, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6186101065389502, |
|
"eval_loss": 1.160543441772461, |
|
"eval_runtime": 105.023, |
|
"eval_samples_per_second": 97.417, |
|
"eval_steps_per_second": 3.047, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6172417163522628, |
|
"eval_loss": 1.1572130918502808, |
|
"eval_runtime": 105.5204, |
|
"eval_samples_per_second": 96.958, |
|
"eval_steps_per_second": 3.033, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"grad_norm": 9.678888320922852, |
|
"learning_rate": 3.636363636363636e-07, |
|
"loss": 0.6885, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6171439741960708, |
|
"eval_loss": 1.1544899940490723, |
|
"eval_runtime": 102.6064, |
|
"eval_samples_per_second": 99.711, |
|
"eval_steps_per_second": 3.119, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6179259114456065, |
|
"eval_loss": 1.155420184135437, |
|
"eval_runtime": 104.82, |
|
"eval_samples_per_second": 97.605, |
|
"eval_steps_per_second": 3.053, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"grad_norm": 13.007719039916992, |
|
"learning_rate": 3.286713286713286e-07, |
|
"loss": 0.6806, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6176326849770306, |
|
"eval_loss": 1.1572375297546387, |
|
"eval_runtime": 103.6156, |
|
"eval_samples_per_second": 98.74, |
|
"eval_steps_per_second": 3.088, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6146026781350796, |
|
"eval_loss": 1.1544034481048584, |
|
"eval_runtime": 102.3651, |
|
"eval_samples_per_second": 99.946, |
|
"eval_steps_per_second": 3.126, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 12.4281005859375, |
|
"learning_rate": 2.9370629370629366e-07, |
|
"loss": 0.6767, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6182191379141824, |
|
"eval_loss": 1.1541900634765625, |
|
"eval_runtime": 105.249, |
|
"eval_samples_per_second": 97.208, |
|
"eval_steps_per_second": 3.04, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6143094516665037, |
|
"eval_loss": 1.1535159349441528, |
|
"eval_runtime": 106.0372, |
|
"eval_samples_per_second": 96.485, |
|
"eval_steps_per_second": 3.018, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"grad_norm": 7.5727410316467285, |
|
"learning_rate": 2.5874125874125877e-07, |
|
"loss": 0.667, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6190988173199101, |
|
"eval_loss": 1.1515381336212158, |
|
"eval_runtime": 102.4541, |
|
"eval_samples_per_second": 99.859, |
|
"eval_steps_per_second": 3.123, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6183168800703743, |
|
"eval_loss": 1.1484887599945068, |
|
"eval_runtime": 111.0745, |
|
"eval_samples_per_second": 92.109, |
|
"eval_steps_per_second": 2.881, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"grad_norm": 10.47463607788086, |
|
"learning_rate": 2.2377622377622377e-07, |
|
"loss": 0.6652, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6168507477274948, |
|
"eval_loss": 1.1494479179382324, |
|
"eval_runtime": 104.2245, |
|
"eval_samples_per_second": 98.163, |
|
"eval_steps_per_second": 3.07, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6195875281008699, |
|
"eval_loss": 1.1498117446899414, |
|
"eval_runtime": 103.8649, |
|
"eval_samples_per_second": 98.503, |
|
"eval_steps_per_second": 3.081, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 15.560189247131348, |
|
"learning_rate": 1.888111888111888e-07, |
|
"loss": 0.6619, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.619489785944678, |
|
"eval_loss": 1.1467111110687256, |
|
"eval_runtime": 104.6948, |
|
"eval_samples_per_second": 97.722, |
|
"eval_steps_per_second": 3.057, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6170462320398789, |
|
"eval_loss": 1.1486930847167969, |
|
"eval_runtime": 105.6513, |
|
"eval_samples_per_second": 96.837, |
|
"eval_steps_per_second": 3.029, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"grad_norm": 10.84726333618164, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"loss": 0.6586, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6190988173199101, |
|
"eval_loss": 1.1456117630004883, |
|
"eval_runtime": 102.8016, |
|
"eval_samples_per_second": 99.522, |
|
"eval_steps_per_second": 3.113, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6169484898836868, |
|
"eval_loss": 1.145732045173645, |
|
"eval_runtime": 103.7561, |
|
"eval_samples_per_second": 98.606, |
|
"eval_steps_per_second": 3.084, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 22.92, |
|
"grad_norm": 10.03998851776123, |
|
"learning_rate": 1.1888111888111887e-07, |
|
"loss": 0.652, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6170462320398789, |
|
"eval_loss": 1.145518183708191, |
|
"eval_runtime": 102.871, |
|
"eval_samples_per_second": 99.455, |
|
"eval_steps_per_second": 3.111, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6180236536017985, |
|
"eval_loss": 1.1446188688278198, |
|
"eval_runtime": 101.7602, |
|
"eval_samples_per_second": 100.54, |
|
"eval_steps_per_second": 3.145, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 13.074454307556152, |
|
"learning_rate": 8.391608391608391e-08, |
|
"loss": 0.6536, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.619489785944678, |
|
"eval_loss": 1.1448231935501099, |
|
"eval_runtime": 101.7751, |
|
"eval_samples_per_second": 100.526, |
|
"eval_steps_per_second": 3.144, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6186101065389502, |
|
"eval_loss": 1.1442244052886963, |
|
"eval_runtime": 102.0024, |
|
"eval_samples_per_second": 100.302, |
|
"eval_steps_per_second": 3.137, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6180236536017985, |
|
"eval_loss": 1.1441760063171387, |
|
"eval_runtime": 102.5718, |
|
"eval_samples_per_second": 99.745, |
|
"eval_steps_per_second": 3.12, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"grad_norm": 12.248133659362793, |
|
"learning_rate": 4.895104895104895e-08, |
|
"loss": 0.6512, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6179259114456065, |
|
"eval_loss": 1.1439582109451294, |
|
"eval_runtime": 103.381, |
|
"eval_samples_per_second": 98.964, |
|
"eval_steps_per_second": 3.095, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6181213957579904, |
|
"eval_loss": 1.1439974308013916, |
|
"eval_runtime": 102.6407, |
|
"eval_samples_per_second": 99.678, |
|
"eval_steps_per_second": 3.118, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"grad_norm": 7.591431617736816, |
|
"learning_rate": 1.3986013986013985e-08, |
|
"loss": 0.6473, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6190010751637182, |
|
"eval_loss": 1.143545389175415, |
|
"eval_runtime": 105.5925, |
|
"eval_samples_per_second": 96.891, |
|
"eval_steps_per_second": 3.031, |
|
"step": 7200 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 3.5680151743533187e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|