|
{ |
|
"best_metric": 0.3781418800354004, |
|
"best_model_checkpoint": "facial_emotions_image_detection/checkpoint-15740", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 15740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 11.892455101013184, |
|
"learning_rate": 3.885277246653919e-06, |
|
"loss": 0.5534, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 12.262578964233398, |
|
"learning_rate": 3.7578075207138302e-06, |
|
"loss": 0.5424, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 11.160516738891602, |
|
"learning_rate": 3.630337794773741e-06, |
|
"loss": 0.5402, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8678585617798967, |
|
"eval_loss": 0.4141866862773895, |
|
"eval_runtime": 113.7657, |
|
"eval_samples_per_second": 110.622, |
|
"eval_steps_per_second": 13.835, |
|
"step": 1574 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 12.081930160522461, |
|
"learning_rate": 3.5028680688336517e-06, |
|
"loss": 0.509, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 8.023213386535645, |
|
"learning_rate": 3.375398342893563e-06, |
|
"loss": 0.4984, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 11.671684265136719, |
|
"learning_rate": 3.2479286169534735e-06, |
|
"loss": 0.4817, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8661899086213747, |
|
"eval_loss": 0.41461509466171265, |
|
"eval_runtime": 111.4496, |
|
"eval_samples_per_second": 112.921, |
|
"eval_steps_per_second": 14.123, |
|
"step": 3148 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 11.465852737426758, |
|
"learning_rate": 3.120458891013384e-06, |
|
"loss": 0.4686, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 13.02270221710205, |
|
"learning_rate": 2.992989165073295e-06, |
|
"loss": 0.451, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 7.672973155975342, |
|
"learning_rate": 2.8655194391332055e-06, |
|
"loss": 0.4459, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8698450536352801, |
|
"eval_loss": 0.40479913353919983, |
|
"eval_runtime": 111.5158, |
|
"eval_samples_per_second": 112.854, |
|
"eval_steps_per_second": 14.115, |
|
"step": 4722 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 12.763694763183594, |
|
"learning_rate": 2.7380497131931164e-06, |
|
"loss": 0.4335, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"grad_norm": 10.964895248413086, |
|
"learning_rate": 2.6105799872530273e-06, |
|
"loss": 0.4325, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 12.650814056396484, |
|
"learning_rate": 2.4831102613129383e-06, |
|
"loss": 0.4031, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8707985697258641, |
|
"eval_loss": 0.3987765610218048, |
|
"eval_runtime": 110.9162, |
|
"eval_samples_per_second": 113.464, |
|
"eval_steps_per_second": 14.191, |
|
"step": 6296 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"grad_norm": 7.15053653717041, |
|
"learning_rate": 2.3556405353728488e-06, |
|
"loss": 0.4112, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 17.31258773803711, |
|
"learning_rate": 2.2281708094327597e-06, |
|
"loss": 0.3886, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"grad_norm": 7.938920497894287, |
|
"learning_rate": 2.1007010834926702e-06, |
|
"loss": 0.3804, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8742947953913389, |
|
"eval_loss": 0.39545899629592896, |
|
"eval_runtime": 110.9484, |
|
"eval_samples_per_second": 113.431, |
|
"eval_steps_per_second": 14.187, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 11.187897682189941, |
|
"learning_rate": 1.973231357552581e-06, |
|
"loss": 0.3856, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"grad_norm": 12.73786735534668, |
|
"learning_rate": 1.845761631612492e-06, |
|
"loss": 0.3732, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"grad_norm": 6.2277374267578125, |
|
"learning_rate": 1.7182919056724028e-06, |
|
"loss": 0.3733, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8767580452920143, |
|
"eval_loss": 0.3847721815109253, |
|
"eval_runtime": 113.9748, |
|
"eval_samples_per_second": 110.419, |
|
"eval_steps_per_second": 13.81, |
|
"step": 9444 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 9.49077320098877, |
|
"learning_rate": 1.5908221797323135e-06, |
|
"loss": 0.3552, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"grad_norm": 11.691384315490723, |
|
"learning_rate": 1.4633524537922245e-06, |
|
"loss": 0.355, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 7.24980354309082, |
|
"learning_rate": 1.335882727852135e-06, |
|
"loss": 0.347, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"grad_norm": 15.983756065368652, |
|
"learning_rate": 1.2084130019120457e-06, |
|
"loss": 0.3462, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.877870480731029, |
|
"eval_loss": 0.38435637950897217, |
|
"eval_runtime": 114.6747, |
|
"eval_samples_per_second": 109.745, |
|
"eval_steps_per_second": 13.726, |
|
"step": 11018 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"grad_norm": 6.091674327850342, |
|
"learning_rate": 1.0809432759719566e-06, |
|
"loss": 0.3481, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"grad_norm": 14.106987953186035, |
|
"learning_rate": 9.534735500318673e-07, |
|
"loss": 0.3373, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"grad_norm": 8.448904991149902, |
|
"learning_rate": 8.260038240917782e-07, |
|
"loss": 0.324, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.876201827572507, |
|
"eval_loss": 0.3851085305213928, |
|
"eval_runtime": 113.668, |
|
"eval_samples_per_second": 110.717, |
|
"eval_steps_per_second": 13.847, |
|
"step": 12592 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"grad_norm": 6.959225177764893, |
|
"learning_rate": 6.98534098151689e-07, |
|
"loss": 0.3254, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"grad_norm": 10.736397743225098, |
|
"learning_rate": 5.710643722115998e-07, |
|
"loss": 0.3179, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"grad_norm": 10.716438293457031, |
|
"learning_rate": 4.435946462715105e-07, |
|
"loss": 0.3217, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8787445371473976, |
|
"eval_loss": 0.3801954388618469, |
|
"eval_runtime": 113.1501, |
|
"eval_samples_per_second": 111.224, |
|
"eval_steps_per_second": 13.911, |
|
"step": 14166 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"grad_norm": 12.26369571685791, |
|
"learning_rate": 3.161249203314213e-07, |
|
"loss": 0.309, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"grad_norm": 6.435930252075195, |
|
"learning_rate": 1.8865519439133203e-07, |
|
"loss": 0.3158, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"grad_norm": 15.131240844726562, |
|
"learning_rate": 6.118546845124282e-08, |
|
"loss": 0.3105, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8789829161700436, |
|
"eval_loss": 0.3781418800354004, |
|
"eval_runtime": 112.3381, |
|
"eval_samples_per_second": 112.028, |
|
"eval_steps_per_second": 14.011, |
|
"step": 15740 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 15740, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.900966581033497e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|