|
{ |
|
"best_metric": 0.1280793398618698, |
|
"best_model_checkpoint": "deepfake_vs_real_image_detection/checkpoint-35008", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 35008, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11425959780621572, |
|
"grad_norm": 0.9279178977012634, |
|
"learning_rate": 9.87127410034899e-07, |
|
"loss": 0.6754, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22851919561243145, |
|
"grad_norm": 1.445857286453247, |
|
"learning_rate": 9.72824532295898e-07, |
|
"loss": 0.5645, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.34277879341864714, |
|
"grad_norm": 2.294506788253784, |
|
"learning_rate": 9.585216545568968e-07, |
|
"loss": 0.3891, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4570383912248629, |
|
"grad_norm": 2.4269814491271973, |
|
"learning_rate": 9.442187768178958e-07, |
|
"loss": 0.2769, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5712979890310786, |
|
"grad_norm": 8.800315856933594, |
|
"learning_rate": 9.299158990788947e-07, |
|
"loss": 0.225, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6855575868372943, |
|
"grad_norm": 2.500453233718872, |
|
"learning_rate": 9.156130213398935e-07, |
|
"loss": 0.1974, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79981718464351, |
|
"grad_norm": 2.538663864135742, |
|
"learning_rate": 9.013101436008924e-07, |
|
"loss": 0.1816, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9140767824497258, |
|
"grad_norm": 3.2626090049743652, |
|
"learning_rate": 8.870072658618913e-07, |
|
"loss": 0.1687, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9183575124277163, |
|
"eval_loss": 0.26723140478134155, |
|
"eval_runtime": 613.6678, |
|
"eval_samples_per_second": 64.25, |
|
"eval_steps_per_second": 8.032, |
|
"step": 4376 |
|
}, |
|
{ |
|
"epoch": 1.0283363802559415, |
|
"grad_norm": 7.749739170074463, |
|
"learning_rate": 8.727043881228902e-07, |
|
"loss": 0.161, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.1425959780621573, |
|
"grad_norm": 3.7298998832702637, |
|
"learning_rate": 8.584015103838892e-07, |
|
"loss": 0.148, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.2568555758683728, |
|
"grad_norm": 1.4705345630645752, |
|
"learning_rate": 8.440986326448881e-07, |
|
"loss": 0.1456, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.3711151736745886, |
|
"grad_norm": 3.6541574001312256, |
|
"learning_rate": 8.297957549058871e-07, |
|
"loss": 0.1427, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.4853747714808043, |
|
"grad_norm": 2.981785535812378, |
|
"learning_rate": 8.15492877166886e-07, |
|
"loss": 0.1376, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.59963436928702, |
|
"grad_norm": 5.706754684448242, |
|
"learning_rate": 8.011899994278849e-07, |
|
"loss": 0.1257, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.7138939670932358, |
|
"grad_norm": 4.674542427062988, |
|
"learning_rate": 7.868871216888838e-07, |
|
"loss": 0.1247, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.8281535648994516, |
|
"grad_norm": 10.191073417663574, |
|
"learning_rate": 7.725842439498827e-07, |
|
"loss": 0.124, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.9424131627056673, |
|
"grad_norm": 8.716536521911621, |
|
"learning_rate": 7.582813662108815e-07, |
|
"loss": 0.1196, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9366440093334686, |
|
"eval_loss": 0.19802308082580566, |
|
"eval_runtime": 414.2816, |
|
"eval_samples_per_second": 95.172, |
|
"eval_steps_per_second": 11.898, |
|
"step": 8752 |
|
}, |
|
{ |
|
"epoch": 2.056672760511883, |
|
"grad_norm": 0.8615439534187317, |
|
"learning_rate": 7.439784884718805e-07, |
|
"loss": 0.1161, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.170932358318099, |
|
"grad_norm": 4.869253158569336, |
|
"learning_rate": 7.296756107328794e-07, |
|
"loss": 0.1169, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.2851919561243146, |
|
"grad_norm": 4.5460333824157715, |
|
"learning_rate": 7.153727329938783e-07, |
|
"loss": 0.1095, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.3994515539305303, |
|
"grad_norm": 0.5965484976768494, |
|
"learning_rate": 7.010698552548773e-07, |
|
"loss": 0.1131, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.5137111517367456, |
|
"grad_norm": 2.817739725112915, |
|
"learning_rate": 6.867669775158762e-07, |
|
"loss": 0.1056, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.6279707495429614, |
|
"grad_norm": 2.231886625289917, |
|
"learning_rate": 6.724640997768751e-07, |
|
"loss": 0.1086, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.742230347349177, |
|
"grad_norm": 7.3367600440979, |
|
"learning_rate": 6.58161222037874e-07, |
|
"loss": 0.0982, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.856489945155393, |
|
"grad_norm": 1.7964383363723755, |
|
"learning_rate": 6.438583442988729e-07, |
|
"loss": 0.1018, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.9707495429616086, |
|
"grad_norm": 6.008702754974365, |
|
"learning_rate": 6.295554665598718e-07, |
|
"loss": 0.1044, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9433904839200569, |
|
"eval_loss": 0.17182812094688416, |
|
"eval_runtime": 413.3537, |
|
"eval_samples_per_second": 95.386, |
|
"eval_steps_per_second": 11.924, |
|
"step": 13128 |
|
}, |
|
{ |
|
"epoch": 3.0850091407678244, |
|
"grad_norm": 8.11001205444336, |
|
"learning_rate": 6.152525888208708e-07, |
|
"loss": 0.0962, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.19926873857404, |
|
"grad_norm": 3.732985019683838, |
|
"learning_rate": 6.009497110818696e-07, |
|
"loss": 0.0903, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.313528336380256, |
|
"grad_norm": 8.172978401184082, |
|
"learning_rate": 5.866468333428685e-07, |
|
"loss": 0.1025, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.4277879341864717, |
|
"grad_norm": 7.130813121795654, |
|
"learning_rate": 5.723439556038675e-07, |
|
"loss": 0.0934, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.5420475319926874, |
|
"grad_norm": 10.737488746643066, |
|
"learning_rate": 5.580410778648664e-07, |
|
"loss": 0.0937, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.656307129798903, |
|
"grad_norm": 5.984869480133057, |
|
"learning_rate": 5.437382001258653e-07, |
|
"loss": 0.0947, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.770566727605119, |
|
"grad_norm": 1.0398708581924438, |
|
"learning_rate": 5.294353223868642e-07, |
|
"loss": 0.0944, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.8848263254113347, |
|
"grad_norm": 4.8392510414123535, |
|
"learning_rate": 5.151324446478631e-07, |
|
"loss": 0.0908, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.9990859232175504, |
|
"grad_norm": 6.028433322906494, |
|
"learning_rate": 5.00829566908862e-07, |
|
"loss": 0.0897, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9511514659632748, |
|
"eval_loss": 0.1482020914554596, |
|
"eval_runtime": 408.5773, |
|
"eval_samples_per_second": 96.501, |
|
"eval_steps_per_second": 12.064, |
|
"step": 17504 |
|
}, |
|
{ |
|
"epoch": 4.113345521023766, |
|
"grad_norm": 1.668658971786499, |
|
"learning_rate": 4.86526689169861e-07, |
|
"loss": 0.0867, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.227605118829982, |
|
"grad_norm": 3.3406217098236084, |
|
"learning_rate": 4.7222381143085986e-07, |
|
"loss": 0.0933, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.341864716636198, |
|
"grad_norm": 6.337904930114746, |
|
"learning_rate": 4.579209336918588e-07, |
|
"loss": 0.0876, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.456124314442413, |
|
"grad_norm": 4.625348091125488, |
|
"learning_rate": 4.436180559528577e-07, |
|
"loss": 0.0857, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.570383912248629, |
|
"grad_norm": 0.618135929107666, |
|
"learning_rate": 4.293151782138566e-07, |
|
"loss": 0.0875, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.684643510054845, |
|
"grad_norm": 17.393951416015625, |
|
"learning_rate": 4.1501230047485555e-07, |
|
"loss": 0.0866, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.798903107861061, |
|
"grad_norm": 0.8322166800498962, |
|
"learning_rate": 4.0070942273585443e-07, |
|
"loss": 0.088, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.913162705667276, |
|
"grad_norm": 0.31342649459838867, |
|
"learning_rate": 3.864065449968533e-07, |
|
"loss": 0.0884, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9540681749010855, |
|
"eval_loss": 0.13915510475635529, |
|
"eval_runtime": 410.8966, |
|
"eval_samples_per_second": 95.956, |
|
"eval_steps_per_second": 11.996, |
|
"step": 21880 |
|
}, |
|
{ |
|
"epoch": 5.027422303473492, |
|
"grad_norm": 15.877787590026855, |
|
"learning_rate": 3.7210366725785225e-07, |
|
"loss": 0.0907, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.141681901279708, |
|
"grad_norm": 3.825509548187256, |
|
"learning_rate": 3.578007895188512e-07, |
|
"loss": 0.0784, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.255941499085923, |
|
"grad_norm": 8.778756141662598, |
|
"learning_rate": 3.434979117798501e-07, |
|
"loss": 0.0839, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.3702010968921385, |
|
"grad_norm": 3.6557865142822266, |
|
"learning_rate": 3.29195034040849e-07, |
|
"loss": 0.0834, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.484460694698354, |
|
"grad_norm": 4.403536319732666, |
|
"learning_rate": 3.148921563018479e-07, |
|
"loss": 0.0846, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.59872029250457, |
|
"grad_norm": 1.8743895292282104, |
|
"learning_rate": 3.005892785628468e-07, |
|
"loss": 0.0818, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.712979890310786, |
|
"grad_norm": 5.720378875732422, |
|
"learning_rate": 2.8628640082384576e-07, |
|
"loss": 0.0791, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.8272394881170015, |
|
"grad_norm": 9.525908470153809, |
|
"learning_rate": 2.719835230848447e-07, |
|
"loss": 0.0887, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 5.941499085923217, |
|
"grad_norm": 13.019225120544434, |
|
"learning_rate": 2.576806453458435e-07, |
|
"loss": 0.0785, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9561479151871767, |
|
"eval_loss": 0.13129685819149017, |
|
"eval_runtime": 380.134, |
|
"eval_samples_per_second": 103.721, |
|
"eval_steps_per_second": 12.966, |
|
"step": 26256 |
|
}, |
|
{ |
|
"epoch": 6.055758683729433, |
|
"grad_norm": 8.515515327453613, |
|
"learning_rate": 2.4337776760684246e-07, |
|
"loss": 0.0781, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.170018281535649, |
|
"grad_norm": 2.822601318359375, |
|
"learning_rate": 2.290748898678414e-07, |
|
"loss": 0.0792, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.2842778793418645, |
|
"grad_norm": 0.4864025115966797, |
|
"learning_rate": 2.147720121288403e-07, |
|
"loss": 0.0789, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.39853747714808, |
|
"grad_norm": 1.266471266746521, |
|
"learning_rate": 2.0046913438983924e-07, |
|
"loss": 0.085, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.512797074954296, |
|
"grad_norm": 0.17314019799232483, |
|
"learning_rate": 1.8616625665083812e-07, |
|
"loss": 0.0771, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 6.627056672760512, |
|
"grad_norm": 0.11477702111005783, |
|
"learning_rate": 1.7186337891183706e-07, |
|
"loss": 0.083, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 6.7413162705667276, |
|
"grad_norm": 3.228081703186035, |
|
"learning_rate": 1.5756050117283597e-07, |
|
"loss": 0.0815, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 6.855575868372943, |
|
"grad_norm": 8.033062934875488, |
|
"learning_rate": 1.4325762343383488e-07, |
|
"loss": 0.0769, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.969835466179159, |
|
"grad_norm": 3.3470427989959717, |
|
"learning_rate": 1.289547456948338e-07, |
|
"loss": 0.0827, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9570609718981434, |
|
"eval_loss": 0.13065218925476074, |
|
"eval_runtime": 375.1187, |
|
"eval_samples_per_second": 105.108, |
|
"eval_steps_per_second": 13.14, |
|
"step": 30632 |
|
}, |
|
{ |
|
"epoch": 7.084095063985375, |
|
"grad_norm": 3.7121076583862305, |
|
"learning_rate": 1.1465186795583271e-07, |
|
"loss": 0.0788, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 7.198354661791591, |
|
"grad_norm": 1.4164518117904663, |
|
"learning_rate": 1.0034899021683162e-07, |
|
"loss": 0.0844, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 7.312614259597806, |
|
"grad_norm": 0.26970207691192627, |
|
"learning_rate": 8.604611247783053e-08, |
|
"loss": 0.0795, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 7.426873857404022, |
|
"grad_norm": 2.8127174377441406, |
|
"learning_rate": 7.174323473882945e-08, |
|
"loss": 0.0846, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 7.541133455210238, |
|
"grad_norm": 11.107344627380371, |
|
"learning_rate": 5.744035699982836e-08, |
|
"loss": 0.0797, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 7.655393053016454, |
|
"grad_norm": 10.199967384338379, |
|
"learning_rate": 4.3137479260827274e-08, |
|
"loss": 0.0776, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 7.769652650822669, |
|
"grad_norm": 0.1155412346124649, |
|
"learning_rate": 2.883460152182619e-08, |
|
"loss": 0.0758, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 7.883912248628885, |
|
"grad_norm": 0.0839085578918457, |
|
"learning_rate": 1.4531723782825104e-08, |
|
"loss": 0.0762, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 7.998171846435101, |
|
"grad_norm": 7.536131858825684, |
|
"learning_rate": 2.2884604382401738e-10, |
|
"loss": 0.0767, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9575682256264584, |
|
"eval_loss": 0.1280793398618698, |
|
"eval_runtime": 395.9176, |
|
"eval_samples_per_second": 99.586, |
|
"eval_steps_per_second": 12.45, |
|
"step": 35008 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 35008, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.679226824017687e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|