|
{
  "best_metric": 0.8732149076976663,
  "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-FER2013/checkpoint-2020",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 2020,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 0.699901819229126,
      "learning_rate": 2.4752475247524753e-06,
      "loss": 0.7003,
      "step": 10
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.40010973811149597,
      "learning_rate": 4.950495049504951e-06,
      "loss": 0.6978,
      "step": 20
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.3860187530517578,
      "learning_rate": 7.4257425742574256e-06,
      "loss": 0.6891,
      "step": 30
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.38579830527305603,
      "learning_rate": 9.900990099009901e-06,
      "loss": 0.6837,
      "step": 40
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5694485306739807,
      "learning_rate": 1.2376237623762377e-05,
      "loss": 0.6643,
      "step": 50
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5950794816017151,
      "learning_rate": 1.4851485148514851e-05,
      "loss": 0.642,
      "step": 60
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.7590372562408447,
      "learning_rate": 1.7326732673267325e-05,
      "loss": 0.6213,
      "step": 70
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7452055215835571,
      "learning_rate": 1.9801980198019803e-05,
      "loss": 0.6079,
      "step": 80
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1108556985855103,
      "learning_rate": 2.227722772277228e-05,
      "loss": 0.568,
      "step": 90
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0815961360931396,
      "learning_rate": 2.4752475247524754e-05,
      "loss": 0.5555,
      "step": 100
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.206498146057129,
      "learning_rate": 2.722772277227723e-05,
      "loss": 0.5587,
      "step": 110
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8447802066802979,
      "learning_rate": 2.9702970297029702e-05,
      "loss": 0.5502,
      "step": 120
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9129859805107117,
      "learning_rate": 3.217821782178218e-05,
      "loss": 0.5442,
      "step": 130
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3158477544784546,
      "learning_rate": 3.465346534653465e-05,
      "loss": 0.509,
      "step": 140
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6786607503890991,
      "learning_rate": 3.712871287128713e-05,
      "loss": 0.5129,
      "step": 150
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0716958045959473,
      "learning_rate": 3.9603960396039605e-05,
      "loss": 0.5043,
      "step": 160
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8416863679885864,
      "learning_rate": 4.207920792079208e-05,
      "loss": 0.4766,
      "step": 170
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.109766721725464,
      "learning_rate": 4.455445544554456e-05,
      "loss": 0.4746,
      "step": 180
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7481091022491455,
      "learning_rate": 4.702970297029703e-05,
      "loss": 0.5162,
      "step": 190
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.037062406539917,
      "learning_rate": 4.950495049504951e-05,
      "loss": 0.4811,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.800417972831766,
      "eval_loss": 0.43151769042015076,
      "eval_runtime": 15.3075,
      "eval_samples_per_second": 187.555,
      "eval_steps_per_second": 5.879,
      "step": 202
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.0516308546066284,
      "learning_rate": 4.977997799779978e-05,
      "loss": 0.4625,
      "step": 210
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.0761890411376953,
      "learning_rate": 4.950495049504951e-05,
      "loss": 0.4659,
      "step": 220
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9692820310592651,
      "learning_rate": 4.9229922992299234e-05,
      "loss": 0.4893,
      "step": 230
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.9031820893287659,
      "learning_rate": 4.895489548954896e-05,
      "loss": 0.4801,
      "step": 240
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.241443395614624,
      "learning_rate": 4.867986798679868e-05,
      "loss": 0.465,
      "step": 250
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.1679638624191284,
      "learning_rate": 4.8404840484048406e-05,
      "loss": 0.4396,
      "step": 260
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.4820034503936768,
      "learning_rate": 4.812981298129813e-05,
      "loss": 0.4573,
      "step": 270
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.0649698972702026,
      "learning_rate": 4.785478547854786e-05,
      "loss": 0.4616,
      "step": 280
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.2591346502304077,
      "learning_rate": 4.7579757975797585e-05,
      "loss": 0.4661,
      "step": 290
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9309344291687012,
      "learning_rate": 4.730473047304731e-05,
      "loss": 0.4792,
      "step": 300
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.9727767109870911,
      "learning_rate": 4.702970297029703e-05,
      "loss": 0.471,
      "step": 310
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.0227768421173096,
      "learning_rate": 4.675467546754676e-05,
      "loss": 0.4779,
      "step": 320
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7914155721664429,
      "learning_rate": 4.647964796479648e-05,
      "loss": 0.4552,
      "step": 330
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.1211057901382446,
      "learning_rate": 4.62046204620462e-05,
      "loss": 0.4542,
      "step": 340
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.8994653820991516,
      "learning_rate": 4.592959295929593e-05,
      "loss": 0.469,
      "step": 350
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.6254159212112427,
      "learning_rate": 4.5654565456545655e-05,
      "loss": 0.4209,
      "step": 360
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.175451397895813,
      "learning_rate": 4.537953795379538e-05,
      "loss": 0.4707,
      "step": 370
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.6570440530776978,
      "learning_rate": 4.510451045104511e-05,
      "loss": 0.4587,
      "step": 380
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.0916353464126587,
      "learning_rate": 4.4829482948294834e-05,
      "loss": 0.4507,
      "step": 390
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.0758109092712402,
      "learning_rate": 4.455445544554456e-05,
      "loss": 0.4287,
      "step": 400
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8432601880877743,
      "eval_loss": 0.357921838760376,
      "eval_runtime": 15.3987,
      "eval_samples_per_second": 186.444,
      "eval_steps_per_second": 5.845,
      "step": 404
    },
    {
      "epoch": 2.03,
      "grad_norm": 0.7765569686889648,
      "learning_rate": 4.427942794279428e-05,
      "loss": 0.4157,
      "step": 410
    },
    {
      "epoch": 2.08,
      "grad_norm": 1.1473923921585083,
      "learning_rate": 4.4004400440044006e-05,
      "loss": 0.4227,
      "step": 420
    },
    {
      "epoch": 2.13,
      "grad_norm": 7.452829837799072,
      "learning_rate": 4.372937293729373e-05,
      "loss": 0.4033,
      "step": 430
    },
    {
      "epoch": 2.18,
      "grad_norm": 1.391101360321045,
      "learning_rate": 4.345434543454346e-05,
      "loss": 0.4563,
      "step": 440
    },
    {
      "epoch": 2.23,
      "grad_norm": 1.0371557474136353,
      "learning_rate": 4.3179317931793185e-05,
      "loss": 0.4487,
      "step": 450
    },
    {
      "epoch": 2.28,
      "grad_norm": 0.998115062713623,
      "learning_rate": 4.2904290429042904e-05,
      "loss": 0.4178,
      "step": 460
    },
    {
      "epoch": 2.33,
      "grad_norm": 1.1916583776474,
      "learning_rate": 4.262926292629263e-05,
      "loss": 0.4361,
      "step": 470
    },
    {
      "epoch": 2.38,
      "grad_norm": 1.2483221292495728,
      "learning_rate": 4.2354235423542356e-05,
      "loss": 0.4317,
      "step": 480
    },
    {
      "epoch": 2.43,
      "grad_norm": 1.2603436708450317,
      "learning_rate": 4.207920792079208e-05,
      "loss": 0.4252,
      "step": 490
    },
    {
      "epoch": 2.48,
      "grad_norm": 1.614512324333191,
      "learning_rate": 4.18041804180418e-05,
      "loss": 0.4213,
      "step": 500
    },
    {
      "epoch": 2.52,
      "grad_norm": 1.5845041275024414,
      "learning_rate": 4.152915291529153e-05,
      "loss": 0.4276,
      "step": 510
    },
    {
      "epoch": 2.57,
      "grad_norm": 0.9629144072532654,
      "learning_rate": 4.1254125412541255e-05,
      "loss": 0.4168,
      "step": 520
    },
    {
      "epoch": 2.62,
      "grad_norm": 1.0476891994476318,
      "learning_rate": 4.097909790979098e-05,
      "loss": 0.4166,
      "step": 530
    },
    {
      "epoch": 2.67,
      "grad_norm": 1.2000118494033813,
      "learning_rate": 4.070407040704071e-05,
      "loss": 0.4238,
      "step": 540
    },
    {
      "epoch": 2.72,
      "grad_norm": 1.608628511428833,
      "learning_rate": 4.042904290429043e-05,
      "loss": 0.3974,
      "step": 550
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.0195207595825195,
      "learning_rate": 4.015401540154016e-05,
      "loss": 0.4087,
      "step": 560
    },
    {
      "epoch": 2.82,
      "grad_norm": 1.2935571670532227,
      "learning_rate": 3.987898789878988e-05,
      "loss": 0.396,
      "step": 570
    },
    {
      "epoch": 2.87,
      "grad_norm": 1.2056093215942383,
      "learning_rate": 3.9603960396039605e-05,
      "loss": 0.4191,
      "step": 580
    },
    {
      "epoch": 2.92,
      "grad_norm": 0.9370687007904053,
      "learning_rate": 3.932893289328933e-05,
      "loss": 0.4323,
      "step": 590
    },
    {
      "epoch": 2.97,
      "grad_norm": 0.9331934452056885,
      "learning_rate": 3.905390539053906e-05,
      "loss": 0.4184,
      "step": 600
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8467432950191571,
      "eval_loss": 0.35166114568710327,
      "eval_runtime": 15.3286,
      "eval_samples_per_second": 187.297,
      "eval_steps_per_second": 5.871,
      "step": 606
    },
    {
      "epoch": 3.02,
      "grad_norm": 1.35804283618927,
      "learning_rate": 3.877887788778878e-05,
      "loss": 0.4034,
      "step": 610
    },
    {
      "epoch": 3.07,
      "grad_norm": 1.5728434324264526,
      "learning_rate": 3.8503850385038503e-05,
      "loss": 0.3917,
      "step": 620
    },
    {
      "epoch": 3.12,
      "grad_norm": 1.0493104457855225,
      "learning_rate": 3.822882288228823e-05,
      "loss": 0.416,
      "step": 630
    },
    {
      "epoch": 3.17,
      "grad_norm": 1.2043938636779785,
      "learning_rate": 3.7953795379537956e-05,
      "loss": 0.3913,
      "step": 640
    },
    {
      "epoch": 3.22,
      "grad_norm": 1.6263302564620972,
      "learning_rate": 3.767876787678768e-05,
      "loss": 0.3815,
      "step": 650
    },
    {
      "epoch": 3.27,
      "grad_norm": 1.8161449432373047,
      "learning_rate": 3.74037403740374e-05,
      "loss": 0.3802,
      "step": 660
    },
    {
      "epoch": 3.32,
      "grad_norm": 1.2450302839279175,
      "learning_rate": 3.712871287128713e-05,
      "loss": 0.387,
      "step": 670
    },
    {
      "epoch": 3.37,
      "grad_norm": 1.1230440139770508,
      "learning_rate": 3.6853685368536854e-05,
      "loss": 0.4023,
      "step": 680
    },
    {
      "epoch": 3.42,
      "grad_norm": 1.3349334001541138,
      "learning_rate": 3.657865786578658e-05,
      "loss": 0.4092,
      "step": 690
    },
    {
      "epoch": 3.47,
      "grad_norm": 1.1348090171813965,
      "learning_rate": 3.6303630363036307e-05,
      "loss": 0.3724,
      "step": 700
    },
    {
      "epoch": 3.51,
      "grad_norm": 1.1959893703460693,
      "learning_rate": 3.602860286028603e-05,
      "loss": 0.415,
      "step": 710
    },
    {
      "epoch": 3.56,
      "grad_norm": 1.0938156843185425,
      "learning_rate": 3.575357535753576e-05,
      "loss": 0.3931,
      "step": 720
    },
    {
      "epoch": 3.61,
      "grad_norm": 1.199876308441162,
      "learning_rate": 3.5478547854785485e-05,
      "loss": 0.3817,
      "step": 730
    },
    {
      "epoch": 3.66,
      "grad_norm": 1.3452043533325195,
      "learning_rate": 3.5203520352035205e-05,
      "loss": 0.3897,
      "step": 740
    },
    {
      "epoch": 3.71,
      "grad_norm": 1.3791470527648926,
      "learning_rate": 3.492849284928493e-05,
      "loss": 0.3828,
      "step": 750
    },
    {
      "epoch": 3.76,
      "grad_norm": 1.7758994102478027,
      "learning_rate": 3.465346534653465e-05,
      "loss": 0.382,
      "step": 760
    },
    {
      "epoch": 3.81,
      "grad_norm": 1.0924941301345825,
      "learning_rate": 3.4378437843784377e-05,
      "loss": 0.4048,
      "step": 770
    },
    {
      "epoch": 3.86,
      "grad_norm": 1.1729379892349243,
      "learning_rate": 3.41034103410341e-05,
      "loss": 0.3716,
      "step": 780
    },
    {
      "epoch": 3.91,
      "grad_norm": 1.1679847240447998,
      "learning_rate": 3.382838283828383e-05,
      "loss": 0.3726,
      "step": 790
    },
    {
      "epoch": 3.96,
      "grad_norm": 1.166401743888855,
      "learning_rate": 3.3553355335533555e-05,
      "loss": 0.3931,
      "step": 800
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.855451062347614,
      "eval_loss": 0.33083683252334595,
      "eval_runtime": 15.5188,
      "eval_samples_per_second": 185.001,
      "eval_steps_per_second": 5.799,
      "step": 808
    },
    {
      "epoch": 4.01,
      "grad_norm": 1.225820541381836,
      "learning_rate": 3.327832783278328e-05,
      "loss": 0.3976,
      "step": 810
    },
    {
      "epoch": 4.06,
      "grad_norm": 1.0681790113449097,
      "learning_rate": 3.300330033003301e-05,
      "loss": 0.3641,
      "step": 820
    },
    {
      "epoch": 4.11,
      "grad_norm": 1.3162754774093628,
      "learning_rate": 3.272827282728273e-05,
      "loss": 0.3601,
      "step": 830
    },
    {
      "epoch": 4.16,
      "grad_norm": 1.5509825944900513,
      "learning_rate": 3.2453245324532453e-05,
      "loss": 0.3777,
      "step": 840
    },
    {
      "epoch": 4.21,
      "grad_norm": 1.4965535402297974,
      "learning_rate": 3.217821782178218e-05,
      "loss": 0.3662,
      "step": 850
    },
    {
      "epoch": 4.26,
      "grad_norm": 1.846308946609497,
      "learning_rate": 3.1903190319031906e-05,
      "loss": 0.3755,
      "step": 860
    },
    {
      "epoch": 4.31,
      "grad_norm": 1.008911371231079,
      "learning_rate": 3.162816281628163e-05,
      "loss": 0.3283,
      "step": 870
    },
    {
      "epoch": 4.36,
      "grad_norm": 1.7026519775390625,
      "learning_rate": 3.135313531353136e-05,
      "loss": 0.3782,
      "step": 880
    },
    {
      "epoch": 4.41,
      "grad_norm": 1.3682829141616821,
      "learning_rate": 3.1078107810781085e-05,
      "loss": 0.3943,
      "step": 890
    },
    {
      "epoch": 4.46,
      "grad_norm": 1.2787247896194458,
      "learning_rate": 3.0803080308030804e-05,
      "loss": 0.3651,
      "step": 900
    },
    {
      "epoch": 4.5,
      "grad_norm": 1.2230846881866455,
      "learning_rate": 3.052805280528053e-05,
      "loss": 0.3489,
      "step": 910
    },
    {
      "epoch": 4.55,
      "grad_norm": 1.4894435405731201,
      "learning_rate": 3.0253025302530253e-05,
      "loss": 0.372,
      "step": 920
    },
    {
      "epoch": 4.6,
      "grad_norm": 1.6204265356063843,
      "learning_rate": 2.9977997799779976e-05,
      "loss": 0.3637,
      "step": 930
    },
    {
      "epoch": 4.65,
      "grad_norm": 1.4866546392440796,
      "learning_rate": 2.9702970297029702e-05,
      "loss": 0.3657,
      "step": 940
    },
    {
      "epoch": 4.7,
      "grad_norm": 1.8092739582061768,
      "learning_rate": 2.942794279427943e-05,
      "loss": 0.375,
      "step": 950
    },
    {
      "epoch": 4.75,
      "grad_norm": 1.3936327695846558,
      "learning_rate": 2.9152915291529155e-05,
      "loss": 0.3737,
      "step": 960
    },
    {
      "epoch": 4.8,
      "grad_norm": 1.3176568746566772,
      "learning_rate": 2.8877887788778878e-05,
      "loss": 0.3565,
      "step": 970
    },
    {
      "epoch": 4.85,
      "grad_norm": 1.4575417041778564,
      "learning_rate": 2.8602860286028604e-05,
      "loss": 0.3611,
      "step": 980
    },
    {
      "epoch": 4.9,
      "grad_norm": 1.5857540369033813,
      "learning_rate": 2.832783278327833e-05,
      "loss": 0.3656,
      "step": 990
    },
    {
      "epoch": 4.95,
      "grad_norm": 1.0636128187179565,
      "learning_rate": 2.8052805280528056e-05,
      "loss": 0.3396,
      "step": 1000
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.4022269248962402,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.3667,
      "step": 1010
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8610240334378265,
      "eval_loss": 0.3203551471233368,
      "eval_runtime": 15.3956,
      "eval_samples_per_second": 186.482,
      "eval_steps_per_second": 5.846,
      "step": 1010
    },
    {
      "epoch": 5.05,
      "grad_norm": 1.3242789506912231,
      "learning_rate": 2.7502750275027505e-05,
      "loss": 0.3563,
      "step": 1020
    },
    {
      "epoch": 5.1,
      "grad_norm": 1.8484835624694824,
      "learning_rate": 2.722772277227723e-05,
      "loss": 0.3295,
      "step": 1030
    },
    {
      "epoch": 5.15,
      "grad_norm": 1.2119253873825073,
      "learning_rate": 2.6952695269526958e-05,
      "loss": 0.3789,
      "step": 1040
    },
    {
      "epoch": 5.2,
      "grad_norm": 1.6589637994766235,
      "learning_rate": 2.667766776677668e-05,
      "loss": 0.3379,
      "step": 1050
    },
    {
      "epoch": 5.25,
      "grad_norm": 1.5714747905731201,
      "learning_rate": 2.64026402640264e-05,
      "loss": 0.3675,
      "step": 1060
    },
    {
      "epoch": 5.3,
      "grad_norm": 1.3527103662490845,
      "learning_rate": 2.6127612761276126e-05,
      "loss": 0.3573,
      "step": 1070
    },
    {
      "epoch": 5.35,
      "grad_norm": 1.055513620376587,
      "learning_rate": 2.5852585258525853e-05,
      "loss": 0.3478,
      "step": 1080
    },
    {
      "epoch": 5.4,
      "grad_norm": 1.6614227294921875,
      "learning_rate": 2.557755775577558e-05,
      "loss": 0.3692,
      "step": 1090
    },
    {
      "epoch": 5.45,
      "grad_norm": 1.5745677947998047,
      "learning_rate": 2.53025302530253e-05,
      "loss": 0.331,
      "step": 1100
    },
    {
      "epoch": 5.5,
      "grad_norm": 1.4897499084472656,
      "learning_rate": 2.5027502750275028e-05,
      "loss": 0.352,
      "step": 1110
    },
    {
      "epoch": 5.54,
      "grad_norm": 1.4070786237716675,
      "learning_rate": 2.4752475247524754e-05,
      "loss": 0.345,
      "step": 1120
    },
    {
      "epoch": 5.59,
      "grad_norm": 2.2010486125946045,
      "learning_rate": 2.447744774477448e-05,
      "loss": 0.3213,
      "step": 1130
    },
    {
      "epoch": 5.64,
      "grad_norm": 1.4076497554779053,
      "learning_rate": 2.4202420242024203e-05,
      "loss": 0.3414,
      "step": 1140
    },
    {
      "epoch": 5.69,
      "grad_norm": 1.168713092803955,
      "learning_rate": 2.392739273927393e-05,
      "loss": 0.3521,
      "step": 1150
    },
    {
      "epoch": 5.74,
      "grad_norm": 1.8131704330444336,
      "learning_rate": 2.3652365236523656e-05,
      "loss": 0.3576,
      "step": 1160
    },
    {
      "epoch": 5.79,
      "grad_norm": 1.2432703971862793,
      "learning_rate": 2.337733773377338e-05,
      "loss": 0.3416,
      "step": 1170
    },
    {
      "epoch": 5.84,
      "grad_norm": 2.248685598373413,
      "learning_rate": 2.31023102310231e-05,
      "loss": 0.3396,
      "step": 1180
    },
    {
      "epoch": 5.89,
      "grad_norm": 1.8308826684951782,
      "learning_rate": 2.2827282728272828e-05,
      "loss": 0.3466,
      "step": 1190
    },
    {
      "epoch": 5.94,
      "grad_norm": 2.5257883071899414,
      "learning_rate": 2.2552255225522554e-05,
      "loss": 0.3186,
      "step": 1200
    },
    {
      "epoch": 5.99,
      "grad_norm": 1.7196460962295532,
      "learning_rate": 2.227722772277228e-05,
      "loss": 0.3545,
      "step": 1210
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8659003831417624,
      "eval_loss": 0.31437239050865173,
      "eval_runtime": 15.7136,
      "eval_samples_per_second": 182.707,
      "eval_steps_per_second": 5.728,
      "step": 1212
    },
    {
      "epoch": 6.04,
      "grad_norm": 1.643741250038147,
      "learning_rate": 2.2002200220022003e-05,
      "loss": 0.3356,
      "step": 1220
    },
    {
      "epoch": 6.09,
      "grad_norm": 1.5010825395584106,
      "learning_rate": 2.172717271727173e-05,
      "loss": 0.3288,
      "step": 1230
    },
    {
      "epoch": 6.14,
      "grad_norm": 1.2441235780715942,
      "learning_rate": 2.1452145214521452e-05,
      "loss": 0.3395,
      "step": 1240
    },
    {
      "epoch": 6.19,
      "grad_norm": 1.351552128791809,
      "learning_rate": 2.1177117711771178e-05,
      "loss": 0.3011,
      "step": 1250
    },
    {
      "epoch": 6.24,
      "grad_norm": 1.3512260913848877,
      "learning_rate": 2.09020902090209e-05,
      "loss": 0.3416,
      "step": 1260
    },
    {
      "epoch": 6.29,
      "grad_norm": 1.5813019275665283,
      "learning_rate": 2.0627062706270627e-05,
      "loss": 0.3523,
      "step": 1270
    },
    {
      "epoch": 6.34,
      "grad_norm": 1.5249056816101074,
      "learning_rate": 2.0352035203520354e-05,
      "loss": 0.3288,
      "step": 1280
    },
    {
      "epoch": 6.39,
      "grad_norm": 1.9175376892089844,
      "learning_rate": 2.007700770077008e-05,
      "loss": 0.3527,
      "step": 1290
    },
    {
      "epoch": 6.44,
      "grad_norm": 1.5546538829803467,
      "learning_rate": 1.9801980198019803e-05,
      "loss": 0.3076,
      "step": 1300
    },
    {
      "epoch": 6.49,
      "grad_norm": 1.5470608472824097,
      "learning_rate": 1.952695269526953e-05,
      "loss": 0.3303,
      "step": 1310
    },
    {
      "epoch": 6.53,
      "grad_norm": 1.3036069869995117,
      "learning_rate": 1.9251925192519252e-05,
      "loss": 0.3527,
      "step": 1320
    },
    {
      "epoch": 6.58,
      "grad_norm": 1.524294137954712,
      "learning_rate": 1.8976897689768978e-05,
      "loss": 0.3243,
      "step": 1330
    },
    {
      "epoch": 6.63,
      "grad_norm": 1.5366973876953125,
      "learning_rate": 1.87018701870187e-05,
      "loss": 0.3299,
      "step": 1340
    },
    {
      "epoch": 6.68,
      "grad_norm": 1.2275673151016235,
      "learning_rate": 1.8426842684268427e-05,
      "loss": 0.2928,
      "step": 1350
    },
    {
      "epoch": 6.73,
      "grad_norm": 1.4205721616744995,
      "learning_rate": 1.8151815181518153e-05,
      "loss": 0.3215,
      "step": 1360
    },
    {
      "epoch": 6.78,
      "grad_norm": 1.4984817504882812,
      "learning_rate": 1.787678767876788e-05,
      "loss": 0.3008,
      "step": 1370
    },
    {
      "epoch": 6.83,
      "grad_norm": 1.6792049407958984,
      "learning_rate": 1.7601760176017602e-05,
      "loss": 0.3403,
      "step": 1380
    },
    {
      "epoch": 6.88,
      "grad_norm": 1.5373748540878296,
      "learning_rate": 1.7326732673267325e-05,
      "loss": 0.3353,
      "step": 1390
    },
    {
      "epoch": 6.93,
      "grad_norm": 2.501495361328125,
      "learning_rate": 1.705170517051705e-05,
      "loss": 0.3219,
      "step": 1400
    },
    {
      "epoch": 6.98,
      "grad_norm": 1.3856743574142456,
      "learning_rate": 1.6776677667766778e-05,
      "loss": 0.3137,
      "step": 1410
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.864158829676071,
      "eval_loss": 0.3307534158229828,
      "eval_runtime": 15.9606,
      "eval_samples_per_second": 179.88,
      "eval_steps_per_second": 5.639,
      "step": 1414
    },
    {
      "epoch": 7.03,
      "grad_norm": 1.5321968793869019,
      "learning_rate": 1.6501650165016504e-05,
      "loss": 0.308,
      "step": 1420
    },
    {
      "epoch": 7.08,
      "grad_norm": 1.4582158327102661,
      "learning_rate": 1.6226622662266227e-05,
      "loss": 0.3168,
      "step": 1430
    },
    {
      "epoch": 7.13,
      "grad_norm": 1.4425067901611328,
      "learning_rate": 1.5951595159515953e-05,
      "loss": 0.3213,
      "step": 1440
    },
    {
      "epoch": 7.18,
      "grad_norm": 1.933104395866394,
      "learning_rate": 1.567656765676568e-05,
      "loss": 0.2978,
      "step": 1450
    },
    {
      "epoch": 7.23,
      "grad_norm": 1.5549027919769287,
      "learning_rate": 1.5401540154015402e-05,
      "loss": 0.3281,
      "step": 1460
    },
    {
      "epoch": 7.28,
      "grad_norm": 1.4376338720321655,
      "learning_rate": 1.5126512651265127e-05,
      "loss": 0.2896,
      "step": 1470
    },
    {
      "epoch": 7.33,
      "grad_norm": 1.414781093597412,
      "learning_rate": 1.4851485148514851e-05,
      "loss": 0.3135,
      "step": 1480
    },
    {
      "epoch": 7.38,
      "grad_norm": 1.3260300159454346,
      "learning_rate": 1.4576457645764577e-05,
      "loss": 0.2991,
      "step": 1490
    },
    {
      "epoch": 7.43,
      "grad_norm": 1.8632981777191162,
      "learning_rate": 1.4301430143014302e-05,
      "loss": 0.3135,
      "step": 1500
    },
    {
      "epoch": 7.48,
      "grad_norm": 1.6400821208953857,
      "learning_rate": 1.4026402640264028e-05,
      "loss": 0.2882,
      "step": 1510
    },
    {
      "epoch": 7.52,
      "grad_norm": 1.8777371644973755,
      "learning_rate": 1.3751375137513753e-05,
      "loss": 0.3539,
      "step": 1520
    },
    {
      "epoch": 7.57,
      "grad_norm": 1.431818962097168,
      "learning_rate": 1.3476347634763479e-05,
      "loss": 0.3065,
      "step": 1530
    },
    {
      "epoch": 7.62,
      "grad_norm": 2.002371311187744,
      "learning_rate": 1.32013201320132e-05,
      "loss": 0.3362,
      "step": 1540
    },
    {
      "epoch": 7.67,
      "grad_norm": 1.3172950744628906,
      "learning_rate": 1.2926292629262926e-05,
      "loss": 0.3062,
      "step": 1550
    },
    {
      "epoch": 7.72,
      "grad_norm": 2.155853271484375,
      "learning_rate": 1.265126512651265e-05,
      "loss": 0.3151,
      "step": 1560
    },
    {
      "epoch": 7.77,
      "grad_norm": 1.8984310626983643,
      "learning_rate": 1.2376237623762377e-05,
      "loss": 0.3488,
      "step": 1570
    },
    {
      "epoch": 7.82,
      "grad_norm": 1.3027973175048828,
      "learning_rate": 1.2101210121012102e-05,
      "loss": 0.3197,
      "step": 1580
    },
    {
      "epoch": 7.87,
      "grad_norm": 1.9755516052246094,
      "learning_rate": 1.1826182618261828e-05,
      "loss": 0.3287,
      "step": 1590
    },
    {
      "epoch": 7.92,
      "grad_norm": 1.6631439924240112,
      "learning_rate": 1.155115511551155e-05,
      "loss": 0.303,
      "step": 1600
    },
    {
      "epoch": 7.97,
      "grad_norm": 1.8642737865447998,
      "learning_rate": 1.1276127612761277e-05,
      "loss": 0.3178,
      "step": 1610
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8645071403692093,
      "eval_loss": 0.32295528054237366,
      "eval_runtime": 15.3242,
      "eval_samples_per_second": 187.351,
      "eval_steps_per_second": 5.873,
      "step": 1616
    },
    {
      "epoch": 8.02,
      "grad_norm": 1.4315767288208008,
      "learning_rate": 1.1001100110011001e-05,
      "loss": 0.3078,
      "step": 1620
    },
    {
      "epoch": 8.07,
      "grad_norm": 1.6558310985565186,
      "learning_rate": 1.0726072607260726e-05,
      "loss": 0.2853,
      "step": 1630
    },
    {
      "epoch": 8.12,
      "grad_norm": 1.906076431274414,
      "learning_rate": 1.045104510451045e-05,
      "loss": 0.2814,
      "step": 1640
    },
    {
      "epoch": 8.17,
      "grad_norm": 2.086242914199829,
      "learning_rate": 1.0176017601760177e-05,
      "loss": 0.2847,
      "step": 1650
    },
    {
      "epoch": 8.22,
      "grad_norm": 1.793742299079895,
      "learning_rate": 9.900990099009901e-06,
      "loss": 0.2945,
      "step": 1660
    },
    {
      "epoch": 8.27,
      "grad_norm": 1.7942755222320557,
      "learning_rate": 9.625962596259626e-06,
      "loss": 0.296,
      "step": 1670
    },
    {
      "epoch": 8.32,
      "grad_norm": 1.7433489561080933,
      "learning_rate": 9.35093509350935e-06,
      "loss": 0.2885,
      "step": 1680
    },
    {
      "epoch": 8.37,
      "grad_norm": 1.6308104991912842,
      "learning_rate": 9.075907590759077e-06,
      "loss": 0.3134,
      "step": 1690
    },
    {
      "epoch": 8.42,
      "grad_norm": 1.6005682945251465,
      "learning_rate": 8.800880088008801e-06,
      "loss": 0.3206,
      "step": 1700
    },
    {
      "epoch": 8.47,
      "grad_norm": 1.563693642616272,
      "learning_rate": 8.525852585258526e-06,
      "loss": 0.3112,
      "step": 1710
    },
    {
      "epoch": 8.51,
      "grad_norm": 1.796925663948059,
      "learning_rate": 8.250825082508252e-06,
      "loss": 0.3076,
      "step": 1720
    },
    {
      "epoch": 8.56,
      "grad_norm": 1.5874933004379272,
      "learning_rate": 7.975797579757976e-06,
      "loss": 0.3197,
      "step": 1730
    },
    {
      "epoch": 8.61,
      "grad_norm": 1.5310964584350586,
      "learning_rate": 7.700770077007701e-06,
      "loss": 0.3026,
      "step": 1740
    },
    {
      "epoch": 8.66,
      "grad_norm": 1.9441354274749756,
      "learning_rate": 7.4257425742574256e-06,
      "loss": 0.3051,
      "step": 1750
    },
    {
      "epoch": 8.71,
      "grad_norm": 1.8038005828857422,
      "learning_rate": 7.150715071507151e-06,
      "loss": 0.3107,
      "step": 1760
    },
    {
      "epoch": 8.76,
      "grad_norm": 4.733364582061768,
      "learning_rate": 6.875687568756876e-06,
      "loss": 0.2939,
      "step": 1770
    },
    {
      "epoch": 8.81,
      "grad_norm": 1.5854134559631348,
      "learning_rate": 6.6006600660066e-06,
      "loss": 0.3049,
      "step": 1780
    },
    {
      "epoch": 8.86,
      "grad_norm": 1.7545024156570435,
      "learning_rate": 6.325632563256325e-06,
      "loss": 0.2936,
      "step": 1790
    },
    {
      "epoch": 8.91,
      "grad_norm": 1.6307064294815063,
      "learning_rate": 6.050605060506051e-06,
      "loss": 0.2855,
      "step": 1800
    },
    {
      "epoch": 8.96,
      "grad_norm": 1.6208785772323608,
      "learning_rate": 5.775577557755775e-06,
      "loss": 0.2998,
      "step": 1810
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8707767328456983,
      "eval_loss": 0.32057538628578186,
      "eval_runtime": 15.4893,
      "eval_samples_per_second": 185.354,
      "eval_steps_per_second": 5.81,
      "step": 1818
    },
    {
      "epoch": 9.01,
      "grad_norm": 1.7324903011322021,
      "learning_rate": 5.500550055005501e-06,
      "loss": 0.2916,
      "step": 1820
    },
    {
      "epoch": 9.06,
      "grad_norm": 1.4792495965957642,
      "learning_rate": 5.225522552255225e-06,
      "loss": 0.2771,
      "step": 1830
    },
    {
      "epoch": 9.11,
      "grad_norm": 2.207402229309082,
      "learning_rate": 4.950495049504951e-06,
      "loss": 0.2859,
      "step": 1840
    },
    {
      "epoch": 9.16,
      "grad_norm": 1.9077599048614502,
      "learning_rate": 4.675467546754675e-06,
      "loss": 0.2833,
      "step": 1850
    },
    {
      "epoch": 9.21,
      "grad_norm": 1.7705243825912476,
      "learning_rate": 4.400440044004401e-06,
      "loss": 0.3007,
      "step": 1860
    },
    {
      "epoch": 9.26,
      "grad_norm": 1.74075448513031,
      "learning_rate": 4.125412541254126e-06,
      "loss": 0.2912,
      "step": 1870
    },
    {
      "epoch": 9.31,
      "grad_norm": 1.8058630228042603,
      "learning_rate": 3.8503850385038505e-06,
      "loss": 0.297,
      "step": 1880
    },
    {
      "epoch": 9.36,
      "grad_norm": 1.7161786556243896,
      "learning_rate": 3.5753575357535755e-06,
      "loss": 0.2789,
      "step": 1890
    },
    {
      "epoch": 9.41,
      "grad_norm": 2.8628289699554443,
      "learning_rate": 3.3003300330033e-06,
      "loss": 0.3018,
      "step": 1900
    },
    {
      "epoch": 9.46,
      "grad_norm": 1.9037891626358032,
      "learning_rate": 3.0253025302530254e-06,
      "loss": 0.3043,
      "step": 1910
    },
    {
      "epoch": 9.5,
      "grad_norm": 1.5443955659866333,
      "learning_rate": 2.7502750275027504e-06,
      "loss": 0.2813,
      "step": 1920
    },
    {
      "epoch": 9.55,
      "grad_norm": 1.6395975351333618,
      "learning_rate": 2.4752475247524753e-06,
      "loss": 0.2942,
      "step": 1930
    },
    {
      "epoch": 9.6,
      "grad_norm": 1.4861618280410767,
      "learning_rate": 2.2002200220022003e-06,
      "loss": 0.2769,
      "step": 1940
    },
    {
      "epoch": 9.65,
      "grad_norm": 1.9940297603607178,
      "learning_rate": 1.9251925192519253e-06,
      "loss": 0.2833,
      "step": 1950
    },
    {
      "epoch": 9.7,
      "grad_norm": 1.800776720046997,
      "learning_rate": 1.65016501650165e-06,
      "loss": 0.2948,
      "step": 1960
    },
    {
      "epoch": 9.75,
      "grad_norm": 1.4160261154174805,
      "learning_rate": 1.3751375137513752e-06,
      "loss": 0.2932,
      "step": 1970
    },
    {
      "epoch": 9.8,
      "grad_norm": 1.8922903537750244,
      "learning_rate": 1.1001100110011001e-06,
      "loss": 0.3189,
      "step": 1980
    },
    {
      "epoch": 9.85,
      "grad_norm": 1.754544734954834,
      "learning_rate": 8.25082508250825e-07,
      "loss": 0.2907,
      "step": 1990
    },
    {
      "epoch": 9.9,
      "grad_norm": 1.8808295726776123,
      "learning_rate": 5.500550055005501e-07,
      "loss": 0.2763,
      "step": 2000
    },
    {
      "epoch": 9.95,
      "grad_norm": 2.0124707221984863,
      "learning_rate": 2.7502750275027504e-07,
      "loss": 0.2611,
      "step": 2010
    },
    {
      "epoch": 10.0,
      "grad_norm": 1.8302078247070312,
      "learning_rate": 0.0,
      "loss": 0.2773,
      "step": 2020
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8732149076976663,
      "eval_loss": 0.3263641893863678,
      "eval_runtime": 15.9034,
      "eval_samples_per_second": 180.527,
      "eval_steps_per_second": 5.659,
      "step": 2020
    },
    {
      "epoch": 10.0,
      "step": 2020,
      "total_flos": 2.0021605356722135e+19,
      "train_loss": 0.37925267927717454,
      "train_runtime": 3266.1275,
      "train_samples_per_second": 79.106,
      "train_steps_per_second": 0.618
    }
  ],
  "logging_steps": 10,
  "max_steps": 2020,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 2.0021605356722135e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}