scribbl-scan / trainer_state.json
ericvo's picture
Upload folder using huggingface_hub (#1)
3afb1bf
raw
history blame
31.1 kB
{
"best_metric": 0.16805602610111237,
"best_model_checkpoint": "./vit-handwritten-characters/checkpoint-8400",
"epoch": 2.952548330404218,
"global_step": 8400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 9.882835383714118e-05,
"loss": 2.6668,
"step": 100
},
{
"epoch": 0.04,
"eval_accuracy": 0.7636050763605077,
"eval_loss": 1.8537834882736206,
"eval_runtime": 53.1177,
"eval_samples_per_second": 87.523,
"eval_steps_per_second": 2.749,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 9.765670767428236e-05,
"loss": 1.3385,
"step": 200
},
{
"epoch": 0.07,
"eval_accuracy": 0.8698644869864487,
"eval_loss": 1.0306403636932373,
"eval_runtime": 50.6679,
"eval_samples_per_second": 91.754,
"eval_steps_per_second": 2.882,
"step": 200
},
{
"epoch": 0.11,
"learning_rate": 9.648506151142355e-05,
"loss": 0.8372,
"step": 300
},
{
"epoch": 0.11,
"eval_accuracy": 0.8657775865777586,
"eval_loss": 0.7356651425361633,
"eval_runtime": 51.2063,
"eval_samples_per_second": 90.79,
"eval_steps_per_second": 2.851,
"step": 300
},
{
"epoch": 0.14,
"learning_rate": 9.531341534856473e-05,
"loss": 0.5838,
"step": 400
},
{
"epoch": 0.14,
"eval_accuracy": 0.899118089911809,
"eval_loss": 0.5292066335678101,
"eval_runtime": 51.0683,
"eval_samples_per_second": 91.035,
"eval_steps_per_second": 2.859,
"step": 400
},
{
"epoch": 0.18,
"learning_rate": 9.414176918570592e-05,
"loss": 0.4572,
"step": 500
},
{
"epoch": 0.18,
"eval_accuracy": 0.901699290169929,
"eval_loss": 0.4536021947860718,
"eval_runtime": 51.5623,
"eval_samples_per_second": 90.163,
"eval_steps_per_second": 2.832,
"step": 500
},
{
"epoch": 0.21,
"learning_rate": 9.29701230228471e-05,
"loss": 0.3808,
"step": 600
},
{
"epoch": 0.21,
"eval_accuracy": 0.9141750914175092,
"eval_loss": 0.37226173281669617,
"eval_runtime": 51.6002,
"eval_samples_per_second": 90.097,
"eval_steps_per_second": 2.829,
"step": 600
},
{
"epoch": 0.25,
"learning_rate": 9.17984768599883e-05,
"loss": 0.393,
"step": 700
},
{
"epoch": 0.25,
"eval_accuracy": 0.899763389976339,
"eval_loss": 0.42711377143859863,
"eval_runtime": 53.3282,
"eval_samples_per_second": 87.177,
"eval_steps_per_second": 2.738,
"step": 700
},
{
"epoch": 0.28,
"learning_rate": 9.062683069712947e-05,
"loss": 0.3611,
"step": 800
},
{
"epoch": 0.28,
"eval_accuracy": 0.8793288879328888,
"eval_loss": 0.41316691040992737,
"eval_runtime": 52.5044,
"eval_samples_per_second": 88.545,
"eval_steps_per_second": 2.781,
"step": 800
},
{
"epoch": 0.32,
"learning_rate": 8.945518453427065e-05,
"loss": 0.3436,
"step": 900
},
{
"epoch": 0.32,
"eval_accuracy": 0.9068616906861691,
"eval_loss": 0.34593629837036133,
"eval_runtime": 52.4712,
"eval_samples_per_second": 88.601,
"eval_steps_per_second": 2.782,
"step": 900
},
{
"epoch": 0.35,
"learning_rate": 8.828353837141183e-05,
"loss": 0.3147,
"step": 1000
},
{
"epoch": 0.35,
"eval_accuracy": 0.9135297913529792,
"eval_loss": 0.32545194029808044,
"eval_runtime": 55.2872,
"eval_samples_per_second": 84.088,
"eval_steps_per_second": 2.641,
"step": 1000
},
{
"epoch": 0.39,
"learning_rate": 8.711189220855302e-05,
"loss": 0.2887,
"step": 1100
},
{
"epoch": 0.39,
"eval_accuracy": 0.9137448913744891,
"eval_loss": 0.3017228841781616,
"eval_runtime": 52.7469,
"eval_samples_per_second": 88.138,
"eval_steps_per_second": 2.768,
"step": 1100
},
{
"epoch": 0.42,
"learning_rate": 8.59402460456942e-05,
"loss": 0.2768,
"step": 1200
},
{
"epoch": 0.42,
"eval_accuracy": 0.9130995913099591,
"eval_loss": 0.3049895465373993,
"eval_runtime": 52.9848,
"eval_samples_per_second": 87.742,
"eval_steps_per_second": 2.756,
"step": 1200
},
{
"epoch": 0.46,
"learning_rate": 8.476859988283538e-05,
"loss": 0.3113,
"step": 1300
},
{
"epoch": 0.46,
"eval_accuracy": 0.9167562916756292,
"eval_loss": 0.2875942885875702,
"eval_runtime": 54.7094,
"eval_samples_per_second": 84.976,
"eval_steps_per_second": 2.669,
"step": 1300
},
{
"epoch": 0.49,
"learning_rate": 8.359695371997657e-05,
"loss": 0.2607,
"step": 1400
},
{
"epoch": 0.49,
"eval_accuracy": 0.9098730909873091,
"eval_loss": 0.33047187328338623,
"eval_runtime": 55.3261,
"eval_samples_per_second": 84.029,
"eval_steps_per_second": 2.639,
"step": 1400
},
{
"epoch": 0.53,
"learning_rate": 8.242530755711776e-05,
"loss": 0.288,
"step": 1500
},
{
"epoch": 0.53,
"eval_accuracy": 0.9098730909873091,
"eval_loss": 0.3238196074962616,
"eval_runtime": 53.2815,
"eval_samples_per_second": 87.254,
"eval_steps_per_second": 2.74,
"step": 1500
},
{
"epoch": 0.56,
"learning_rate": 8.125366139425894e-05,
"loss": 0.311,
"step": 1600
},
{
"epoch": 0.56,
"eval_accuracy": 0.9128844912884492,
"eval_loss": 0.2932077944278717,
"eval_runtime": 54.1056,
"eval_samples_per_second": 85.925,
"eval_steps_per_second": 2.698,
"step": 1600
},
{
"epoch": 0.6,
"learning_rate": 8.008201523140012e-05,
"loss": 0.2702,
"step": 1700
},
{
"epoch": 0.6,
"eval_accuracy": 0.9214884921488492,
"eval_loss": 0.281431645154953,
"eval_runtime": 55.0433,
"eval_samples_per_second": 84.461,
"eval_steps_per_second": 2.652,
"step": 1700
},
{
"epoch": 0.63,
"learning_rate": 7.89103690685413e-05,
"loss": 0.2476,
"step": 1800
},
{
"epoch": 0.63,
"eval_accuracy": 0.899118089911809,
"eval_loss": 0.29802632331848145,
"eval_runtime": 53.6155,
"eval_samples_per_second": 86.71,
"eval_steps_per_second": 2.723,
"step": 1800
},
{
"epoch": 0.67,
"learning_rate": 7.773872290568249e-05,
"loss": 0.2833,
"step": 1900
},
{
"epoch": 0.67,
"eval_accuracy": 0.9283716928371692,
"eval_loss": 0.2524644136428833,
"eval_runtime": 53.6383,
"eval_samples_per_second": 86.673,
"eval_steps_per_second": 2.722,
"step": 1900
},
{
"epoch": 0.7,
"learning_rate": 7.656707674282367e-05,
"loss": 0.2542,
"step": 2000
},
{
"epoch": 0.7,
"eval_accuracy": 0.9247149924714992,
"eval_loss": 0.2760772705078125,
"eval_runtime": 53.5886,
"eval_samples_per_second": 86.753,
"eval_steps_per_second": 2.724,
"step": 2000
},
{
"epoch": 0.74,
"learning_rate": 7.539543057996485e-05,
"loss": 0.2405,
"step": 2100
},
{
"epoch": 0.74,
"eval_accuracy": 0.9229941922994193,
"eval_loss": 0.2586613595485687,
"eval_runtime": 55.7532,
"eval_samples_per_second": 83.385,
"eval_steps_per_second": 2.619,
"step": 2100
},
{
"epoch": 0.77,
"learning_rate": 7.422378441710603e-05,
"loss": 0.2404,
"step": 2200
},
{
"epoch": 0.77,
"eval_accuracy": 0.9184770918477092,
"eval_loss": 0.25114524364471436,
"eval_runtime": 54.8709,
"eval_samples_per_second": 84.726,
"eval_steps_per_second": 2.661,
"step": 2200
},
{
"epoch": 0.81,
"learning_rate": 7.305213825424722e-05,
"loss": 0.2277,
"step": 2300
},
{
"epoch": 0.81,
"eval_accuracy": 0.9191223919122392,
"eval_loss": 0.2526193857192993,
"eval_runtime": 54.7258,
"eval_samples_per_second": 84.951,
"eval_steps_per_second": 2.668,
"step": 2300
},
{
"epoch": 0.84,
"learning_rate": 7.188049209138841e-05,
"loss": 0.2399,
"step": 2400
},
{
"epoch": 0.84,
"eval_accuracy": 0.9214884921488492,
"eval_loss": 0.2550754249095917,
"eval_runtime": 55.744,
"eval_samples_per_second": 83.399,
"eval_steps_per_second": 2.619,
"step": 2400
},
{
"epoch": 0.88,
"learning_rate": 7.070884592852959e-05,
"loss": 0.2336,
"step": 2500
},
{
"epoch": 0.88,
"eval_accuracy": 0.9219186921918692,
"eval_loss": 0.23437733948230743,
"eval_runtime": 53.9586,
"eval_samples_per_second": 86.159,
"eval_steps_per_second": 2.706,
"step": 2500
},
{
"epoch": 0.91,
"learning_rate": 6.953719976567077e-05,
"loss": 0.2246,
"step": 2600
},
{
"epoch": 0.91,
"eval_accuracy": 0.9197676919767692,
"eval_loss": 0.24808667600154877,
"eval_runtime": 53.4909,
"eval_samples_per_second": 86.912,
"eval_steps_per_second": 2.729,
"step": 2600
},
{
"epoch": 0.95,
"learning_rate": 6.836555360281195e-05,
"loss": 0.2284,
"step": 2700
},
{
"epoch": 0.95,
"eval_accuracy": 0.9322434932243493,
"eval_loss": 0.21941034495830536,
"eval_runtime": 53.0587,
"eval_samples_per_second": 87.62,
"eval_steps_per_second": 2.752,
"step": 2700
},
{
"epoch": 0.98,
"learning_rate": 6.719390743995314e-05,
"loss": 0.2335,
"step": 2800
},
{
"epoch": 0.98,
"eval_accuracy": 0.9331038933103893,
"eval_loss": 0.2168220579624176,
"eval_runtime": 56.1589,
"eval_samples_per_second": 82.783,
"eval_steps_per_second": 2.6,
"step": 2800
},
{
"epoch": 1.02,
"learning_rate": 6.602226127709432e-05,
"loss": 0.1883,
"step": 2900
},
{
"epoch": 1.02,
"eval_accuracy": 0.9229941922994193,
"eval_loss": 0.24746185541152954,
"eval_runtime": 54.8887,
"eval_samples_per_second": 84.699,
"eval_steps_per_second": 2.66,
"step": 2900
},
{
"epoch": 1.05,
"learning_rate": 6.48506151142355e-05,
"loss": 0.194,
"step": 3000
},
{
"epoch": 1.05,
"eval_accuracy": 0.9328887932888793,
"eval_loss": 0.2247306853532791,
"eval_runtime": 54.695,
"eval_samples_per_second": 84.999,
"eval_steps_per_second": 2.669,
"step": 3000
},
{
"epoch": 1.09,
"learning_rate": 6.367896895137668e-05,
"loss": 0.2097,
"step": 3100
},
{
"epoch": 1.09,
"eval_accuracy": 0.9272961927296193,
"eval_loss": 0.21885570883750916,
"eval_runtime": 55.7057,
"eval_samples_per_second": 83.456,
"eval_steps_per_second": 2.621,
"step": 3100
},
{
"epoch": 1.12,
"learning_rate": 6.250732278851787e-05,
"loss": 0.1663,
"step": 3200
},
{
"epoch": 1.12,
"eval_accuracy": 0.9380511938051194,
"eval_loss": 0.2038552463054657,
"eval_runtime": 57.0481,
"eval_samples_per_second": 81.493,
"eval_steps_per_second": 2.559,
"step": 3200
},
{
"epoch": 1.16,
"learning_rate": 6.133567662565906e-05,
"loss": 0.1847,
"step": 3300
},
{
"epoch": 1.16,
"eval_accuracy": 0.9266508926650893,
"eval_loss": 0.2347552329301834,
"eval_runtime": 56.4891,
"eval_samples_per_second": 82.299,
"eval_steps_per_second": 2.585,
"step": 3300
},
{
"epoch": 1.2,
"learning_rate": 6.016403046280023e-05,
"loss": 0.1809,
"step": 3400
},
{
"epoch": 1.2,
"eval_accuracy": 0.9236394923639493,
"eval_loss": 0.24315237998962402,
"eval_runtime": 54.6687,
"eval_samples_per_second": 85.04,
"eval_steps_per_second": 2.671,
"step": 3400
},
{
"epoch": 1.23,
"learning_rate": 5.899238429994142e-05,
"loss": 0.1787,
"step": 3500
},
{
"epoch": 1.23,
"eval_accuracy": 0.9339642933964294,
"eval_loss": 0.21847452223300934,
"eval_runtime": 54.0398,
"eval_samples_per_second": 86.029,
"eval_steps_per_second": 2.702,
"step": 3500
},
{
"epoch": 1.27,
"learning_rate": 5.782073813708261e-05,
"loss": 0.183,
"step": 3600
},
{
"epoch": 1.27,
"eval_accuracy": 0.9363303936330394,
"eval_loss": 0.20778249204158783,
"eval_runtime": 54.441,
"eval_samples_per_second": 85.395,
"eval_steps_per_second": 2.682,
"step": 3600
},
{
"epoch": 1.3,
"learning_rate": 5.664909197422379e-05,
"loss": 0.1704,
"step": 3700
},
{
"epoch": 1.3,
"eval_accuracy": 0.9152505915250592,
"eval_loss": 0.23343944549560547,
"eval_runtime": 56.1215,
"eval_samples_per_second": 82.838,
"eval_steps_per_second": 2.601,
"step": 3700
},
{
"epoch": 1.34,
"learning_rate": 5.547744581136497e-05,
"loss": 0.2002,
"step": 3800
},
{
"epoch": 1.34,
"eval_accuracy": 0.9212733921273392,
"eval_loss": 0.23881135880947113,
"eval_runtime": 54.8777,
"eval_samples_per_second": 84.716,
"eval_steps_per_second": 2.66,
"step": 3800
},
{
"epoch": 1.37,
"learning_rate": 5.430579964850615e-05,
"loss": 0.1787,
"step": 3900
},
{
"epoch": 1.37,
"eval_accuracy": 0.9260055926005593,
"eval_loss": 0.22407278418540955,
"eval_runtime": 54.5169,
"eval_samples_per_second": 85.276,
"eval_steps_per_second": 2.678,
"step": 3900
},
{
"epoch": 1.41,
"learning_rate": 5.3134153485647345e-05,
"loss": 0.1757,
"step": 4000
},
{
"epoch": 1.41,
"eval_accuracy": 0.9324585932458593,
"eval_loss": 0.2097584456205368,
"eval_runtime": 54.3298,
"eval_samples_per_second": 85.57,
"eval_steps_per_second": 2.687,
"step": 4000
},
{
"epoch": 1.44,
"learning_rate": 5.1962507322788524e-05,
"loss": 0.1667,
"step": 4100
},
{
"epoch": 1.44,
"eval_accuracy": 0.9348246934824693,
"eval_loss": 0.21198517084121704,
"eval_runtime": 53.7704,
"eval_samples_per_second": 86.46,
"eval_steps_per_second": 2.715,
"step": 4100
},
{
"epoch": 1.48,
"learning_rate": 5.07908611599297e-05,
"loss": 0.2078,
"step": 4200
},
{
"epoch": 1.48,
"eval_accuracy": 0.9348246934824693,
"eval_loss": 0.2187454104423523,
"eval_runtime": 54.7943,
"eval_samples_per_second": 84.845,
"eval_steps_per_second": 2.665,
"step": 4200
},
{
"epoch": 1.51,
"learning_rate": 4.961921499707089e-05,
"loss": 0.1775,
"step": 4300
},
{
"epoch": 1.51,
"eval_accuracy": 0.9294471929447193,
"eval_loss": 0.226194366812706,
"eval_runtime": 54.2742,
"eval_samples_per_second": 85.658,
"eval_steps_per_second": 2.69,
"step": 4300
},
{
"epoch": 1.55,
"learning_rate": 4.844756883421207e-05,
"loss": 0.1771,
"step": 4400
},
{
"epoch": 1.55,
"eval_accuracy": 0.9311679931167993,
"eval_loss": 0.2125757783651352,
"eval_runtime": 53.3871,
"eval_samples_per_second": 87.081,
"eval_steps_per_second": 2.735,
"step": 4400
},
{
"epoch": 1.58,
"learning_rate": 4.727592267135325e-05,
"loss": 0.158,
"step": 4500
},
{
"epoch": 1.58,
"eval_accuracy": 0.9343944934394494,
"eval_loss": 0.20714816451072693,
"eval_runtime": 53.7213,
"eval_samples_per_second": 86.539,
"eval_steps_per_second": 2.718,
"step": 4500
},
{
"epoch": 1.62,
"learning_rate": 4.610427650849444e-05,
"loss": 0.1792,
"step": 4600
},
{
"epoch": 1.62,
"eval_accuracy": 0.9275112927511293,
"eval_loss": 0.227621927857399,
"eval_runtime": 54.9746,
"eval_samples_per_second": 84.566,
"eval_steps_per_second": 2.656,
"step": 4600
},
{
"epoch": 1.65,
"learning_rate": 4.4932630345635615e-05,
"loss": 0.1523,
"step": 4700
},
{
"epoch": 1.65,
"eval_accuracy": 0.9268659926865993,
"eval_loss": 0.21835899353027344,
"eval_runtime": 55.0826,
"eval_samples_per_second": 84.4,
"eval_steps_per_second": 2.651,
"step": 4700
},
{
"epoch": 1.69,
"learning_rate": 4.37609841827768e-05,
"loss": 0.1739,
"step": 4800
},
{
"epoch": 1.69,
"eval_accuracy": 0.9322434932243493,
"eval_loss": 0.22359773516654968,
"eval_runtime": 53.2692,
"eval_samples_per_second": 87.274,
"eval_steps_per_second": 2.741,
"step": 4800
},
{
"epoch": 1.72,
"learning_rate": 4.2589338019917986e-05,
"loss": 0.1638,
"step": 4900
},
{
"epoch": 1.72,
"eval_accuracy": 0.9402021940202194,
"eval_loss": 0.18819227814674377,
"eval_runtime": 52.562,
"eval_samples_per_second": 88.448,
"eval_steps_per_second": 2.778,
"step": 4900
},
{
"epoch": 1.76,
"learning_rate": 4.141769185705917e-05,
"loss": 0.1653,
"step": 5000
},
{
"epoch": 1.76,
"eval_accuracy": 0.9309528930952893,
"eval_loss": 0.21739357709884644,
"eval_runtime": 54.6403,
"eval_samples_per_second": 85.084,
"eval_steps_per_second": 2.672,
"step": 5000
},
{
"epoch": 1.79,
"learning_rate": 4.024604569420035e-05,
"loss": 0.1584,
"step": 5100
},
{
"epoch": 1.79,
"eval_accuracy": 0.9384813938481393,
"eval_loss": 0.197902649641037,
"eval_runtime": 54.1444,
"eval_samples_per_second": 85.863,
"eval_steps_per_second": 2.696,
"step": 5100
},
{
"epoch": 1.83,
"learning_rate": 3.9074399531341535e-05,
"loss": 0.1621,
"step": 5200
},
{
"epoch": 1.83,
"eval_accuracy": 0.9371907937190793,
"eval_loss": 0.20193301141262054,
"eval_runtime": 52.6121,
"eval_samples_per_second": 88.364,
"eval_steps_per_second": 2.775,
"step": 5200
},
{
"epoch": 1.86,
"learning_rate": 3.790275336848272e-05,
"loss": 0.156,
"step": 5300
},
{
"epoch": 1.86,
"eval_accuracy": 0.9322434932243493,
"eval_loss": 0.20995856821537018,
"eval_runtime": 52.8452,
"eval_samples_per_second": 87.974,
"eval_steps_per_second": 2.763,
"step": 5300
},
{
"epoch": 1.9,
"learning_rate": 3.6731107205623906e-05,
"loss": 0.1819,
"step": 5400
},
{
"epoch": 1.9,
"eval_accuracy": 0.9384813938481393,
"eval_loss": 0.18913036584854126,
"eval_runtime": 53.1008,
"eval_samples_per_second": 87.55,
"eval_steps_per_second": 2.749,
"step": 5400
},
{
"epoch": 1.93,
"learning_rate": 3.5559461042765085e-05,
"loss": 0.1676,
"step": 5500
},
{
"epoch": 1.93,
"eval_accuracy": 0.9397719939771993,
"eval_loss": 0.18966327607631683,
"eval_runtime": 54.5409,
"eval_samples_per_second": 85.239,
"eval_steps_per_second": 2.677,
"step": 5500
},
{
"epoch": 1.97,
"learning_rate": 3.438781487990627e-05,
"loss": 0.1675,
"step": 5600
},
{
"epoch": 1.97,
"eval_accuracy": 0.9393417939341794,
"eval_loss": 0.1928705871105194,
"eval_runtime": 54.2748,
"eval_samples_per_second": 85.657,
"eval_steps_per_second": 2.69,
"step": 5600
},
{
"epoch": 2.0,
"learning_rate": 3.3216168717047456e-05,
"loss": 0.1389,
"step": 5700
},
{
"epoch": 2.0,
"eval_accuracy": 0.9389115938911594,
"eval_loss": 0.1925574094057083,
"eval_runtime": 53.5821,
"eval_samples_per_second": 86.764,
"eval_steps_per_second": 2.725,
"step": 5700
},
{
"epoch": 2.04,
"learning_rate": 3.204452255418864e-05,
"loss": 0.1217,
"step": 5800
},
{
"epoch": 2.04,
"eval_accuracy": 0.9324585932458593,
"eval_loss": 0.1982860118150711,
"eval_runtime": 53.4582,
"eval_samples_per_second": 86.965,
"eval_steps_per_second": 2.731,
"step": 5800
},
{
"epoch": 2.07,
"learning_rate": 3.087287639132982e-05,
"loss": 0.1158,
"step": 5900
},
{
"epoch": 2.07,
"eval_accuracy": 0.9391266939126693,
"eval_loss": 0.1853022575378418,
"eval_runtime": 52.3558,
"eval_samples_per_second": 88.796,
"eval_steps_per_second": 2.789,
"step": 5900
},
{
"epoch": 2.11,
"learning_rate": 2.9701230228471005e-05,
"loss": 0.1375,
"step": 6000
},
{
"epoch": 2.11,
"eval_accuracy": 0.9371907937190793,
"eval_loss": 0.19718600809574127,
"eval_runtime": 53.8918,
"eval_samples_per_second": 86.265,
"eval_steps_per_second": 2.709,
"step": 6000
},
{
"epoch": 2.14,
"learning_rate": 2.8541300527240778e-05,
"loss": 0.1195,
"step": 6100
},
{
"epoch": 2.14,
"eval_accuracy": 0.9384813938481393,
"eval_loss": 0.19445277750492096,
"eval_runtime": 53.8229,
"eval_samples_per_second": 86.376,
"eval_steps_per_second": 2.713,
"step": 6100
},
{
"epoch": 2.18,
"learning_rate": 2.7369654364381957e-05,
"loss": 0.1249,
"step": 6200
},
{
"epoch": 2.18,
"eval_accuracy": 0.9350397935039794,
"eval_loss": 0.19001764059066772,
"eval_runtime": 52.3182,
"eval_samples_per_second": 88.86,
"eval_steps_per_second": 2.791,
"step": 6200
},
{
"epoch": 2.21,
"learning_rate": 2.619800820152314e-05,
"loss": 0.1204,
"step": 6300
},
{
"epoch": 2.21,
"eval_accuracy": 0.9324585932458593,
"eval_loss": 0.20320840179920197,
"eval_runtime": 52.8746,
"eval_samples_per_second": 87.925,
"eval_steps_per_second": 2.761,
"step": 6300
},
{
"epoch": 2.25,
"learning_rate": 2.5026362038664324e-05,
"loss": 0.122,
"step": 6400
},
{
"epoch": 2.25,
"eval_accuracy": 0.9376209937620994,
"eval_loss": 0.17533761262893677,
"eval_runtime": 52.222,
"eval_samples_per_second": 89.024,
"eval_steps_per_second": 2.796,
"step": 6400
},
{
"epoch": 2.28,
"learning_rate": 2.385471587580551e-05,
"loss": 0.1136,
"step": 6500
},
{
"epoch": 2.28,
"eval_accuracy": 0.9406323940632394,
"eval_loss": 0.18234318494796753,
"eval_runtime": 53.2472,
"eval_samples_per_second": 87.31,
"eval_steps_per_second": 2.742,
"step": 6500
},
{
"epoch": 2.32,
"learning_rate": 2.268306971294669e-05,
"loss": 0.125,
"step": 6600
},
{
"epoch": 2.32,
"eval_accuracy": 0.9391266939126693,
"eval_loss": 0.1814049780368805,
"eval_runtime": 55.3063,
"eval_samples_per_second": 84.059,
"eval_steps_per_second": 2.64,
"step": 6600
},
{
"epoch": 2.36,
"learning_rate": 2.1511423550087877e-05,
"loss": 0.1142,
"step": 6700
},
{
"epoch": 2.36,
"eval_accuracy": 0.9402021940202194,
"eval_loss": 0.1924043744802475,
"eval_runtime": 53.3253,
"eval_samples_per_second": 87.182,
"eval_steps_per_second": 2.738,
"step": 6700
},
{
"epoch": 2.39,
"learning_rate": 2.033977738722906e-05,
"loss": 0.1258,
"step": 6800
},
{
"epoch": 2.39,
"eval_accuracy": 0.9421380942138095,
"eval_loss": 0.1804322451353073,
"eval_runtime": 52.0681,
"eval_samples_per_second": 89.287,
"eval_steps_per_second": 2.804,
"step": 6800
},
{
"epoch": 2.43,
"learning_rate": 1.916813122437024e-05,
"loss": 0.1178,
"step": 6900
},
{
"epoch": 2.43,
"eval_accuracy": 0.9374058937405894,
"eval_loss": 0.18380628526210785,
"eval_runtime": 52.0076,
"eval_samples_per_second": 89.391,
"eval_steps_per_second": 2.807,
"step": 6900
},
{
"epoch": 2.46,
"learning_rate": 1.7996485061511426e-05,
"loss": 0.1093,
"step": 7000
},
{
"epoch": 2.46,
"eval_accuracy": 0.9391266939126693,
"eval_loss": 0.17910005152225494,
"eval_runtime": 51.873,
"eval_samples_per_second": 89.623,
"eval_steps_per_second": 2.815,
"step": 7000
},
{
"epoch": 2.5,
"learning_rate": 1.6824838898652608e-05,
"loss": 0.1069,
"step": 7100
},
{
"epoch": 2.5,
"eval_accuracy": 0.9391266939126693,
"eval_loss": 0.18617549538612366,
"eval_runtime": 54.5914,
"eval_samples_per_second": 85.16,
"eval_steps_per_second": 2.674,
"step": 7100
},
{
"epoch": 2.53,
"learning_rate": 1.565319273579379e-05,
"loss": 0.1361,
"step": 7200
},
{
"epoch": 2.53,
"eval_accuracy": 0.9417078941707894,
"eval_loss": 0.17183491587638855,
"eval_runtime": 53.9807,
"eval_samples_per_second": 86.123,
"eval_steps_per_second": 2.705,
"step": 7200
},
{
"epoch": 2.57,
"learning_rate": 1.4481546572934974e-05,
"loss": 0.1167,
"step": 7300
},
{
"epoch": 2.57,
"eval_accuracy": 0.9380511938051194,
"eval_loss": 0.18089379370212555,
"eval_runtime": 52.2028,
"eval_samples_per_second": 89.056,
"eval_steps_per_second": 2.797,
"step": 7300
},
{
"epoch": 2.6,
"learning_rate": 1.3309900410076156e-05,
"loss": 0.127,
"step": 7400
},
{
"epoch": 2.6,
"eval_accuracy": 0.9445041944504194,
"eval_loss": 0.17267315089702606,
"eval_runtime": 51.7533,
"eval_samples_per_second": 89.83,
"eval_steps_per_second": 2.821,
"step": 7400
},
{
"epoch": 2.64,
"learning_rate": 1.2138254247217341e-05,
"loss": 0.1215,
"step": 7500
},
{
"epoch": 2.64,
"eval_accuracy": 0.9402021940202194,
"eval_loss": 0.18060338497161865,
"eval_runtime": 53.2058,
"eval_samples_per_second": 87.378,
"eval_steps_per_second": 2.744,
"step": 7500
},
{
"epoch": 2.67,
"learning_rate": 1.0966608084358525e-05,
"loss": 0.1162,
"step": 7600
},
{
"epoch": 2.67,
"eval_accuracy": 0.9404172940417294,
"eval_loss": 0.18343867361545563,
"eval_runtime": 56.0804,
"eval_samples_per_second": 82.899,
"eval_steps_per_second": 2.603,
"step": 7600
},
{
"epoch": 2.71,
"learning_rate": 9.794961921499708e-06,
"loss": 0.1182,
"step": 7700
},
{
"epoch": 2.71,
"eval_accuracy": 0.9406323940632394,
"eval_loss": 0.17546848952770233,
"eval_runtime": 53.4141,
"eval_samples_per_second": 87.037,
"eval_steps_per_second": 2.733,
"step": 7700
},
{
"epoch": 2.74,
"learning_rate": 8.62331575864089e-06,
"loss": 0.1179,
"step": 7800
},
{
"epoch": 2.74,
"eval_accuracy": 0.9410625941062594,
"eval_loss": 0.179013192653656,
"eval_runtime": 54.9709,
"eval_samples_per_second": 84.572,
"eval_steps_per_second": 2.656,
"step": 7800
},
{
"epoch": 2.78,
"learning_rate": 7.451669595782074e-06,
"loss": 0.1012,
"step": 7900
},
{
"epoch": 2.78,
"eval_accuracy": 0.9427833942783395,
"eval_loss": 0.17139938473701477,
"eval_runtime": 53.4602,
"eval_samples_per_second": 86.962,
"eval_steps_per_second": 2.731,
"step": 7900
},
{
"epoch": 2.81,
"learning_rate": 6.280023432923257e-06,
"loss": 0.1186,
"step": 8000
},
{
"epoch": 2.81,
"eval_accuracy": 0.9393417939341794,
"eval_loss": 0.17887155711650848,
"eval_runtime": 53.0073,
"eval_samples_per_second": 87.705,
"eval_steps_per_second": 2.754,
"step": 8000
},
{
"epoch": 2.85,
"learning_rate": 5.1083772700644406e-06,
"loss": 0.1305,
"step": 8100
},
{
"epoch": 2.85,
"eval_accuracy": 0.9408474940847494,
"eval_loss": 0.17335866391658783,
"eval_runtime": 52.8785,
"eval_samples_per_second": 87.919,
"eval_steps_per_second": 2.761,
"step": 8100
},
{
"epoch": 2.88,
"learning_rate": 3.936731107205624e-06,
"loss": 0.1126,
"step": 8200
},
{
"epoch": 2.88,
"eval_accuracy": 0.9397719939771993,
"eval_loss": 0.1753973364830017,
"eval_runtime": 51.7413,
"eval_samples_per_second": 89.851,
"eval_steps_per_second": 2.822,
"step": 8200
},
{
"epoch": 2.92,
"learning_rate": 2.776801405975396e-06,
"loss": 0.1128,
"step": 8300
},
{
"epoch": 2.92,
"eval_accuracy": 0.9445041944504194,
"eval_loss": 0.1688772737979889,
"eval_runtime": 51.7644,
"eval_samples_per_second": 89.811,
"eval_steps_per_second": 2.82,
"step": 8300
},
{
"epoch": 2.95,
"learning_rate": 1.605155243116579e-06,
"loss": 0.1108,
"step": 8400
},
{
"epoch": 2.95,
"eval_accuracy": 0.9442890944289094,
"eval_loss": 0.16805602610111237,
"eval_runtime": 53.4934,
"eval_samples_per_second": 86.908,
"eval_steps_per_second": 2.729,
"step": 8400
}
],
"max_steps": 8535,
"num_train_epochs": 3,
"total_flos": 2.083300191455087e+19,
"trial_name": null,
"trial_params": null
}