|
{ |
|
"best_metric": 0.8701298701298701, |
|
"best_model_checkpoint": "MAE-CT-M1N0-M12_v8_split5_v3/checkpoint-4760", |
|
"epoch": 147.00579710144928, |
|
"eval_steps": 500, |
|
"global_step": 10350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000966183574879227, |
|
"grad_norm": 5.63077974319458, |
|
"learning_rate": 9.661835748792271e-08, |
|
"loss": 0.7586, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.001932367149758454, |
|
"grad_norm": 2.0964934825897217, |
|
"learning_rate": 1.9323671497584542e-07, |
|
"loss": 0.7254, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002898550724637681, |
|
"grad_norm": 2.0449180603027344, |
|
"learning_rate": 2.8985507246376816e-07, |
|
"loss": 0.6984, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.003864734299516908, |
|
"grad_norm": 1.9611525535583496, |
|
"learning_rate": 3.8647342995169085e-07, |
|
"loss": 0.7315, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004830917874396135, |
|
"grad_norm": 3.5923993587493896, |
|
"learning_rate": 4.830917874396135e-07, |
|
"loss": 0.7284, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005797101449275362, |
|
"grad_norm": 2.7955286502838135, |
|
"learning_rate": 5.797101449275363e-07, |
|
"loss": 0.6907, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.00676328502415459, |
|
"grad_norm": 4.491306304931641, |
|
"learning_rate": 6.763285024154589e-07, |
|
"loss": 0.685, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.00676328502415459, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 0.6756892800331116, |
|
"eval_runtime": 18.2897, |
|
"eval_samples_per_second": 4.21, |
|
"eval_steps_per_second": 1.094, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0009661835748793, |
|
"grad_norm": 3.633984088897705, |
|
"learning_rate": 7.729468599033817e-07, |
|
"loss": 0.6743, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0019323671497584, |
|
"grad_norm": 4.844719409942627, |
|
"learning_rate": 8.695652173913044e-07, |
|
"loss": 0.65, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0028985507246377, |
|
"grad_norm": 20.677204132080078, |
|
"learning_rate": 9.66183574879227e-07, |
|
"loss": 0.638, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0038647342995168, |
|
"grad_norm": 4.349536895751953, |
|
"learning_rate": 1.0628019323671499e-06, |
|
"loss": 0.6479, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0048309178743962, |
|
"grad_norm": 8.440875053405762, |
|
"learning_rate": 1.1594202898550726e-06, |
|
"loss": 0.5937, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0057971014492753, |
|
"grad_norm": 9.938006401062012, |
|
"learning_rate": 1.2560386473429952e-06, |
|
"loss": 0.7384, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0067632850241546, |
|
"grad_norm": 10.255725860595703, |
|
"learning_rate": 1.3526570048309178e-06, |
|
"loss": 0.5601, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0067632850241546, |
|
"eval_accuracy": 0.6233766233766234, |
|
"eval_loss": 0.6217549443244934, |
|
"eval_runtime": 16.3642, |
|
"eval_samples_per_second": 4.705, |
|
"eval_steps_per_second": 1.222, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.0009661835748793, |
|
"grad_norm": 7.985101699829102, |
|
"learning_rate": 1.4492753623188408e-06, |
|
"loss": 0.7056, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.0019323671497586, |
|
"grad_norm": 7.1389007568359375, |
|
"learning_rate": 1.5458937198067634e-06, |
|
"loss": 0.5766, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0028985507246375, |
|
"grad_norm": 12.669121742248535, |
|
"learning_rate": 1.6425120772946862e-06, |
|
"loss": 0.6746, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.003864734299517, |
|
"grad_norm": 6.364377498626709, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"loss": 0.6363, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.004830917874396, |
|
"grad_norm": 9.995187759399414, |
|
"learning_rate": 1.8357487922705318e-06, |
|
"loss": 0.5837, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0057971014492755, |
|
"grad_norm": 8.345070838928223, |
|
"learning_rate": 1.932367149758454e-06, |
|
"loss": 0.5432, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.006763285024155, |
|
"grad_norm": 37.9766845703125, |
|
"learning_rate": 2.028985507246377e-06, |
|
"loss": 0.6632, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.006763285024155, |
|
"eval_accuracy": 0.6233766233766234, |
|
"eval_loss": 0.6156818866729736, |
|
"eval_runtime": 16.4154, |
|
"eval_samples_per_second": 4.691, |
|
"eval_steps_per_second": 1.218, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0009661835748793, |
|
"grad_norm": 12.400394439697266, |
|
"learning_rate": 2.1256038647342997e-06, |
|
"loss": 0.6558, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0019323671497586, |
|
"grad_norm": 10.40713119506836, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.5752, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0028985507246375, |
|
"grad_norm": 12.890250205993652, |
|
"learning_rate": 2.3188405797101453e-06, |
|
"loss": 0.528, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.003864734299517, |
|
"grad_norm": 10.023517608642578, |
|
"learning_rate": 2.4154589371980677e-06, |
|
"loss": 0.5814, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.004830917874396, |
|
"grad_norm": 10.220820426940918, |
|
"learning_rate": 2.5120772946859904e-06, |
|
"loss": 0.6507, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.0057971014492755, |
|
"grad_norm": 19.018766403198242, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"loss": 0.5541, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.006763285024155, |
|
"grad_norm": 7.0940680503845215, |
|
"learning_rate": 2.7053140096618356e-06, |
|
"loss": 0.5153, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.006763285024155, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 0.565974771976471, |
|
"eval_runtime": 16.5709, |
|
"eval_samples_per_second": 4.647, |
|
"eval_steps_per_second": 1.207, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.000966183574879, |
|
"grad_norm": 13.853311538696289, |
|
"learning_rate": 2.801932367149759e-06, |
|
"loss": 0.5608, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.001932367149759, |
|
"grad_norm": 24.638504028320312, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"loss": 0.5793, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.0028985507246375, |
|
"grad_norm": 22.3065128326416, |
|
"learning_rate": 2.995169082125604e-06, |
|
"loss": 0.4284, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.003864734299517, |
|
"grad_norm": 30.956016540527344, |
|
"learning_rate": 3.0917874396135268e-06, |
|
"loss": 0.7425, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.004830917874396, |
|
"grad_norm": 12.773093223571777, |
|
"learning_rate": 3.188405797101449e-06, |
|
"loss": 0.5068, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.005797101449275, |
|
"grad_norm": 21.808074951171875, |
|
"learning_rate": 3.2850241545893724e-06, |
|
"loss": 0.4752, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.006763285024155, |
|
"grad_norm": 24.596450805664062, |
|
"learning_rate": 3.381642512077295e-06, |
|
"loss": 0.5008, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.006763285024155, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 0.5237749814987183, |
|
"eval_runtime": 16.9105, |
|
"eval_samples_per_second": 4.553, |
|
"eval_steps_per_second": 1.183, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.000966183574879, |
|
"grad_norm": 41.83505630493164, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"loss": 0.415, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.001932367149759, |
|
"grad_norm": 37.28093719482422, |
|
"learning_rate": 3.5748792270531403e-06, |
|
"loss": 0.5244, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.0028985507246375, |
|
"grad_norm": 14.932278633117676, |
|
"learning_rate": 3.6714975845410635e-06, |
|
"loss": 0.4871, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.003864734299517, |
|
"grad_norm": 35.8604736328125, |
|
"learning_rate": 3.768115942028986e-06, |
|
"loss": 0.5247, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.004830917874396, |
|
"grad_norm": 31.0769100189209, |
|
"learning_rate": 3.864734299516908e-06, |
|
"loss": 0.4985, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.005797101449275, |
|
"grad_norm": 16.91299057006836, |
|
"learning_rate": 3.961352657004831e-06, |
|
"loss": 0.5541, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.006763285024155, |
|
"grad_norm": 60.42302703857422, |
|
"learning_rate": 4.057971014492754e-06, |
|
"loss": 0.4879, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.006763285024155, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 0.5012220144271851, |
|
"eval_runtime": 16.1267, |
|
"eval_samples_per_second": 4.775, |
|
"eval_steps_per_second": 1.24, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.000966183574879, |
|
"grad_norm": 12.599077224731445, |
|
"learning_rate": 4.154589371980677e-06, |
|
"loss": 0.4077, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.001932367149759, |
|
"grad_norm": 13.679814338684082, |
|
"learning_rate": 4.251207729468599e-06, |
|
"loss": 0.4849, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.0028985507246375, |
|
"grad_norm": 58.93899917602539, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.4561, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.003864734299517, |
|
"grad_norm": 46.164459228515625, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.4785, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.004830917874396, |
|
"grad_norm": 6.964693546295166, |
|
"learning_rate": 4.541062801932368e-06, |
|
"loss": 0.3693, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.005797101449275, |
|
"grad_norm": 65.39038848876953, |
|
"learning_rate": 4.637681159420291e-06, |
|
"loss": 0.6067, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.006763285024155, |
|
"grad_norm": 24.236276626586914, |
|
"learning_rate": 4.7342995169082125e-06, |
|
"loss": 0.3636, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.006763285024155, |
|
"eval_accuracy": 0.7012987012987013, |
|
"eval_loss": 0.5639926195144653, |
|
"eval_runtime": 16.4373, |
|
"eval_samples_per_second": 4.684, |
|
"eval_steps_per_second": 1.217, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.000966183574879, |
|
"grad_norm": 71.80524444580078, |
|
"learning_rate": 4.830917874396135e-06, |
|
"loss": 0.5243, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.001932367149759, |
|
"grad_norm": 60.86780548095703, |
|
"learning_rate": 4.927536231884059e-06, |
|
"loss": 0.4487, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.0028985507246375, |
|
"grad_norm": 18.15448760986328, |
|
"learning_rate": 5.024154589371981e-06, |
|
"loss": 0.4776, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.003864734299517, |
|
"grad_norm": 25.025348663330078, |
|
"learning_rate": 5.1207729468599045e-06, |
|
"loss": 0.3617, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.004830917874396, |
|
"grad_norm": 8.851715087890625, |
|
"learning_rate": 5.2173913043478265e-06, |
|
"loss": 0.3614, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.005797101449275, |
|
"grad_norm": 72.60469818115234, |
|
"learning_rate": 5.314009661835749e-06, |
|
"loss": 0.7049, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.006763285024155, |
|
"grad_norm": 8.072798728942871, |
|
"learning_rate": 5.410628019323671e-06, |
|
"loss": 0.7238, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.006763285024155, |
|
"eval_accuracy": 0.7012987012987013, |
|
"eval_loss": 0.575639545917511, |
|
"eval_runtime": 16.2503, |
|
"eval_samples_per_second": 4.738, |
|
"eval_steps_per_second": 1.231, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.00096618357488, |
|
"grad_norm": 26.914819717407227, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.4224, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.001932367149758, |
|
"grad_norm": 12.590118408203125, |
|
"learning_rate": 5.603864734299518e-06, |
|
"loss": 1.1756, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.002898550724638, |
|
"grad_norm": 31.019224166870117, |
|
"learning_rate": 5.70048309178744e-06, |
|
"loss": 0.5193, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.003864734299517, |
|
"grad_norm": 25.586570739746094, |
|
"learning_rate": 5.797101449275363e-06, |
|
"loss": 0.5632, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.004830917874395, |
|
"grad_norm": 7.837695121765137, |
|
"learning_rate": 5.893719806763285e-06, |
|
"loss": 0.2409, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.005797101449275, |
|
"grad_norm": 45.919281005859375, |
|
"learning_rate": 5.990338164251208e-06, |
|
"loss": 0.4052, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.006763285024155, |
|
"grad_norm": 9.32940673828125, |
|
"learning_rate": 6.086956521739132e-06, |
|
"loss": 0.3339, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 8.006763285024155, |
|
"eval_accuracy": 0.6883116883116883, |
|
"eval_loss": 0.9894827604293823, |
|
"eval_runtime": 15.6835, |
|
"eval_samples_per_second": 4.91, |
|
"eval_steps_per_second": 1.275, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.00096618357488, |
|
"grad_norm": 4.154600620269775, |
|
"learning_rate": 6.1835748792270535e-06, |
|
"loss": 0.4827, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.001932367149758, |
|
"grad_norm": 43.86359786987305, |
|
"learning_rate": 6.280193236714976e-06, |
|
"loss": 0.4219, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.002898550724638, |
|
"grad_norm": 87.51858520507812, |
|
"learning_rate": 6.376811594202898e-06, |
|
"loss": 0.2489, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.003864734299517, |
|
"grad_norm": 11.23896598815918, |
|
"learning_rate": 6.473429951690822e-06, |
|
"loss": 0.5007, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.004830917874395, |
|
"grad_norm": 27.375701904296875, |
|
"learning_rate": 6.570048309178745e-06, |
|
"loss": 0.2891, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.005797101449275, |
|
"grad_norm": 51.312923431396484, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.4243, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.006763285024155, |
|
"grad_norm": 29.63850975036621, |
|
"learning_rate": 6.76328502415459e-06, |
|
"loss": 0.4152, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.006763285024155, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.5031455159187317, |
|
"eval_runtime": 15.7949, |
|
"eval_samples_per_second": 4.875, |
|
"eval_steps_per_second": 1.266, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.00096618357488, |
|
"grad_norm": 7.607165813446045, |
|
"learning_rate": 6.859903381642513e-06, |
|
"loss": 0.5239, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.001932367149758, |
|
"grad_norm": 6.940621852874756, |
|
"learning_rate": 6.956521739130435e-06, |
|
"loss": 0.2281, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.002898550724638, |
|
"grad_norm": 57.28858947753906, |
|
"learning_rate": 7.053140096618359e-06, |
|
"loss": 0.4428, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.003864734299517, |
|
"grad_norm": 90.76024627685547, |
|
"learning_rate": 7.149758454106281e-06, |
|
"loss": 0.6327, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.004830917874395, |
|
"grad_norm": 59.252777099609375, |
|
"learning_rate": 7.246376811594203e-06, |
|
"loss": 0.7428, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.005797101449275, |
|
"grad_norm": 24.238967895507812, |
|
"learning_rate": 7.342995169082127e-06, |
|
"loss": 0.4681, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 10.006763285024155, |
|
"grad_norm": 0.46667662262916565, |
|
"learning_rate": 7.439613526570049e-06, |
|
"loss": 0.3126, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 10.006763285024155, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.5349549055099487, |
|
"eval_runtime": 15.9144, |
|
"eval_samples_per_second": 4.838, |
|
"eval_steps_per_second": 1.257, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 11.00096618357488, |
|
"grad_norm": 3.171098470687866, |
|
"learning_rate": 7.536231884057972e-06, |
|
"loss": 0.3612, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.001932367149758, |
|
"grad_norm": 51.79991912841797, |
|
"learning_rate": 7.632850241545895e-06, |
|
"loss": 0.2405, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.002898550724638, |
|
"grad_norm": 15.745019912719727, |
|
"learning_rate": 7.729468599033817e-06, |
|
"loss": 0.3578, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.003864734299517, |
|
"grad_norm": 46.960411071777344, |
|
"learning_rate": 7.82608695652174e-06, |
|
"loss": 0.4183, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.004830917874395, |
|
"grad_norm": 19.980567932128906, |
|
"learning_rate": 7.922705314009662e-06, |
|
"loss": 1.0693, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 11.005797101449275, |
|
"grad_norm": 7.018507480621338, |
|
"learning_rate": 8.019323671497586e-06, |
|
"loss": 0.4977, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 11.006763285024155, |
|
"grad_norm": 42.957664489746094, |
|
"learning_rate": 8.115942028985508e-06, |
|
"loss": 0.4479, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 11.006763285024155, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 0.42781147360801697, |
|
"eval_runtime": 17.5652, |
|
"eval_samples_per_second": 4.384, |
|
"eval_steps_per_second": 1.139, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 12.00096618357488, |
|
"grad_norm": 30.186668395996094, |
|
"learning_rate": 8.212560386473431e-06, |
|
"loss": 0.3227, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.001932367149758, |
|
"grad_norm": 95.9303207397461, |
|
"learning_rate": 8.309178743961353e-06, |
|
"loss": 0.3342, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 12.002898550724638, |
|
"grad_norm": 72.12458038330078, |
|
"learning_rate": 8.405797101449275e-06, |
|
"loss": 0.3496, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 12.003864734299517, |
|
"grad_norm": 12.492197036743164, |
|
"learning_rate": 8.502415458937199e-06, |
|
"loss": 0.2547, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 12.004830917874395, |
|
"grad_norm": 31.763296127319336, |
|
"learning_rate": 8.599033816425122e-06, |
|
"loss": 0.4288, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 12.005797101449275, |
|
"grad_norm": 46.830039978027344, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.4244, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.006763285024155, |
|
"grad_norm": 66.69847869873047, |
|
"learning_rate": 8.792270531400966e-06, |
|
"loss": 0.5548, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 12.006763285024155, |
|
"eval_accuracy": 0.7012987012987013, |
|
"eval_loss": 0.6865193843841553, |
|
"eval_runtime": 16.7685, |
|
"eval_samples_per_second": 4.592, |
|
"eval_steps_per_second": 1.193, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 13.00096618357488, |
|
"grad_norm": 1.0070035457611084, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1739, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 13.001932367149758, |
|
"grad_norm": 27.44700813293457, |
|
"learning_rate": 8.985507246376812e-06, |
|
"loss": 0.3626, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.002898550724638, |
|
"grad_norm": 83.7531967163086, |
|
"learning_rate": 9.082125603864736e-06, |
|
"loss": 0.4617, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 13.003864734299517, |
|
"grad_norm": 29.952964782714844, |
|
"learning_rate": 9.178743961352658e-06, |
|
"loss": 0.3059, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.004830917874395, |
|
"grad_norm": 19.046611785888672, |
|
"learning_rate": 9.275362318840581e-06, |
|
"loss": 0.3571, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 13.005797101449275, |
|
"grad_norm": 4.5947980880737305, |
|
"learning_rate": 9.371980676328503e-06, |
|
"loss": 0.5925, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 13.006763285024155, |
|
"grad_norm": 0.1116618886590004, |
|
"learning_rate": 9.468599033816425e-06, |
|
"loss": 0.1509, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 13.006763285024155, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 0.8144263029098511, |
|
"eval_runtime": 16.8839, |
|
"eval_samples_per_second": 4.561, |
|
"eval_steps_per_second": 1.185, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 14.00096618357488, |
|
"grad_norm": 7.001115798950195, |
|
"learning_rate": 9.565217391304349e-06, |
|
"loss": 0.291, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.001932367149758, |
|
"grad_norm": 81.9195327758789, |
|
"learning_rate": 9.66183574879227e-06, |
|
"loss": 0.4609, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.002898550724638, |
|
"grad_norm": 72.53138732910156, |
|
"learning_rate": 9.758454106280194e-06, |
|
"loss": 0.2614, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 14.003864734299517, |
|
"grad_norm": 88.42354583740234, |
|
"learning_rate": 9.855072463768118e-06, |
|
"loss": 0.5673, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 14.004830917874395, |
|
"grad_norm": 182.70054626464844, |
|
"learning_rate": 9.95169082125604e-06, |
|
"loss": 0.3395, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 14.005797101449275, |
|
"grad_norm": 75.77576446533203, |
|
"learning_rate": 9.994632313472894e-06, |
|
"loss": 0.4042, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 14.006763285024155, |
|
"grad_norm": 0.33039167523384094, |
|
"learning_rate": 9.98389694041868e-06, |
|
"loss": 0.4038, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 14.006763285024155, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 0.6039356589317322, |
|
"eval_runtime": 17.6515, |
|
"eval_samples_per_second": 4.362, |
|
"eval_steps_per_second": 1.133, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 15.00096618357488, |
|
"grad_norm": 0.12073725461959839, |
|
"learning_rate": 9.973161567364467e-06, |
|
"loss": 0.2022, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 15.001932367149758, |
|
"grad_norm": 30.217771530151367, |
|
"learning_rate": 9.962426194310253e-06, |
|
"loss": 0.2995, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 15.002898550724638, |
|
"grad_norm": 59.161224365234375, |
|
"learning_rate": 9.95169082125604e-06, |
|
"loss": 0.287, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 15.003864734299517, |
|
"grad_norm": 51.48099136352539, |
|
"learning_rate": 9.940955448201826e-06, |
|
"loss": 0.3779, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 15.004830917874395, |
|
"grad_norm": 67.52349853515625, |
|
"learning_rate": 9.930220075147611e-06, |
|
"loss": 0.1764, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.005797101449275, |
|
"grad_norm": 1.3956732749938965, |
|
"learning_rate": 9.919484702093398e-06, |
|
"loss": 0.4947, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 15.006763285024155, |
|
"grad_norm": 0.3445068299770355, |
|
"learning_rate": 9.908749329039184e-06, |
|
"loss": 0.2748, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 15.006763285024155, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.183449149131775, |
|
"eval_runtime": 15.8908, |
|
"eval_samples_per_second": 4.846, |
|
"eval_steps_per_second": 1.259, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 16.000966183574878, |
|
"grad_norm": 25.889986038208008, |
|
"learning_rate": 9.89801395598497e-06, |
|
"loss": 0.4726, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 16.00193236714976, |
|
"grad_norm": 96.12169647216797, |
|
"learning_rate": 9.887278582930757e-06, |
|
"loss": 0.7904, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 16.002898550724638, |
|
"grad_norm": 36.80066680908203, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.4876, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 16.003864734299516, |
|
"grad_norm": 0.03129780665040016, |
|
"learning_rate": 9.865807836822331e-06, |
|
"loss": 0.0427, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 16.004830917874397, |
|
"grad_norm": 11.805863380432129, |
|
"learning_rate": 9.855072463768118e-06, |
|
"loss": 0.5398, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 16.005797101449275, |
|
"grad_norm": 14.385322570800781, |
|
"learning_rate": 9.844337090713904e-06, |
|
"loss": 0.6715, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 16.006763285024153, |
|
"grad_norm": 0.18029144406318665, |
|
"learning_rate": 9.833601717659689e-06, |
|
"loss": 0.4552, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 16.006763285024153, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 0.7594266533851624, |
|
"eval_runtime": 15.3345, |
|
"eval_samples_per_second": 5.021, |
|
"eval_steps_per_second": 1.304, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 17.000966183574878, |
|
"grad_norm": 0.4168717563152313, |
|
"learning_rate": 9.822866344605476e-06, |
|
"loss": 0.1674, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 17.00193236714976, |
|
"grad_norm": 0.9499201774597168, |
|
"learning_rate": 9.812130971551262e-06, |
|
"loss": 0.2045, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 17.002898550724638, |
|
"grad_norm": 162.95167541503906, |
|
"learning_rate": 9.801395598497048e-06, |
|
"loss": 0.7506, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 17.003864734299516, |
|
"grad_norm": 1.6326782703399658, |
|
"learning_rate": 9.790660225442835e-06, |
|
"loss": 0.398, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 17.004830917874397, |
|
"grad_norm": 15.130646705627441, |
|
"learning_rate": 9.779924852388621e-06, |
|
"loss": 0.3404, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 17.005797101449275, |
|
"grad_norm": 0.08184617757797241, |
|
"learning_rate": 9.769189479334408e-06, |
|
"loss": 0.1294, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 17.006763285024153, |
|
"grad_norm": 316.427001953125, |
|
"learning_rate": 9.758454106280194e-06, |
|
"loss": 0.5584, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 17.006763285024153, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 0.9481449127197266, |
|
"eval_runtime": 15.5575, |
|
"eval_samples_per_second": 4.949, |
|
"eval_steps_per_second": 1.286, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 18.000966183574878, |
|
"grad_norm": 0.19292013347148895, |
|
"learning_rate": 9.74771873322598e-06, |
|
"loss": 0.2401, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 18.00193236714976, |
|
"grad_norm": 0.07158540934324265, |
|
"learning_rate": 9.736983360171767e-06, |
|
"loss": 0.2077, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 18.002898550724638, |
|
"grad_norm": 10.159412384033203, |
|
"learning_rate": 9.726247987117554e-06, |
|
"loss": 0.0463, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 18.003864734299516, |
|
"grad_norm": 0.16102541983127594, |
|
"learning_rate": 9.71551261406334e-06, |
|
"loss": 0.114, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 18.004830917874397, |
|
"grad_norm": 0.08238888531923294, |
|
"learning_rate": 9.704777241009125e-06, |
|
"loss": 0.2091, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.005797101449275, |
|
"grad_norm": 0.6891394257545471, |
|
"learning_rate": 9.694041867954911e-06, |
|
"loss": 0.133, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 18.006763285024153, |
|
"grad_norm": 0.02565833181142807, |
|
"learning_rate": 9.683306494900698e-06, |
|
"loss": 0.0919, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 18.006763285024153, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.0079569816589355, |
|
"eval_runtime": 17.4793, |
|
"eval_samples_per_second": 4.405, |
|
"eval_steps_per_second": 1.144, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 19.000966183574878, |
|
"grad_norm": 0.20424237847328186, |
|
"learning_rate": 9.672571121846484e-06, |
|
"loss": 0.0852, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 19.00193236714976, |
|
"grad_norm": 0.056213993579149246, |
|
"learning_rate": 9.66183574879227e-06, |
|
"loss": 0.0014, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 19.002898550724638, |
|
"grad_norm": 66.11773681640625, |
|
"learning_rate": 9.651100375738057e-06, |
|
"loss": 0.0981, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 19.003864734299516, |
|
"grad_norm": 0.020251819863915443, |
|
"learning_rate": 9.640365002683844e-06, |
|
"loss": 0.1629, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 19.004830917874397, |
|
"grad_norm": 0.08519980311393738, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 0.1436, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 19.005797101449275, |
|
"grad_norm": 0.04679597541689873, |
|
"learning_rate": 9.618894256575418e-06, |
|
"loss": 0.4893, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 19.006763285024153, |
|
"grad_norm": 0.04914074391126633, |
|
"learning_rate": 9.608158883521203e-06, |
|
"loss": 0.2309, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 19.006763285024153, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.8453315496444702, |
|
"eval_runtime": 16.6003, |
|
"eval_samples_per_second": 4.638, |
|
"eval_steps_per_second": 1.205, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 20.000966183574878, |
|
"grad_norm": 0.1529330313205719, |
|
"learning_rate": 9.59742351046699e-06, |
|
"loss": 0.2619, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 20.00193236714976, |
|
"grad_norm": 0.02123161591589451, |
|
"learning_rate": 9.586688137412776e-06, |
|
"loss": 0.0256, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 20.002898550724638, |
|
"grad_norm": 0.014065372757613659, |
|
"learning_rate": 9.575952764358562e-06, |
|
"loss": 0.1996, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 20.003864734299516, |
|
"grad_norm": 0.7381812930107117, |
|
"learning_rate": 9.565217391304349e-06, |
|
"loss": 0.0424, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 20.004830917874397, |
|
"grad_norm": 0.06800421327352524, |
|
"learning_rate": 9.554482018250135e-06, |
|
"loss": 0.21, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 20.005797101449275, |
|
"grad_norm": 80.73758697509766, |
|
"learning_rate": 9.543746645195922e-06, |
|
"loss": 0.0242, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 20.006763285024153, |
|
"grad_norm": 0.0316414013504982, |
|
"learning_rate": 9.533011272141708e-06, |
|
"loss": 0.191, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 20.006763285024153, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.069457769393921, |
|
"eval_runtime": 147.8544, |
|
"eval_samples_per_second": 0.521, |
|
"eval_steps_per_second": 0.135, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 21.000966183574878, |
|
"grad_norm": 0.012025800533592701, |
|
"learning_rate": 9.522275899087494e-06, |
|
"loss": 0.0317, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 21.00193236714976, |
|
"grad_norm": 115.9197769165039, |
|
"learning_rate": 9.511540526033281e-06, |
|
"loss": 0.2591, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 21.002898550724638, |
|
"grad_norm": 0.7661476135253906, |
|
"learning_rate": 9.500805152979067e-06, |
|
"loss": 0.2929, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 21.003864734299516, |
|
"grad_norm": 89.17171478271484, |
|
"learning_rate": 9.490069779924854e-06, |
|
"loss": 0.501, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 21.004830917874397, |
|
"grad_norm": 0.056331489235162735, |
|
"learning_rate": 9.479334406870639e-06, |
|
"loss": 0.2755, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 21.005797101449275, |
|
"grad_norm": 0.035313863307237625, |
|
"learning_rate": 9.468599033816425e-06, |
|
"loss": 0.315, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 21.006763285024153, |
|
"grad_norm": 0.06920187175273895, |
|
"learning_rate": 9.457863660762211e-06, |
|
"loss": 0.2013, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 21.006763285024153, |
|
"eval_accuracy": 0.7402597402597403, |
|
"eval_loss": 1.465735673904419, |
|
"eval_runtime": 17.3838, |
|
"eval_samples_per_second": 4.429, |
|
"eval_steps_per_second": 1.15, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 22.000966183574878, |
|
"grad_norm": 3.0384302139282227, |
|
"learning_rate": 9.447128287707998e-06, |
|
"loss": 0.2171, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 22.00193236714976, |
|
"grad_norm": 0.022978052496910095, |
|
"learning_rate": 9.436392914653784e-06, |
|
"loss": 0.2218, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 22.002898550724638, |
|
"grad_norm": 0.2309640645980835, |
|
"learning_rate": 9.42565754159957e-06, |
|
"loss": 0.1913, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 22.003864734299516, |
|
"grad_norm": 0.027831105515360832, |
|
"learning_rate": 9.414922168545357e-06, |
|
"loss": 0.0069, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 22.004830917874397, |
|
"grad_norm": 0.01753852143883705, |
|
"learning_rate": 9.404186795491144e-06, |
|
"loss": 0.0014, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 22.005797101449275, |
|
"grad_norm": 0.051825933158397675, |
|
"learning_rate": 9.39345142243693e-06, |
|
"loss": 0.0012, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 22.006763285024153, |
|
"grad_norm": 23.33223533630371, |
|
"learning_rate": 9.382716049382717e-06, |
|
"loss": 0.6645, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 22.006763285024153, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.0601636171340942, |
|
"eval_runtime": 18.3493, |
|
"eval_samples_per_second": 4.196, |
|
"eval_steps_per_second": 1.09, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 23.000966183574878, |
|
"grad_norm": 42.5572395324707, |
|
"learning_rate": 9.371980676328503e-06, |
|
"loss": 0.1616, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 23.00193236714976, |
|
"grad_norm": 0.06940260529518127, |
|
"learning_rate": 9.36124530327429e-06, |
|
"loss": 0.0026, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 23.002898550724638, |
|
"grad_norm": 24.54999351501465, |
|
"learning_rate": 9.350509930220076e-06, |
|
"loss": 0.1298, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 23.003864734299516, |
|
"grad_norm": 0.014982779510319233, |
|
"learning_rate": 9.339774557165862e-06, |
|
"loss": 0.0021, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 23.004830917874397, |
|
"grad_norm": 0.022154109552502632, |
|
"learning_rate": 9.329039184111649e-06, |
|
"loss": 0.1601, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 23.005797101449275, |
|
"grad_norm": 0.1332763135433197, |
|
"learning_rate": 9.318303811057435e-06, |
|
"loss": 0.1839, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 23.006763285024153, |
|
"grad_norm": 0.0714530497789383, |
|
"learning_rate": 9.307568438003222e-06, |
|
"loss": 0.1083, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 23.006763285024153, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.2147879600524902, |
|
"eval_runtime": 42.3924, |
|
"eval_samples_per_second": 1.816, |
|
"eval_steps_per_second": 0.472, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 24.000966183574878, |
|
"grad_norm": 10.763915061950684, |
|
"learning_rate": 9.296833064949008e-06, |
|
"loss": 0.0153, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 24.00193236714976, |
|
"grad_norm": 0.34083086252212524, |
|
"learning_rate": 9.286097691894795e-06, |
|
"loss": 0.3552, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 24.002898550724638, |
|
"grad_norm": 0.09669429808855057, |
|
"learning_rate": 9.275362318840581e-06, |
|
"loss": 0.1833, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 24.003864734299516, |
|
"grad_norm": 3.468942403793335, |
|
"learning_rate": 9.264626945786368e-06, |
|
"loss": 0.0984, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 24.004830917874397, |
|
"grad_norm": 105.82479095458984, |
|
"learning_rate": 9.253891572732154e-06, |
|
"loss": 0.1335, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 24.005797101449275, |
|
"grad_norm": 0.03978487849235535, |
|
"learning_rate": 9.243156199677939e-06, |
|
"loss": 0.0364, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 24.006763285024153, |
|
"grad_norm": 0.05299947410821915, |
|
"learning_rate": 9.232420826623725e-06, |
|
"loss": 0.0885, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 24.006763285024153, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.200799822807312, |
|
"eval_runtime": 18.7931, |
|
"eval_samples_per_second": 4.097, |
|
"eval_steps_per_second": 1.064, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 25.000966183574878, |
|
"grad_norm": 0.07938944548368454, |
|
"learning_rate": 9.221685453569512e-06, |
|
"loss": 0.1654, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 25.00193236714976, |
|
"grad_norm": 0.017891909927129745, |
|
"learning_rate": 9.210950080515298e-06, |
|
"loss": 0.1959, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 25.002898550724638, |
|
"grad_norm": 0.03742482513189316, |
|
"learning_rate": 9.200214707461085e-06, |
|
"loss": 0.2129, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 25.003864734299516, |
|
"grad_norm": 0.018285313621163368, |
|
"learning_rate": 9.189479334406871e-06, |
|
"loss": 0.0005, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 25.004830917874397, |
|
"grad_norm": 0.012646480463445187, |
|
"learning_rate": 9.178743961352658e-06, |
|
"loss": 0.2598, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 25.005797101449275, |
|
"grad_norm": 0.1877349466085434, |
|
"learning_rate": 9.168008588298444e-06, |
|
"loss": 0.3652, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 25.006763285024153, |
|
"grad_norm": 0.04589417949318886, |
|
"learning_rate": 9.15727321524423e-06, |
|
"loss": 0.0015, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 25.006763285024153, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.298748254776001, |
|
"eval_runtime": 18.6643, |
|
"eval_samples_per_second": 4.126, |
|
"eval_steps_per_second": 1.072, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 26.000966183574878, |
|
"grad_norm": 0.007107927929610014, |
|
"learning_rate": 9.146537842190017e-06, |
|
"loss": 0.0241, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 26.00193236714976, |
|
"grad_norm": 0.2932748794555664, |
|
"learning_rate": 9.135802469135803e-06, |
|
"loss": 0.1536, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 26.002898550724638, |
|
"grad_norm": 0.005459180101752281, |
|
"learning_rate": 9.12506709608159e-06, |
|
"loss": 0.0019, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 26.003864734299516, |
|
"grad_norm": 0.05199627950787544, |
|
"learning_rate": 9.114331723027376e-06, |
|
"loss": 0.0547, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 26.004830917874397, |
|
"grad_norm": 0.0027262712828814983, |
|
"learning_rate": 9.103596349973163e-06, |
|
"loss": 0.0003, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 26.005797101449275, |
|
"grad_norm": 0.005350043997168541, |
|
"learning_rate": 9.092860976918949e-06, |
|
"loss": 0.24, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 26.006763285024153, |
|
"grad_norm": 0.003055410925298929, |
|
"learning_rate": 9.082125603864736e-06, |
|
"loss": 0.2372, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 26.006763285024153, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.6224983930587769, |
|
"eval_runtime": 19.2376, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 1.04, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 27.000966183574878, |
|
"grad_norm": 0.013318424113094807, |
|
"learning_rate": 9.071390230810522e-06, |
|
"loss": 0.0192, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 27.00193236714976, |
|
"grad_norm": 0.01380719244480133, |
|
"learning_rate": 9.060654857756308e-06, |
|
"loss": 0.0365, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 27.002898550724638, |
|
"grad_norm": 12.373915672302246, |
|
"learning_rate": 9.049919484702095e-06, |
|
"loss": 0.6307, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 27.003864734299516, |
|
"grad_norm": 186.6871337890625, |
|
"learning_rate": 9.039184111647881e-06, |
|
"loss": 0.5701, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 27.004830917874397, |
|
"grad_norm": 0.41726887226104736, |
|
"learning_rate": 9.028448738593668e-06, |
|
"loss": 0.2799, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 27.005797101449275, |
|
"grad_norm": 0.0075998492538928986, |
|
"learning_rate": 9.017713365539453e-06, |
|
"loss": 0.0908, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 27.006763285024153, |
|
"grad_norm": 0.0030398606322705746, |
|
"learning_rate": 9.006977992485239e-06, |
|
"loss": 0.001, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 27.006763285024153, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.1689021587371826, |
|
"eval_runtime": 15.3074, |
|
"eval_samples_per_second": 5.03, |
|
"eval_steps_per_second": 1.307, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 28.000966183574878, |
|
"grad_norm": 0.004823345225304365, |
|
"learning_rate": 8.996242619431025e-06, |
|
"loss": 0.002, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 28.00193236714976, |
|
"grad_norm": 37.93857955932617, |
|
"learning_rate": 8.985507246376812e-06, |
|
"loss": 0.0136, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 28.002898550724638, |
|
"grad_norm": 0.008819162845611572, |
|
"learning_rate": 8.974771873322598e-06, |
|
"loss": 0.0003, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 28.003864734299516, |
|
"grad_norm": 0.02176712639629841, |
|
"learning_rate": 8.964036500268385e-06, |
|
"loss": 0.0005, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.004830917874397, |
|
"grad_norm": 2.0372092723846436, |
|
"learning_rate": 8.953301127214171e-06, |
|
"loss": 0.0883, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 28.005797101449275, |
|
"grad_norm": 0.0033761654049158096, |
|
"learning_rate": 8.942565754159958e-06, |
|
"loss": 0.0002, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 28.006763285024153, |
|
"grad_norm": 0.015467183664441109, |
|
"learning_rate": 8.931830381105744e-06, |
|
"loss": 0.0006, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 28.006763285024153, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.3817362785339355, |
|
"eval_runtime": 14.6935, |
|
"eval_samples_per_second": 5.24, |
|
"eval_steps_per_second": 1.361, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 29.000966183574878, |
|
"grad_norm": 0.00563892163336277, |
|
"learning_rate": 8.92109500805153e-06, |
|
"loss": 0.0002, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 29.00193236714976, |
|
"grad_norm": 0.008719475008547306, |
|
"learning_rate": 8.910359634997317e-06, |
|
"loss": 0.0002, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 29.002898550724638, |
|
"grad_norm": 0.012711520306766033, |
|
"learning_rate": 8.899624261943104e-06, |
|
"loss": 0.0021, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 29.003864734299516, |
|
"grad_norm": 0.0029001745861023664, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.0012, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 29.004830917874397, |
|
"grad_norm": 0.0027202004566788673, |
|
"learning_rate": 8.878153515834675e-06, |
|
"loss": 0.0259, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 29.005797101449275, |
|
"grad_norm": 0.0022866763174533844, |
|
"learning_rate": 8.867418142780463e-06, |
|
"loss": 0.001, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 29.006763285024153, |
|
"grad_norm": 0.009289148263633251, |
|
"learning_rate": 8.85668276972625e-06, |
|
"loss": 0.0002, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 29.006763285024153, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.714272379875183, |
|
"eval_runtime": 14.4439, |
|
"eval_samples_per_second": 5.331, |
|
"eval_steps_per_second": 1.385, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 30.000966183574878, |
|
"grad_norm": 0.009948095306754112, |
|
"learning_rate": 8.845947396672036e-06, |
|
"loss": 0.1673, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 30.00193236714976, |
|
"grad_norm": 0.0249008946120739, |
|
"learning_rate": 8.835212023617822e-06, |
|
"loss": 0.1536, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 30.002898550724638, |
|
"grad_norm": 0.12144894897937775, |
|
"learning_rate": 8.824476650563609e-06, |
|
"loss": 0.1607, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 30.003864734299516, |
|
"grad_norm": 7.651823997497559, |
|
"learning_rate": 8.813741277509395e-06, |
|
"loss": 0.1802, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 30.004830917874397, |
|
"grad_norm": 0.005258599761873484, |
|
"learning_rate": 8.803005904455182e-06, |
|
"loss": 0.0002, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 30.005797101449275, |
|
"grad_norm": 0.004621783271431923, |
|
"learning_rate": 8.792270531400966e-06, |
|
"loss": 0.0222, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 30.006763285024153, |
|
"grad_norm": 0.012738402932882309, |
|
"learning_rate": 8.781535158346753e-06, |
|
"loss": 0.0012, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 30.006763285024153, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8865219354629517, |
|
"eval_runtime": 14.9876, |
|
"eval_samples_per_second": 5.138, |
|
"eval_steps_per_second": 1.334, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 31.000966183574878, |
|
"grad_norm": 0.004862803500145674, |
|
"learning_rate": 8.77079978529254e-06, |
|
"loss": 0.0002, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 31.00193236714976, |
|
"grad_norm": 0.008277139626443386, |
|
"learning_rate": 8.760064412238326e-06, |
|
"loss": 0.0002, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 31.002898550724638, |
|
"grad_norm": 0.011266889050602913, |
|
"learning_rate": 8.749329039184112e-06, |
|
"loss": 0.0256, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 31.003864734299516, |
|
"grad_norm": 0.037181366235017776, |
|
"learning_rate": 8.738593666129899e-06, |
|
"loss": 0.0389, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 31.004830917874397, |
|
"grad_norm": 0.011379989795386791, |
|
"learning_rate": 8.727858293075685e-06, |
|
"loss": 0.0703, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 31.005797101449275, |
|
"grad_norm": 0.0035820791963487864, |
|
"learning_rate": 8.717122920021472e-06, |
|
"loss": 0.0123, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 31.006763285024153, |
|
"grad_norm": 0.11101510375738144, |
|
"learning_rate": 8.706387546967258e-06, |
|
"loss": 0.153, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 31.006763285024153, |
|
"eval_accuracy": 0.6623376623376623, |
|
"eval_loss": 2.4574289321899414, |
|
"eval_runtime": 14.6437, |
|
"eval_samples_per_second": 5.258, |
|
"eval_steps_per_second": 1.366, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 32.00096618357488, |
|
"grad_norm": 0.007295526098459959, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.2227, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 32.001932367149756, |
|
"grad_norm": 0.005096717271953821, |
|
"learning_rate": 8.684916800858831e-06, |
|
"loss": 0.1528, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 32.00289855072464, |
|
"grad_norm": 0.007106207311153412, |
|
"learning_rate": 8.674181427804617e-06, |
|
"loss": 0.0649, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 32.00386473429952, |
|
"grad_norm": 16.95619773864746, |
|
"learning_rate": 8.663446054750402e-06, |
|
"loss": 0.0751, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 32.00483091787439, |
|
"grad_norm": 0.00349339353851974, |
|
"learning_rate": 8.652710681696189e-06, |
|
"loss": 0.0014, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 32.005797101449275, |
|
"grad_norm": 0.010887386277318, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.0001, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 32.00676328502416, |
|
"grad_norm": 0.011478613130748272, |
|
"learning_rate": 8.631239935587761e-06, |
|
"loss": 0.1308, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 32.00676328502416, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.1799964904785156, |
|
"eval_runtime": 15.238, |
|
"eval_samples_per_second": 5.053, |
|
"eval_steps_per_second": 1.313, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 33.00096618357488, |
|
"grad_norm": 0.0030596402939409018, |
|
"learning_rate": 8.62050456253355e-06, |
|
"loss": 0.0001, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 33.001932367149756, |
|
"grad_norm": 0.008988602086901665, |
|
"learning_rate": 8.609769189479336e-06, |
|
"loss": 0.1962, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 33.00289855072464, |
|
"grad_norm": 0.11388076096773148, |
|
"learning_rate": 8.599033816425122e-06, |
|
"loss": 0.0001, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 33.00386473429952, |
|
"grad_norm": 0.012941384688019753, |
|
"learning_rate": 8.588298443370909e-06, |
|
"loss": 0.0001, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 33.00483091787439, |
|
"grad_norm": 117.23433685302734, |
|
"learning_rate": 8.577563070316695e-06, |
|
"loss": 0.2454, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 33.005797101449275, |
|
"grad_norm": 0.0073850443586707115, |
|
"learning_rate": 8.56682769726248e-06, |
|
"loss": 0.0001, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 33.00676328502416, |
|
"grad_norm": 0.038754310458898544, |
|
"learning_rate": 8.556092324208267e-06, |
|
"loss": 0.0002, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 33.00676328502416, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.2817308902740479, |
|
"eval_runtime": 14.7367, |
|
"eval_samples_per_second": 5.225, |
|
"eval_steps_per_second": 1.357, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 34.00096618357488, |
|
"grad_norm": 0.006986475549638271, |
|
"learning_rate": 8.545356951154053e-06, |
|
"loss": 0.0001, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 34.001932367149756, |
|
"grad_norm": 0.08525365591049194, |
|
"learning_rate": 8.53462157809984e-06, |
|
"loss": 0.1159, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 34.00289855072464, |
|
"grad_norm": 0.005141557659953833, |
|
"learning_rate": 8.523886205045626e-06, |
|
"loss": 0.0209, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 34.00386473429952, |
|
"grad_norm": 0.002347426488995552, |
|
"learning_rate": 8.513150831991412e-06, |
|
"loss": 0.0035, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 34.00483091787439, |
|
"grad_norm": 0.021269038319587708, |
|
"learning_rate": 8.502415458937199e-06, |
|
"loss": 0.0002, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 34.005797101449275, |
|
"grad_norm": 0.0015353817725554109, |
|
"learning_rate": 8.491680085882985e-06, |
|
"loss": 0.0001, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 34.00676328502416, |
|
"grad_norm": 0.002692780690267682, |
|
"learning_rate": 8.480944712828772e-06, |
|
"loss": 0.0001, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 34.00676328502416, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.2770259380340576, |
|
"eval_runtime": 14.7085, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 1.36, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 35.00096618357488, |
|
"grad_norm": 0.0036268895491957664, |
|
"learning_rate": 8.470209339774558e-06, |
|
"loss": 0.0001, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 35.001932367149756, |
|
"grad_norm": 0.02713877335190773, |
|
"learning_rate": 8.459473966720345e-06, |
|
"loss": 0.0001, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 35.00289855072464, |
|
"grad_norm": 0.004495933186262846, |
|
"learning_rate": 8.448738593666131e-06, |
|
"loss": 0.0001, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 35.00386473429952, |
|
"grad_norm": 0.0023365523666143417, |
|
"learning_rate": 8.438003220611916e-06, |
|
"loss": 0.0001, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 35.00483091787439, |
|
"grad_norm": 0.006139947567135096, |
|
"learning_rate": 8.427267847557702e-06, |
|
"loss": 0.0001, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 35.005797101449275, |
|
"grad_norm": 0.3539928197860718, |
|
"learning_rate": 8.416532474503489e-06, |
|
"loss": 0.0001, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 35.00676328502416, |
|
"grad_norm": 0.006096704863011837, |
|
"learning_rate": 8.405797101449275e-06, |
|
"loss": 0.0001, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 35.00676328502416, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 1.2778986692428589, |
|
"eval_runtime": 14.4303, |
|
"eval_samples_per_second": 5.336, |
|
"eval_steps_per_second": 1.386, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 36.00096618357488, |
|
"grad_norm": 0.0016143172979354858, |
|
"learning_rate": 8.395061728395062e-06, |
|
"loss": 0.0001, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 36.001932367149756, |
|
"grad_norm": 0.004879065323621035, |
|
"learning_rate": 8.384326355340848e-06, |
|
"loss": 0.0001, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 36.00289855072464, |
|
"grad_norm": 0.002586913527920842, |
|
"learning_rate": 8.373590982286636e-06, |
|
"loss": 0.0001, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 36.00386473429952, |
|
"grad_norm": 0.011908585205674171, |
|
"learning_rate": 8.362855609232423e-06, |
|
"loss": 0.0001, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 36.00483091787439, |
|
"grad_norm": 0.002101797377690673, |
|
"learning_rate": 8.352120236178209e-06, |
|
"loss": 0.0005, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 36.005797101449275, |
|
"grad_norm": 0.33965930342674255, |
|
"learning_rate": 8.341384863123994e-06, |
|
"loss": 0.1018, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 36.00676328502416, |
|
"grad_norm": 0.0029156110249459743, |
|
"learning_rate": 8.33064949006978e-06, |
|
"loss": 0.0001, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 36.00676328502416, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.3971278667449951, |
|
"eval_runtime": 14.5016, |
|
"eval_samples_per_second": 5.31, |
|
"eval_steps_per_second": 1.379, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 37.00096618357488, |
|
"grad_norm": 160.6613311767578, |
|
"learning_rate": 8.319914117015567e-06, |
|
"loss": 0.0107, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 37.001932367149756, |
|
"grad_norm": 0.001831389730796218, |
|
"learning_rate": 8.309178743961353e-06, |
|
"loss": 0.0021, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 37.00289855072464, |
|
"grad_norm": 0.0028115217573940754, |
|
"learning_rate": 8.29844337090714e-06, |
|
"loss": 0.0002, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 37.00386473429952, |
|
"grad_norm": 100.3589859008789, |
|
"learning_rate": 8.287707997852926e-06, |
|
"loss": 0.1948, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 37.00483091787439, |
|
"grad_norm": 0.0055212159641087055, |
|
"learning_rate": 8.276972624798713e-06, |
|
"loss": 0.0035, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 37.005797101449275, |
|
"grad_norm": 0.0011701483745127916, |
|
"learning_rate": 8.266237251744499e-06, |
|
"loss": 0.0058, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 37.00676328502416, |
|
"grad_norm": 0.0021236445754766464, |
|
"learning_rate": 8.255501878690286e-06, |
|
"loss": 0.0001, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 37.00676328502416, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.126293420791626, |
|
"eval_runtime": 14.5035, |
|
"eval_samples_per_second": 5.309, |
|
"eval_steps_per_second": 1.379, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 38.00096618357488, |
|
"grad_norm": 0.002591678872704506, |
|
"learning_rate": 8.244766505636072e-06, |
|
"loss": 0.0001, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 38.001932367149756, |
|
"grad_norm": 0.002082381397485733, |
|
"learning_rate": 8.234031132581858e-06, |
|
"loss": 0.0001, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 38.00289855072464, |
|
"grad_norm": 0.0038508835714310408, |
|
"learning_rate": 8.223295759527645e-06, |
|
"loss": 0.1384, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 38.00386473429952, |
|
"grad_norm": 0.0028780594002455473, |
|
"learning_rate": 8.212560386473431e-06, |
|
"loss": 0.0001, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 38.00483091787439, |
|
"grad_norm": 0.0069783455692231655, |
|
"learning_rate": 8.201825013419216e-06, |
|
"loss": 0.0194, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 38.005797101449275, |
|
"grad_norm": 0.0021866625174880028, |
|
"learning_rate": 8.191089640365003e-06, |
|
"loss": 0.0001, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 38.00676328502416, |
|
"grad_norm": 0.002578013576567173, |
|
"learning_rate": 8.180354267310789e-06, |
|
"loss": 0.0001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 38.00676328502416, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.1232645511627197, |
|
"eval_runtime": 14.7179, |
|
"eval_samples_per_second": 5.232, |
|
"eval_steps_per_second": 1.359, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 39.00096618357488, |
|
"grad_norm": 0.003141900757327676, |
|
"learning_rate": 8.169618894256575e-06, |
|
"loss": 0.0001, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 39.001932367149756, |
|
"grad_norm": 0.004278377164155245, |
|
"learning_rate": 8.158883521202362e-06, |
|
"loss": 0.1379, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 39.00289855072464, |
|
"grad_norm": 0.0016402786131948233, |
|
"learning_rate": 8.148148148148148e-06, |
|
"loss": 0.1475, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 39.00386473429952, |
|
"grad_norm": 0.1566852182149887, |
|
"learning_rate": 8.137412775093935e-06, |
|
"loss": 0.3523, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 39.00483091787439, |
|
"grad_norm": 0.0037191323935985565, |
|
"learning_rate": 8.126677402039721e-06, |
|
"loss": 0.2542, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 39.005797101449275, |
|
"grad_norm": 0.0030009618494659662, |
|
"learning_rate": 8.115942028985508e-06, |
|
"loss": 0.0003, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 39.00676328502416, |
|
"grad_norm": 0.0024690297432243824, |
|
"learning_rate": 8.105206655931294e-06, |
|
"loss": 0.0675, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 39.00676328502416, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.4884586334228516, |
|
"eval_runtime": 15.941, |
|
"eval_samples_per_second": 4.83, |
|
"eval_steps_per_second": 1.255, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 40.00096618357488, |
|
"grad_norm": 0.00310198194347322, |
|
"learning_rate": 8.09447128287708e-06, |
|
"loss": 0.0029, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 40.001932367149756, |
|
"grad_norm": 0.02491198293864727, |
|
"learning_rate": 8.083735909822867e-06, |
|
"loss": 0.0004, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 40.00289855072464, |
|
"grad_norm": 0.0024280883371829987, |
|
"learning_rate": 8.073000536768653e-06, |
|
"loss": 0.0016, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 40.00386473429952, |
|
"grad_norm": 39.02526092529297, |
|
"learning_rate": 8.06226516371444e-06, |
|
"loss": 0.2267, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 40.00483091787439, |
|
"grad_norm": 0.007119787856936455, |
|
"learning_rate": 8.051529790660226e-06, |
|
"loss": 0.1246, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 40.005797101449275, |
|
"grad_norm": 0.0054905591532588005, |
|
"learning_rate": 8.040794417606013e-06, |
|
"loss": 0.0002, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 40.00676328502416, |
|
"grad_norm": 0.003688159631565213, |
|
"learning_rate": 8.0300590445518e-06, |
|
"loss": 0.0002, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 40.00676328502416, |
|
"eval_accuracy": 0.7012987012987013, |
|
"eval_loss": 1.8405940532684326, |
|
"eval_runtime": 38.4778, |
|
"eval_samples_per_second": 2.001, |
|
"eval_steps_per_second": 0.52, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 41.00096618357488, |
|
"grad_norm": 0.0019798579160124063, |
|
"learning_rate": 8.019323671497586e-06, |
|
"loss": 0.0001, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 41.001932367149756, |
|
"grad_norm": 0.011942965909838676, |
|
"learning_rate": 8.008588298443372e-06, |
|
"loss": 0.0002, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 41.00289855072464, |
|
"grad_norm": 0.0018374843057245016, |
|
"learning_rate": 7.997852925389159e-06, |
|
"loss": 0.0003, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 41.00386473429952, |
|
"grad_norm": 0.0034626899287104607, |
|
"learning_rate": 7.987117552334945e-06, |
|
"loss": 0.0001, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 41.00483091787439, |
|
"grad_norm": 0.0038331467658281326, |
|
"learning_rate": 7.97638217928073e-06, |
|
"loss": 0.0001, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 41.005797101449275, |
|
"grad_norm": 0.0017002819804474711, |
|
"learning_rate": 7.965646806226516e-06, |
|
"loss": 0.0002, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 41.00676328502416, |
|
"grad_norm": 0.0059528648853302, |
|
"learning_rate": 7.954911433172303e-06, |
|
"loss": 0.0001, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 41.00676328502416, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.9084670543670654, |
|
"eval_runtime": 15.0568, |
|
"eval_samples_per_second": 5.114, |
|
"eval_steps_per_second": 1.328, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 42.00096618357488, |
|
"grad_norm": 0.0026344398502260447, |
|
"learning_rate": 7.94417606011809e-06, |
|
"loss": 0.0001, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 42.001932367149756, |
|
"grad_norm": 0.0017737130401656032, |
|
"learning_rate": 7.933440687063876e-06, |
|
"loss": 0.0001, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 42.00289855072464, |
|
"grad_norm": 222.6859588623047, |
|
"learning_rate": 7.922705314009662e-06, |
|
"loss": 0.179, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 42.00386473429952, |
|
"grad_norm": 0.022273195907473564, |
|
"learning_rate": 7.911969940955449e-06, |
|
"loss": 0.1046, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 42.00483091787439, |
|
"grad_norm": 0.02884945087134838, |
|
"learning_rate": 7.901234567901235e-06, |
|
"loss": 0.0018, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 42.005797101449275, |
|
"grad_norm": 1.2516549825668335, |
|
"learning_rate": 7.890499194847021e-06, |
|
"loss": 0.6459, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 42.00676328502416, |
|
"grad_norm": 0.2831589877605438, |
|
"learning_rate": 7.879763821792808e-06, |
|
"loss": 0.0005, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 42.00676328502416, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 1.9380286931991577, |
|
"eval_runtime": 15.4466, |
|
"eval_samples_per_second": 4.985, |
|
"eval_steps_per_second": 1.295, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 43.00096618357488, |
|
"grad_norm": 0.00937322061508894, |
|
"learning_rate": 7.869028448738594e-06, |
|
"loss": 0.4209, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 43.001932367149756, |
|
"grad_norm": 5.275388717651367, |
|
"learning_rate": 7.85829307568438e-06, |
|
"loss": 0.1863, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 43.00289855072464, |
|
"grad_norm": 0.0025148040149360895, |
|
"learning_rate": 7.847557702630167e-06, |
|
"loss": 0.0002, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 43.00386473429952, |
|
"grad_norm": 0.011758288368582726, |
|
"learning_rate": 7.836822329575954e-06, |
|
"loss": 0.0002, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 43.00483091787439, |
|
"grad_norm": 145.53335571289062, |
|
"learning_rate": 7.82608695652174e-06, |
|
"loss": 0.2425, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 43.005797101449275, |
|
"grad_norm": 0.005929389502853155, |
|
"learning_rate": 7.815351583467527e-06, |
|
"loss": 0.179, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 43.00676328502416, |
|
"grad_norm": 0.00786946527659893, |
|
"learning_rate": 7.804616210413313e-06, |
|
"loss": 0.1589, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 43.00676328502416, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 0.9673866033554077, |
|
"eval_runtime": 16.8427, |
|
"eval_samples_per_second": 4.572, |
|
"eval_steps_per_second": 1.187, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 44.00096618357488, |
|
"grad_norm": 0.005430703517049551, |
|
"learning_rate": 7.7938808373591e-06, |
|
"loss": 0.0002, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 44.001932367149756, |
|
"grad_norm": 4.347066879272461, |
|
"learning_rate": 7.783145464304886e-06, |
|
"loss": 0.0009, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 44.00289855072464, |
|
"grad_norm": 0.014723542146384716, |
|
"learning_rate": 7.772410091250672e-06, |
|
"loss": 0.0006, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 44.00386473429952, |
|
"grad_norm": 289.0671691894531, |
|
"learning_rate": 7.761674718196459e-06, |
|
"loss": 0.2869, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 44.00483091787439, |
|
"grad_norm": 0.0035440954379737377, |
|
"learning_rate": 7.750939345142244e-06, |
|
"loss": 0.0001, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 44.005797101449275, |
|
"grad_norm": 0.0034120541531592607, |
|
"learning_rate": 7.74020397208803e-06, |
|
"loss": 0.2225, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 44.00676328502416, |
|
"grad_norm": 0.007564049679785967, |
|
"learning_rate": 7.729468599033817e-06, |
|
"loss": 0.0001, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 44.00676328502416, |
|
"eval_accuracy": 0.7402597402597403, |
|
"eval_loss": 1.5574208498001099, |
|
"eval_runtime": 15.8522, |
|
"eval_samples_per_second": 4.857, |
|
"eval_steps_per_second": 1.262, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 45.00096618357488, |
|
"grad_norm": 0.013103250414133072, |
|
"learning_rate": 7.718733225979603e-06, |
|
"loss": 0.0063, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 45.001932367149756, |
|
"grad_norm": 0.010216855444014072, |
|
"learning_rate": 7.70799785292539e-06, |
|
"loss": 0.2025, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 45.00289855072464, |
|
"grad_norm": 0.004929398186504841, |
|
"learning_rate": 7.697262479871176e-06, |
|
"loss": 0.4529, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 45.00386473429952, |
|
"grad_norm": 48.59390640258789, |
|
"learning_rate": 7.686527106816962e-06, |
|
"loss": 0.1994, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 45.00483091787439, |
|
"grad_norm": 0.003639970440417528, |
|
"learning_rate": 7.675791733762749e-06, |
|
"loss": 0.0002, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 45.005797101449275, |
|
"grad_norm": 0.007403469644486904, |
|
"learning_rate": 7.665056360708535e-06, |
|
"loss": 0.0121, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 45.00676328502416, |
|
"grad_norm": 0.0292381439357996, |
|
"learning_rate": 7.654320987654322e-06, |
|
"loss": 0.0353, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 45.00676328502416, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.1688188314437866, |
|
"eval_runtime": 16.7449, |
|
"eval_samples_per_second": 4.598, |
|
"eval_steps_per_second": 1.194, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 46.00096618357488, |
|
"grad_norm": 0.4787992238998413, |
|
"learning_rate": 7.643585614600108e-06, |
|
"loss": 0.0008, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 46.001932367149756, |
|
"grad_norm": 0.006452680099755526, |
|
"learning_rate": 7.632850241545895e-06, |
|
"loss": 0.2781, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 46.00289855072464, |
|
"grad_norm": 0.003960897680372, |
|
"learning_rate": 7.622114868491681e-06, |
|
"loss": 0.0366, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 46.00386473429952, |
|
"grad_norm": 0.0023400746285915375, |
|
"learning_rate": 7.6113794954374675e-06, |
|
"loss": 0.0001, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 46.00483091787439, |
|
"grad_norm": 0.015635930001735687, |
|
"learning_rate": 7.600644122383254e-06, |
|
"loss": 0.0001, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 46.005797101449275, |
|
"grad_norm": 0.002309344243258238, |
|
"learning_rate": 7.58990874932904e-06, |
|
"loss": 0.0001, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 46.00676328502416, |
|
"grad_norm": 0.0033812953624874353, |
|
"learning_rate": 7.579173376274827e-06, |
|
"loss": 0.0001, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 46.00676328502416, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 1.7684170007705688, |
|
"eval_runtime": 16.1588, |
|
"eval_samples_per_second": 4.765, |
|
"eval_steps_per_second": 1.238, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 47.00096618357488, |
|
"grad_norm": 0.004736083559691906, |
|
"learning_rate": 7.568438003220613e-06, |
|
"loss": 0.0001, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 47.001932367149756, |
|
"grad_norm": 0.0027902736328542233, |
|
"learning_rate": 7.557702630166399e-06, |
|
"loss": 0.0001, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 47.00289855072464, |
|
"grad_norm": 0.0036582525353878736, |
|
"learning_rate": 7.546967257112185e-06, |
|
"loss": 0.0001, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 47.00386473429952, |
|
"grad_norm": 0.002314971061423421, |
|
"learning_rate": 7.536231884057972e-06, |
|
"loss": 0.0001, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 47.00483091787439, |
|
"grad_norm": 231.95797729492188, |
|
"learning_rate": 7.525496511003758e-06, |
|
"loss": 0.0304, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 47.005797101449275, |
|
"grad_norm": 9.556648254394531, |
|
"learning_rate": 7.514761137949545e-06, |
|
"loss": 0.1891, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 47.00676328502416, |
|
"grad_norm": 0.08842357248067856, |
|
"learning_rate": 7.504025764895331e-06, |
|
"loss": 0.0002, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 47.00676328502416, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.3363182544708252, |
|
"eval_runtime": 14.464, |
|
"eval_samples_per_second": 5.324, |
|
"eval_steps_per_second": 1.383, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 48.00096618357488, |
|
"grad_norm": 0.0023621630389243364, |
|
"learning_rate": 7.493290391841117e-06, |
|
"loss": 0.0051, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 48.001932367149756, |
|
"grad_norm": 0.0059842816554009914, |
|
"learning_rate": 7.482555018786903e-06, |
|
"loss": 0.0001, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 48.00289855072464, |
|
"grad_norm": 0.016945144161581993, |
|
"learning_rate": 7.47181964573269e-06, |
|
"loss": 0.0014, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 48.00386473429952, |
|
"grad_norm": 0.014096066355705261, |
|
"learning_rate": 7.461084272678476e-06, |
|
"loss": 0.214, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 48.00483091787439, |
|
"grad_norm": 0.0021912576630711555, |
|
"learning_rate": 7.4503488996242625e-06, |
|
"loss": 0.0001, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 48.005797101449275, |
|
"grad_norm": 0.00600380590185523, |
|
"learning_rate": 7.439613526570049e-06, |
|
"loss": 0.0001, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 48.00676328502416, |
|
"grad_norm": 0.013205859810113907, |
|
"learning_rate": 7.428878153515835e-06, |
|
"loss": 0.1237, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 48.00676328502416, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 1.2230392694473267, |
|
"eval_runtime": 14.5301, |
|
"eval_samples_per_second": 5.299, |
|
"eval_steps_per_second": 1.376, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 49.00096618357488, |
|
"grad_norm": 0.002923307241871953, |
|
"learning_rate": 7.418142780461621e-06, |
|
"loss": 0.0003, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 49.001932367149756, |
|
"grad_norm": 0.07183127850294113, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0009, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 49.00289855072464, |
|
"grad_norm": 0.01003690529614687, |
|
"learning_rate": 7.396672034353194e-06, |
|
"loss": 0.0001, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 49.00386473429952, |
|
"grad_norm": 0.003056786023080349, |
|
"learning_rate": 7.38593666129898e-06, |
|
"loss": 0.0081, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 49.00483091787439, |
|
"grad_norm": 0.0015192265855148435, |
|
"learning_rate": 7.375201288244767e-06, |
|
"loss": 0.0001, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 49.005797101449275, |
|
"grad_norm": 0.005432800389826298, |
|
"learning_rate": 7.364465915190554e-06, |
|
"loss": 0.0001, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 49.00676328502416, |
|
"grad_norm": 0.002280445536598563, |
|
"learning_rate": 7.353730542136341e-06, |
|
"loss": 0.0001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 49.00676328502416, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.4665330648422241, |
|
"eval_runtime": 14.8215, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 1.349, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 50.00096618357488, |
|
"grad_norm": 0.0026954906061291695, |
|
"learning_rate": 7.342995169082127e-06, |
|
"loss": 0.0001, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 50.001932367149756, |
|
"grad_norm": 0.007547179237008095, |
|
"learning_rate": 7.332259796027913e-06, |
|
"loss": 0.0001, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 50.00289855072464, |
|
"grad_norm": 0.0018638098845258355, |
|
"learning_rate": 7.321524422973699e-06, |
|
"loss": 0.0001, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 50.00386473429952, |
|
"grad_norm": 0.00512617826461792, |
|
"learning_rate": 7.3107890499194855e-06, |
|
"loss": 0.0001, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 50.00483091787439, |
|
"grad_norm": 0.001102678943425417, |
|
"learning_rate": 7.300053676865272e-06, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 50.005797101449275, |
|
"grad_norm": 0.008461062796413898, |
|
"learning_rate": 7.2893183038110584e-06, |
|
"loss": 0.0001, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 50.00676328502416, |
|
"grad_norm": 0.006839872803539038, |
|
"learning_rate": 7.278582930756845e-06, |
|
"loss": 0.0, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 50.00676328502416, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.5471916198730469, |
|
"eval_runtime": 14.9136, |
|
"eval_samples_per_second": 5.163, |
|
"eval_steps_per_second": 1.341, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 51.00096618357488, |
|
"grad_norm": 0.0022453684359788895, |
|
"learning_rate": 7.2678475577026305e-06, |
|
"loss": 0.0001, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 51.001932367149756, |
|
"grad_norm": 0.004750104621052742, |
|
"learning_rate": 7.257112184648417e-06, |
|
"loss": 0.0001, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 51.00289855072464, |
|
"grad_norm": 0.001577241811901331, |
|
"learning_rate": 7.246376811594203e-06, |
|
"loss": 0.0001, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 51.00386473429952, |
|
"grad_norm": 0.001378105953335762, |
|
"learning_rate": 7.23564143853999e-06, |
|
"loss": 0.0002, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 51.00483091787439, |
|
"grad_norm": 0.007165323477238417, |
|
"learning_rate": 7.224906065485776e-06, |
|
"loss": 0.0001, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 51.005797101449275, |
|
"grad_norm": 0.002125757047906518, |
|
"learning_rate": 7.214170692431563e-06, |
|
"loss": 0.1188, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 51.00676328502416, |
|
"grad_norm": 1.5844531059265137, |
|
"learning_rate": 7.203435319377348e-06, |
|
"loss": 0.1479, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 51.00676328502416, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.3368756771087646, |
|
"eval_runtime": 14.6055, |
|
"eval_samples_per_second": 5.272, |
|
"eval_steps_per_second": 1.369, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 52.00096618357488, |
|
"grad_norm": 0.0030117840506136417, |
|
"learning_rate": 7.192699946323135e-06, |
|
"loss": 0.0001, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 52.001932367149756, |
|
"grad_norm": 0.0017524833092465997, |
|
"learning_rate": 7.181964573268921e-06, |
|
"loss": 0.0001, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 52.00289855072464, |
|
"grad_norm": 0.002778457012027502, |
|
"learning_rate": 7.171229200214708e-06, |
|
"loss": 0.0001, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 52.00386473429952, |
|
"grad_norm": 333.92315673828125, |
|
"learning_rate": 7.160493827160494e-06, |
|
"loss": 0.0826, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 52.00483091787439, |
|
"grad_norm": 0.0024222638458013535, |
|
"learning_rate": 7.149758454106281e-06, |
|
"loss": 0.0297, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 52.005797101449275, |
|
"grad_norm": 0.0016652902122586966, |
|
"learning_rate": 7.139023081052067e-06, |
|
"loss": 0.0707, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 52.00676328502416, |
|
"grad_norm": 0.00755368173122406, |
|
"learning_rate": 7.128287707997853e-06, |
|
"loss": 0.0001, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 52.00676328502416, |
|
"eval_accuracy": 0.6753246753246753, |
|
"eval_loss": 2.2529079914093018, |
|
"eval_runtime": 14.6501, |
|
"eval_samples_per_second": 5.256, |
|
"eval_steps_per_second": 1.365, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 53.00096618357488, |
|
"grad_norm": 0.0013488616095855832, |
|
"learning_rate": 7.117552334943641e-06, |
|
"loss": 0.0001, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 53.001932367149756, |
|
"grad_norm": 0.0027625716757029295, |
|
"learning_rate": 7.106816961889426e-06, |
|
"loss": 0.0001, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 53.00289855072464, |
|
"grad_norm": 0.0010352464159950614, |
|
"learning_rate": 7.096081588835213e-06, |
|
"loss": 0.0, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 53.00386473429952, |
|
"grad_norm": 0.002267819829285145, |
|
"learning_rate": 7.085346215780999e-06, |
|
"loss": 0.2287, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 53.00483091787439, |
|
"grad_norm": 0.0032848615664988756, |
|
"learning_rate": 7.074610842726786e-06, |
|
"loss": 0.0001, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 53.005797101449275, |
|
"grad_norm": 0.0024115026462823153, |
|
"learning_rate": 7.063875469672572e-06, |
|
"loss": 0.0001, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 53.00676328502416, |
|
"grad_norm": 0.0190240778028965, |
|
"learning_rate": 7.053140096618359e-06, |
|
"loss": 0.1081, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 53.00676328502416, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4744716882705688, |
|
"eval_runtime": 14.7059, |
|
"eval_samples_per_second": 5.236, |
|
"eval_steps_per_second": 1.36, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 54.00096618357488, |
|
"grad_norm": 0.011739949695765972, |
|
"learning_rate": 7.042404723564144e-06, |
|
"loss": 0.0001, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 54.001932367149756, |
|
"grad_norm": 0.001719196210615337, |
|
"learning_rate": 7.031669350509931e-06, |
|
"loss": 0.0004, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 54.00289855072464, |
|
"grad_norm": 379.4192810058594, |
|
"learning_rate": 7.020933977455717e-06, |
|
"loss": 0.1521, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 54.00386473429952, |
|
"grad_norm": 0.003941697999835014, |
|
"learning_rate": 7.010198604401504e-06, |
|
"loss": 0.0015, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 54.00483091787439, |
|
"grad_norm": 0.002875625155866146, |
|
"learning_rate": 6.99946323134729e-06, |
|
"loss": 0.0561, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 54.005797101449275, |
|
"grad_norm": 0.0037783801089972258, |
|
"learning_rate": 6.9887278582930765e-06, |
|
"loss": 0.0001, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 54.00676328502416, |
|
"grad_norm": 0.0007094612810760736, |
|
"learning_rate": 6.977992485238862e-06, |
|
"loss": 0.0002, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 54.00676328502416, |
|
"eval_accuracy": 0.7402597402597403, |
|
"eval_loss": 1.581336498260498, |
|
"eval_runtime": 14.5929, |
|
"eval_samples_per_second": 5.277, |
|
"eval_steps_per_second": 1.371, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 55.00096618357488, |
|
"grad_norm": 0.0009815659141167998, |
|
"learning_rate": 6.9672571121846486e-06, |
|
"loss": 0.0, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 55.001932367149756, |
|
"grad_norm": 0.0013574979966506362, |
|
"learning_rate": 6.956521739130435e-06, |
|
"loss": 0.0001, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 55.00289855072464, |
|
"grad_norm": 335.1041259765625, |
|
"learning_rate": 6.9457863660762215e-06, |
|
"loss": 0.1636, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 55.00386473429952, |
|
"grad_norm": 0.010944723151624203, |
|
"learning_rate": 6.935050993022008e-06, |
|
"loss": 0.0001, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 55.00483091787439, |
|
"grad_norm": 0.0034687011502683163, |
|
"learning_rate": 6.924315619967794e-06, |
|
"loss": 0.092, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 55.005797101449275, |
|
"grad_norm": 0.0015824460424482822, |
|
"learning_rate": 6.913580246913581e-06, |
|
"loss": 0.0001, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 55.00676328502416, |
|
"grad_norm": 0.001366764772683382, |
|
"learning_rate": 6.9028448738593664e-06, |
|
"loss": 0.0119, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 55.00676328502416, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.6006883382797241, |
|
"eval_runtime": 14.4536, |
|
"eval_samples_per_second": 5.327, |
|
"eval_steps_per_second": 1.384, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 56.00096618357488, |
|
"grad_norm": 0.0016056658932939172, |
|
"learning_rate": 6.892109500805153e-06, |
|
"loss": 0.0647, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 56.001932367149756, |
|
"grad_norm": 0.0010164413833990693, |
|
"learning_rate": 6.881374127750939e-06, |
|
"loss": 0.0018, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 56.00289855072464, |
|
"grad_norm": 0.0008678404265083373, |
|
"learning_rate": 6.870638754696727e-06, |
|
"loss": 0.0, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 56.00386473429952, |
|
"grad_norm": 0.0008416047203354537, |
|
"learning_rate": 6.859903381642513e-06, |
|
"loss": 0.1129, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 56.00483091787439, |
|
"grad_norm": 0.010661150328814983, |
|
"learning_rate": 6.8491680085882995e-06, |
|
"loss": 0.0004, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 56.005797101449275, |
|
"grad_norm": 0.0018914261600002646, |
|
"learning_rate": 6.838432635534086e-06, |
|
"loss": 0.0442, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 56.00676328502416, |
|
"grad_norm": 0.007342246826738119, |
|
"learning_rate": 6.8276972624798724e-06, |
|
"loss": 0.1478, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 56.00676328502416, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 2.3310229778289795, |
|
"eval_runtime": 14.3306, |
|
"eval_samples_per_second": 5.373, |
|
"eval_steps_per_second": 1.396, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 57.00096618357488, |
|
"grad_norm": 0.003021875163540244, |
|
"learning_rate": 6.816961889425658e-06, |
|
"loss": 0.0082, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 57.001932367149756, |
|
"grad_norm": 0.0016037713503465056, |
|
"learning_rate": 6.8062265163714445e-06, |
|
"loss": 0.0011, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 57.00289855072464, |
|
"grad_norm": 0.0027177485171705484, |
|
"learning_rate": 6.795491143317231e-06, |
|
"loss": 0.2467, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 57.00386473429952, |
|
"grad_norm": 0.0009143136558122933, |
|
"learning_rate": 6.784755770263017e-06, |
|
"loss": 0.0537, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 57.00483091787439, |
|
"grad_norm": 0.07625051587820053, |
|
"learning_rate": 6.774020397208804e-06, |
|
"loss": 0.2509, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 57.005797101449275, |
|
"grad_norm": 0.0012727385619655252, |
|
"learning_rate": 6.76328502415459e-06, |
|
"loss": 0.001, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 57.00676328502416, |
|
"grad_norm": 0.009039295837283134, |
|
"learning_rate": 6.752549651100376e-06, |
|
"loss": 0.0001, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 57.00676328502416, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.4787706136703491, |
|
"eval_runtime": 14.4009, |
|
"eval_samples_per_second": 5.347, |
|
"eval_steps_per_second": 1.389, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 58.00096618357488, |
|
"grad_norm": 0.001038399524986744, |
|
"learning_rate": 6.741814278046162e-06, |
|
"loss": 0.0001, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 58.001932367149756, |
|
"grad_norm": 0.0029470643494278193, |
|
"learning_rate": 6.731078904991949e-06, |
|
"loss": 0.1885, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 58.00289855072464, |
|
"grad_norm": 0.19380953907966614, |
|
"learning_rate": 6.720343531937735e-06, |
|
"loss": 0.0001, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 58.00386473429952, |
|
"grad_norm": 0.0030477980617433786, |
|
"learning_rate": 6.709608158883522e-06, |
|
"loss": 0.0002, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 58.00483091787439, |
|
"grad_norm": 0.0017886882415041327, |
|
"learning_rate": 6.698872785829308e-06, |
|
"loss": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 58.005797101449275, |
|
"grad_norm": 0.004812142346054316, |
|
"learning_rate": 6.688137412775095e-06, |
|
"loss": 0.0001, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 58.00676328502416, |
|
"grad_norm": 0.0009715890046209097, |
|
"learning_rate": 6.67740203972088e-06, |
|
"loss": 0.0001, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 58.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.185050129890442, |
|
"eval_runtime": 14.7015, |
|
"eval_samples_per_second": 5.238, |
|
"eval_steps_per_second": 1.36, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 59.00096618357488, |
|
"grad_norm": 0.0017103978898376226, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 59.001932367149756, |
|
"grad_norm": 0.002025015652179718, |
|
"learning_rate": 6.655931293612453e-06, |
|
"loss": 0.0, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 59.00289855072464, |
|
"grad_norm": 0.01431421097368002, |
|
"learning_rate": 6.6451959205582395e-06, |
|
"loss": 0.0001, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 59.00386473429952, |
|
"grad_norm": 0.0034724946599453688, |
|
"learning_rate": 6.634460547504026e-06, |
|
"loss": 0.0001, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 59.00483091787439, |
|
"grad_norm": 0.0009903705213218927, |
|
"learning_rate": 6.623725174449813e-06, |
|
"loss": 0.0, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 59.005797101449275, |
|
"grad_norm": 0.001704953727312386, |
|
"learning_rate": 6.6129898013956e-06, |
|
"loss": 0.0, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 59.00676328502416, |
|
"grad_norm": 0.001701401430182159, |
|
"learning_rate": 6.602254428341386e-06, |
|
"loss": 0.0001, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 59.00676328502416, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.1919652223587036, |
|
"eval_runtime": 15.4843, |
|
"eval_samples_per_second": 4.973, |
|
"eval_steps_per_second": 1.292, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 60.00096618357488, |
|
"grad_norm": 0.0020329791586846113, |
|
"learning_rate": 6.591519055287172e-06, |
|
"loss": 0.0, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 60.001932367149756, |
|
"grad_norm": 0.002356892451643944, |
|
"learning_rate": 6.580783682232958e-06, |
|
"loss": 0.0, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 60.00289855072464, |
|
"grad_norm": 0.0017729549435898662, |
|
"learning_rate": 6.570048309178745e-06, |
|
"loss": 0.0, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 60.00386473429952, |
|
"grad_norm": 0.006136258598417044, |
|
"learning_rate": 6.559312936124531e-06, |
|
"loss": 0.0, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 60.00483091787439, |
|
"grad_norm": 0.0009840623242780566, |
|
"learning_rate": 6.548577563070318e-06, |
|
"loss": 0.0, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 60.005797101449275, |
|
"grad_norm": 0.014522840268909931, |
|
"learning_rate": 6.537842190016104e-06, |
|
"loss": 0.0, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 60.00676328502416, |
|
"grad_norm": 0.0010055291932076216, |
|
"learning_rate": 6.52710681696189e-06, |
|
"loss": 0.0904, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 60.00676328502416, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.1857966184616089, |
|
"eval_runtime": 15.6797, |
|
"eval_samples_per_second": 4.911, |
|
"eval_steps_per_second": 1.276, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 61.00096618357488, |
|
"grad_norm": 0.0076795658096671104, |
|
"learning_rate": 6.516371443907676e-06, |
|
"loss": 0.0931, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 61.001932367149756, |
|
"grad_norm": 0.0016425478970631957, |
|
"learning_rate": 6.5056360708534626e-06, |
|
"loss": 0.0, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 61.00289855072464, |
|
"grad_norm": 0.0013257049722597003, |
|
"learning_rate": 6.494900697799249e-06, |
|
"loss": 0.0002, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 61.00386473429952, |
|
"grad_norm": 0.0032681403681635857, |
|
"learning_rate": 6.4841653247450355e-06, |
|
"loss": 0.0004, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 61.00483091787439, |
|
"grad_norm": 355.2313232421875, |
|
"learning_rate": 6.473429951690822e-06, |
|
"loss": 0.1449, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 61.005797101449275, |
|
"grad_norm": 0.0017356682801619172, |
|
"learning_rate": 6.462694578636608e-06, |
|
"loss": 0.0002, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 61.00676328502416, |
|
"grad_norm": 0.007396780885756016, |
|
"learning_rate": 6.451959205582394e-06, |
|
"loss": 0.0001, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 61.00676328502416, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.4534144401550293, |
|
"eval_runtime": 15.1151, |
|
"eval_samples_per_second": 5.094, |
|
"eval_steps_per_second": 1.323, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 62.00096618357488, |
|
"grad_norm": 0.0020279129967093468, |
|
"learning_rate": 6.44122383252818e-06, |
|
"loss": 0.0001, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 62.001932367149756, |
|
"grad_norm": 0.0022937143221497536, |
|
"learning_rate": 6.430488459473967e-06, |
|
"loss": 0.0, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 62.00289855072464, |
|
"grad_norm": 0.0018241552170366049, |
|
"learning_rate": 6.419753086419753e-06, |
|
"loss": 0.0001, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 62.00386473429952, |
|
"grad_norm": 0.0027427878230810165, |
|
"learning_rate": 6.40901771336554e-06, |
|
"loss": 0.0008, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 62.00483091787439, |
|
"grad_norm": 159.2539520263672, |
|
"learning_rate": 6.398282340311326e-06, |
|
"loss": 0.2086, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 62.005797101449275, |
|
"grad_norm": 0.0029428687412291765, |
|
"learning_rate": 6.387546967257112e-06, |
|
"loss": 0.0693, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 62.00676328502416, |
|
"grad_norm": 0.0011164512252435088, |
|
"learning_rate": 6.376811594202898e-06, |
|
"loss": 0.0017, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 62.00676328502416, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.6715654134750366, |
|
"eval_runtime": 14.8229, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 1.349, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 63.00096618357488, |
|
"grad_norm": 0.36577680706977844, |
|
"learning_rate": 6.3660762211486856e-06, |
|
"loss": 0.0004, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 63.001932367149756, |
|
"grad_norm": 0.0026161600835621357, |
|
"learning_rate": 6.355340848094472e-06, |
|
"loss": 0.4853, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 63.00289855072464, |
|
"grad_norm": 0.0019047785317525268, |
|
"learning_rate": 6.3446054750402585e-06, |
|
"loss": 0.04, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 63.00386473429952, |
|
"grad_norm": 0.00939449667930603, |
|
"learning_rate": 6.333870101986045e-06, |
|
"loss": 0.0151, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 63.00483091787439, |
|
"grad_norm": 0.0031830815132707357, |
|
"learning_rate": 6.323134728931831e-06, |
|
"loss": 0.0005, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 63.005797101449275, |
|
"grad_norm": 0.0016728241462260485, |
|
"learning_rate": 6.312399355877618e-06, |
|
"loss": 0.0151, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 63.00676328502416, |
|
"grad_norm": 0.03096197545528412, |
|
"learning_rate": 6.301663982823404e-06, |
|
"loss": 0.0001, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 63.00676328502416, |
|
"eval_accuracy": 0.6883116883116883, |
|
"eval_loss": 2.201735019683838, |
|
"eval_runtime": 15.4263, |
|
"eval_samples_per_second": 4.991, |
|
"eval_steps_per_second": 1.296, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 64.00096618357487, |
|
"grad_norm": 0.005010953638702631, |
|
"learning_rate": 6.29092860976919e-06, |
|
"loss": 0.192, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 64.00193236714976, |
|
"grad_norm": 0.0011758300242945552, |
|
"learning_rate": 6.280193236714976e-06, |
|
"loss": 0.0159, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 64.00289855072464, |
|
"grad_norm": 0.010257155634462833, |
|
"learning_rate": 6.269457863660763e-06, |
|
"loss": 0.2044, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 64.00386473429951, |
|
"grad_norm": 0.0021658441983163357, |
|
"learning_rate": 6.258722490606549e-06, |
|
"loss": 0.0001, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 64.0048309178744, |
|
"grad_norm": 0.03462962433695793, |
|
"learning_rate": 6.247987117552336e-06, |
|
"loss": 0.0682, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 64.00579710144928, |
|
"grad_norm": 0.020629707723855972, |
|
"learning_rate": 6.237251744498122e-06, |
|
"loss": 0.3837, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 64.00676328502415, |
|
"grad_norm": 0.1369558870792389, |
|
"learning_rate": 6.226516371443908e-06, |
|
"loss": 0.3407, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 64.00676328502415, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.2423864603042603, |
|
"eval_runtime": 15.4244, |
|
"eval_samples_per_second": 4.992, |
|
"eval_steps_per_second": 1.297, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 65.00096618357487, |
|
"grad_norm": 90.10851287841797, |
|
"learning_rate": 6.215780998389694e-06, |
|
"loss": 0.016, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 65.00193236714976, |
|
"grad_norm": 0.003571063280105591, |
|
"learning_rate": 6.205045625335481e-06, |
|
"loss": 0.0371, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 65.00289855072464, |
|
"grad_norm": 0.00869289506226778, |
|
"learning_rate": 6.194310252281267e-06, |
|
"loss": 0.0001, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 65.00386473429951, |
|
"grad_norm": 0.042137209326028824, |
|
"learning_rate": 6.1835748792270535e-06, |
|
"loss": 0.0098, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 65.0048309178744, |
|
"grad_norm": 1.3429404497146606, |
|
"learning_rate": 6.17283950617284e-06, |
|
"loss": 0.0002, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 65.00579710144928, |
|
"grad_norm": 0.009308219887316227, |
|
"learning_rate": 6.162104133118626e-06, |
|
"loss": 0.2129, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 65.00676328502415, |
|
"grad_norm": 0.010884756222367287, |
|
"learning_rate": 6.151368760064412e-06, |
|
"loss": 0.0001, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 65.00676328502415, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.5786070823669434, |
|
"eval_runtime": 14.9171, |
|
"eval_samples_per_second": 5.162, |
|
"eval_steps_per_second": 1.341, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 66.00096618357487, |
|
"grad_norm": 0.001252433517947793, |
|
"learning_rate": 6.1406333870101985e-06, |
|
"loss": 0.0031, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 66.00193236714976, |
|
"grad_norm": 0.0008501612464897335, |
|
"learning_rate": 6.129898013955985e-06, |
|
"loss": 0.0051, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 66.00289855072464, |
|
"grad_norm": 0.0021611701231449842, |
|
"learning_rate": 6.119162640901772e-06, |
|
"loss": 0.0001, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 66.00386473429951, |
|
"grad_norm": 0.001628038240596652, |
|
"learning_rate": 6.108427267847559e-06, |
|
"loss": 0.0226, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 66.0048309178744, |
|
"grad_norm": 0.003019214142113924, |
|
"learning_rate": 6.097691894793345e-06, |
|
"loss": 0.0001, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 66.00579710144928, |
|
"grad_norm": 0.006033017765730619, |
|
"learning_rate": 6.086956521739132e-06, |
|
"loss": 0.0001, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 66.00676328502415, |
|
"grad_norm": 0.0029068191070109606, |
|
"learning_rate": 6.076221148684918e-06, |
|
"loss": 0.0002, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 66.00676328502415, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.3379225730895996, |
|
"eval_runtime": 15.4001, |
|
"eval_samples_per_second": 5.0, |
|
"eval_steps_per_second": 1.299, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 67.00096618357487, |
|
"grad_norm": 0.0017993523506447673, |
|
"learning_rate": 6.065485775630704e-06, |
|
"loss": 0.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 67.00193236714976, |
|
"grad_norm": 0.020115720108151436, |
|
"learning_rate": 6.05475040257649e-06, |
|
"loss": 0.0001, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 67.00289855072464, |
|
"grad_norm": 1.1940608024597168, |
|
"learning_rate": 6.0440150295222766e-06, |
|
"loss": 0.0001, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 67.00386473429951, |
|
"grad_norm": 0.0011647121282294393, |
|
"learning_rate": 6.033279656468063e-06, |
|
"loss": 0.0, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 67.0048309178744, |
|
"grad_norm": 0.004334421828389168, |
|
"learning_rate": 6.0225442834138495e-06, |
|
"loss": 0.0, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 67.00579710144928, |
|
"grad_norm": 0.0025098167825490236, |
|
"learning_rate": 6.011808910359636e-06, |
|
"loss": 0.0013, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 67.00676328502415, |
|
"grad_norm": 0.0017039639642462134, |
|
"learning_rate": 6.0010735373054215e-06, |
|
"loss": 0.0005, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 67.00676328502415, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 1.1517488956451416, |
|
"eval_runtime": 14.5324, |
|
"eval_samples_per_second": 5.299, |
|
"eval_steps_per_second": 1.376, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 68.00096618357487, |
|
"grad_norm": 0.0010669537587091327, |
|
"learning_rate": 5.990338164251208e-06, |
|
"loss": 0.0001, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 68.00193236714976, |
|
"grad_norm": 0.002076763892546296, |
|
"learning_rate": 5.979602791196994e-06, |
|
"loss": 0.0, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 68.00289855072464, |
|
"grad_norm": 0.0011606409680098295, |
|
"learning_rate": 5.968867418142781e-06, |
|
"loss": 0.0, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 68.00386473429951, |
|
"grad_norm": 0.0014143381267786026, |
|
"learning_rate": 5.958132045088567e-06, |
|
"loss": 0.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 68.0048309178744, |
|
"grad_norm": 0.0015609278343617916, |
|
"learning_rate": 5.947396672034354e-06, |
|
"loss": 0.0001, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 68.00579710144928, |
|
"grad_norm": 0.002568858675658703, |
|
"learning_rate": 5.936661298980139e-06, |
|
"loss": 0.0, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 68.00676328502415, |
|
"grad_norm": 0.0010036162566393614, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.0, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 68.00676328502415, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.529435396194458, |
|
"eval_runtime": 14.6615, |
|
"eval_samples_per_second": 5.252, |
|
"eval_steps_per_second": 1.364, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 69.00096618357487, |
|
"grad_norm": 0.0011304821819067001, |
|
"learning_rate": 5.915190552871712e-06, |
|
"loss": 0.0, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 69.00193236714976, |
|
"grad_norm": 0.001226439024321735, |
|
"learning_rate": 5.904455179817499e-06, |
|
"loss": 0.0, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 69.00289855072464, |
|
"grad_norm": 0.000802580441813916, |
|
"learning_rate": 5.893719806763285e-06, |
|
"loss": 0.0, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 69.00386473429951, |
|
"grad_norm": 0.0015111935790628195, |
|
"learning_rate": 5.882984433709072e-06, |
|
"loss": 0.0701, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 69.0048309178744, |
|
"grad_norm": 0.0005540599231608212, |
|
"learning_rate": 5.872249060654859e-06, |
|
"loss": 0.0, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 69.00579710144928, |
|
"grad_norm": 0.000656483112834394, |
|
"learning_rate": 5.861513687600645e-06, |
|
"loss": 0.2159, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 69.00676328502415, |
|
"grad_norm": 0.0013909523840993643, |
|
"learning_rate": 5.850778314546432e-06, |
|
"loss": 0.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 69.00676328502415, |
|
"eval_accuracy": 0.6883116883116883, |
|
"eval_loss": 2.438077449798584, |
|
"eval_runtime": 14.6754, |
|
"eval_samples_per_second": 5.247, |
|
"eval_steps_per_second": 1.363, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 70.00096618357487, |
|
"grad_norm": 0.0017317779129371047, |
|
"learning_rate": 5.840042941492217e-06, |
|
"loss": 0.094, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 70.00193236714976, |
|
"grad_norm": 0.0013916000025346875, |
|
"learning_rate": 5.829307568438004e-06, |
|
"loss": 0.0001, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 70.00289855072464, |
|
"grad_norm": 0.027323855087161064, |
|
"learning_rate": 5.81857219538379e-06, |
|
"loss": 0.0001, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 70.00386473429951, |
|
"grad_norm": 0.0019869667012244463, |
|
"learning_rate": 5.807836822329577e-06, |
|
"loss": 0.0001, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 70.0048309178744, |
|
"grad_norm": 0.0005596183473244309, |
|
"learning_rate": 5.797101449275363e-06, |
|
"loss": 0.0002, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 70.00579710144928, |
|
"grad_norm": 0.0012641664361581206, |
|
"learning_rate": 5.78636607622115e-06, |
|
"loss": 0.0, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 70.00676328502415, |
|
"grad_norm": 0.0033598807640373707, |
|
"learning_rate": 5.775630703166935e-06, |
|
"loss": 0.0032, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 70.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.7951840162277222, |
|
"eval_runtime": 17.871, |
|
"eval_samples_per_second": 4.309, |
|
"eval_steps_per_second": 1.119, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 71.00096618357487, |
|
"grad_norm": 0.0025404111947864294, |
|
"learning_rate": 5.764895330112722e-06, |
|
"loss": 0.0, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 71.00193236714976, |
|
"grad_norm": 0.0016201400430873036, |
|
"learning_rate": 5.754159957058508e-06, |
|
"loss": 0.0, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 71.00289855072464, |
|
"grad_norm": 0.000684636237565428, |
|
"learning_rate": 5.743424584004295e-06, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 71.00386473429951, |
|
"grad_norm": 0.0006000622524879873, |
|
"learning_rate": 5.732689210950081e-06, |
|
"loss": 0.0, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 71.0048309178744, |
|
"grad_norm": 0.000872518983669579, |
|
"learning_rate": 5.7219538378958675e-06, |
|
"loss": 0.0, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 71.00579710144928, |
|
"grad_norm": 102.48182678222656, |
|
"learning_rate": 5.711218464841653e-06, |
|
"loss": 0.0035, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 71.00676328502415, |
|
"grad_norm": 0.0009674491011537611, |
|
"learning_rate": 5.70048309178744e-06, |
|
"loss": 0.0, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 71.00676328502415, |
|
"eval_accuracy": 0.6753246753246753, |
|
"eval_loss": 3.0253195762634277, |
|
"eval_runtime": 30.56, |
|
"eval_samples_per_second": 2.52, |
|
"eval_steps_per_second": 0.654, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 72.00096618357487, |
|
"grad_norm": 641.0316162109375, |
|
"learning_rate": 5.689747718733226e-06, |
|
"loss": 0.14, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 72.00193236714976, |
|
"grad_norm": 0.0011702714255079627, |
|
"learning_rate": 5.6790123456790125e-06, |
|
"loss": 0.0, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 72.00289855072464, |
|
"grad_norm": 0.0009515401907265186, |
|
"learning_rate": 5.668276972624799e-06, |
|
"loss": 0.0, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 72.00386473429951, |
|
"grad_norm": 0.0014254461275413632, |
|
"learning_rate": 5.657541599570585e-06, |
|
"loss": 0.2006, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 72.0048309178744, |
|
"grad_norm": 0.0018207349348813295, |
|
"learning_rate": 5.646806226516372e-06, |
|
"loss": 0.0, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 72.00579710144928, |
|
"grad_norm": 0.01077065709978342, |
|
"learning_rate": 5.6360708534621574e-06, |
|
"loss": 0.0001, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 72.00676328502415, |
|
"grad_norm": 0.0008518044487573206, |
|
"learning_rate": 5.625335480407944e-06, |
|
"loss": 0.214, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 72.00676328502415, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 1.932701826095581, |
|
"eval_runtime": 14.6424, |
|
"eval_samples_per_second": 5.259, |
|
"eval_steps_per_second": 1.366, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 73.00096618357487, |
|
"grad_norm": 0.002738331910222769, |
|
"learning_rate": 5.614600107353731e-06, |
|
"loss": 0.0, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 73.00193236714976, |
|
"grad_norm": 0.0025839614681899548, |
|
"learning_rate": 5.603864734299518e-06, |
|
"loss": 0.0, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 73.00289855072464, |
|
"grad_norm": 0.0006668622372671962, |
|
"learning_rate": 5.593129361245304e-06, |
|
"loss": 0.0745, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 73.00386473429951, |
|
"grad_norm": 0.0026406822726130486, |
|
"learning_rate": 5.5823939881910905e-06, |
|
"loss": 0.0001, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 73.0048309178744, |
|
"grad_norm": 0.01901336945593357, |
|
"learning_rate": 5.571658615136877e-06, |
|
"loss": 0.0001, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 73.00579710144928, |
|
"grad_norm": 0.04490555077791214, |
|
"learning_rate": 5.5609232420826634e-06, |
|
"loss": 0.0001, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 73.00676328502415, |
|
"grad_norm": 0.0009415218373760581, |
|
"learning_rate": 5.550187869028449e-06, |
|
"loss": 0.0, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 73.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 2.0236287117004395, |
|
"eval_runtime": 15.6052, |
|
"eval_samples_per_second": 4.934, |
|
"eval_steps_per_second": 1.282, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 74.00096618357487, |
|
"grad_norm": 0.0010939829517155886, |
|
"learning_rate": 5.5394524959742355e-06, |
|
"loss": 0.0, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 74.00193236714976, |
|
"grad_norm": 0.000889449380338192, |
|
"learning_rate": 5.528717122920022e-06, |
|
"loss": 0.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 74.00289855072464, |
|
"grad_norm": 0.0009812179487198591, |
|
"learning_rate": 5.517981749865808e-06, |
|
"loss": 0.0, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 74.00386473429951, |
|
"grad_norm": 0.000881813233718276, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.0, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 74.0048309178744, |
|
"grad_norm": 0.0004946072003804147, |
|
"learning_rate": 5.496511003757381e-06, |
|
"loss": 0.0, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 74.00579710144928, |
|
"grad_norm": 0.000799790199380368, |
|
"learning_rate": 5.485775630703167e-06, |
|
"loss": 0.0, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 74.00676328502415, |
|
"grad_norm": 0.002813389990478754, |
|
"learning_rate": 5.475040257648953e-06, |
|
"loss": 0.0, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 74.00676328502415, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.907597303390503, |
|
"eval_runtime": 15.6164, |
|
"eval_samples_per_second": 4.931, |
|
"eval_steps_per_second": 1.281, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 75.00096618357487, |
|
"grad_norm": 0.0011628314387053251, |
|
"learning_rate": 5.46430488459474e-06, |
|
"loss": 0.0, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 75.00193236714976, |
|
"grad_norm": 0.037489522248506546, |
|
"learning_rate": 5.453569511540526e-06, |
|
"loss": 0.0, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 75.00289855072464, |
|
"grad_norm": 0.0011903907870873809, |
|
"learning_rate": 5.442834138486313e-06, |
|
"loss": 0.0, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 75.00386473429951, |
|
"grad_norm": 0.0006152648711577058, |
|
"learning_rate": 5.432098765432099e-06, |
|
"loss": 0.0, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 75.0048309178744, |
|
"grad_norm": 0.00442465441301465, |
|
"learning_rate": 5.421363392377886e-06, |
|
"loss": 0.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 75.00579710144928, |
|
"grad_norm": 0.0028682094998657703, |
|
"learning_rate": 5.410628019323671e-06, |
|
"loss": 0.0003, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 75.00676328502415, |
|
"grad_norm": 0.0016861088806763291, |
|
"learning_rate": 5.399892646269458e-06, |
|
"loss": 0.0, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 75.00676328502415, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.707045316696167, |
|
"eval_runtime": 14.9212, |
|
"eval_samples_per_second": 5.16, |
|
"eval_steps_per_second": 1.34, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 76.00096618357487, |
|
"grad_norm": 0.0007070906576700509, |
|
"learning_rate": 5.389157273215244e-06, |
|
"loss": 0.0002, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 76.00193236714976, |
|
"grad_norm": 0.0004906499525532126, |
|
"learning_rate": 5.3784219001610306e-06, |
|
"loss": 0.0, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 76.00289855072464, |
|
"grad_norm": 0.0011014473857358098, |
|
"learning_rate": 5.367686527106818e-06, |
|
"loss": 0.0, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 76.00386473429951, |
|
"grad_norm": 0.0008452738984487951, |
|
"learning_rate": 5.356951154052604e-06, |
|
"loss": 0.0002, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 76.0048309178744, |
|
"grad_norm": 0.0014290370745584369, |
|
"learning_rate": 5.346215780998391e-06, |
|
"loss": 0.0, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 76.00579710144928, |
|
"grad_norm": 0.00044006307143718004, |
|
"learning_rate": 5.335480407944177e-06, |
|
"loss": 0.0, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 76.00676328502415, |
|
"grad_norm": 0.0015216703759506345, |
|
"learning_rate": 5.324745034889963e-06, |
|
"loss": 0.0003, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 76.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.8620779514312744, |
|
"eval_runtime": 14.639, |
|
"eval_samples_per_second": 5.26, |
|
"eval_steps_per_second": 1.366, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 77.00096618357487, |
|
"grad_norm": 0.006501473020762205, |
|
"learning_rate": 5.314009661835749e-06, |
|
"loss": 0.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 77.00193236714976, |
|
"grad_norm": 0.006775288842618465, |
|
"learning_rate": 5.303274288781536e-06, |
|
"loss": 0.0, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 77.00289855072464, |
|
"grad_norm": 0.002048175549134612, |
|
"learning_rate": 5.292538915727322e-06, |
|
"loss": 0.0, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 77.00386473429951, |
|
"grad_norm": 0.0005493107601068914, |
|
"learning_rate": 5.281803542673109e-06, |
|
"loss": 0.0, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 77.0048309178744, |
|
"grad_norm": 0.004135518334805965, |
|
"learning_rate": 5.271068169618895e-06, |
|
"loss": 0.0, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 77.00579710144928, |
|
"grad_norm": 0.0032074879854917526, |
|
"learning_rate": 5.2603327965646815e-06, |
|
"loss": 0.0, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 77.00676328502415, |
|
"grad_norm": 0.0005827154964208603, |
|
"learning_rate": 5.249597423510467e-06, |
|
"loss": 0.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 77.00676328502415, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.8846988677978516, |
|
"eval_runtime": 14.2136, |
|
"eval_samples_per_second": 5.417, |
|
"eval_steps_per_second": 1.407, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 78.00096618357487, |
|
"grad_norm": 0.0004196877998765558, |
|
"learning_rate": 5.2388620504562536e-06, |
|
"loss": 0.0, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 78.00193236714976, |
|
"grad_norm": 0.00211125984787941, |
|
"learning_rate": 5.22812667740204e-06, |
|
"loss": 0.0, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 78.00289855072464, |
|
"grad_norm": 0.00045795412734150887, |
|
"learning_rate": 5.2173913043478265e-06, |
|
"loss": 0.0, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 78.00386473429951, |
|
"grad_norm": 0.0019368949579074979, |
|
"learning_rate": 5.206655931293613e-06, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 78.0048309178744, |
|
"grad_norm": 0.0030042598955333233, |
|
"learning_rate": 5.195920558239399e-06, |
|
"loss": 0.0, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 78.00579710144928, |
|
"grad_norm": 0.017981361597776413, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.0, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 78.00676328502415, |
|
"grad_norm": 0.0014057859079912305, |
|
"learning_rate": 5.1744498121309714e-06, |
|
"loss": 0.0, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 78.00676328502415, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.8880295753479004, |
|
"eval_runtime": 14.1273, |
|
"eval_samples_per_second": 5.45, |
|
"eval_steps_per_second": 1.416, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 79.00096618357487, |
|
"grad_norm": 0.0005187008646316826, |
|
"learning_rate": 5.163714439076758e-06, |
|
"loss": 0.0, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 79.00193236714976, |
|
"grad_norm": 0.005755449179559946, |
|
"learning_rate": 5.152979066022544e-06, |
|
"loss": 0.0, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 79.00289855072464, |
|
"grad_norm": 0.0008575405227020383, |
|
"learning_rate": 5.142243692968331e-06, |
|
"loss": 0.0, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 79.00386473429951, |
|
"grad_norm": 0.0010344963520765305, |
|
"learning_rate": 5.131508319914117e-06, |
|
"loss": 0.0, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 79.0048309178744, |
|
"grad_norm": 0.0012300886446610093, |
|
"learning_rate": 5.1207729468599045e-06, |
|
"loss": 0.0, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 79.00579710144928, |
|
"grad_norm": 0.0005992982187308371, |
|
"learning_rate": 5.110037573805691e-06, |
|
"loss": 0.0, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 79.00676328502415, |
|
"grad_norm": 0.0005942785064689815, |
|
"learning_rate": 5.099302200751477e-06, |
|
"loss": 0.0001, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 79.00676328502415, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.8182388544082642, |
|
"eval_runtime": 14.6812, |
|
"eval_samples_per_second": 5.245, |
|
"eval_steps_per_second": 1.362, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 80.00096618357487, |
|
"grad_norm": 0.016651395708322525, |
|
"learning_rate": 5.088566827697263e-06, |
|
"loss": 0.0, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 80.00193236714976, |
|
"grad_norm": 0.0006846770993433893, |
|
"learning_rate": 5.0778314546430495e-06, |
|
"loss": 0.0, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 80.00289855072464, |
|
"grad_norm": 0.0005728365504182875, |
|
"learning_rate": 5.067096081588836e-06, |
|
"loss": 0.0, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 80.00386473429951, |
|
"grad_norm": 0.0016862893244251609, |
|
"learning_rate": 5.056360708534622e-06, |
|
"loss": 0.002, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 80.0048309178744, |
|
"grad_norm": 0.0006224879762157798, |
|
"learning_rate": 5.045625335480409e-06, |
|
"loss": 0.0, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 80.00579710144928, |
|
"grad_norm": 0.0005681588081642985, |
|
"learning_rate": 5.034889962426195e-06, |
|
"loss": 0.0118, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 80.00676328502415, |
|
"grad_norm": 0.00028208878939040005, |
|
"learning_rate": 5.024154589371981e-06, |
|
"loss": 0.0, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 80.00676328502415, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.7965093851089478, |
|
"eval_runtime": 14.7927, |
|
"eval_samples_per_second": 5.205, |
|
"eval_steps_per_second": 1.352, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 81.00096618357487, |
|
"grad_norm": 0.014044140465557575, |
|
"learning_rate": 5.013419216317767e-06, |
|
"loss": 0.091, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 81.00193236714976, |
|
"grad_norm": 0.0007891967543400824, |
|
"learning_rate": 5.002683843263554e-06, |
|
"loss": 0.1254, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 81.00289855072464, |
|
"grad_norm": 0.0007473126170225441, |
|
"learning_rate": 4.99194847020934e-06, |
|
"loss": 0.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 81.00386473429951, |
|
"grad_norm": 0.001082300441339612, |
|
"learning_rate": 4.981213097155127e-06, |
|
"loss": 0.0, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 81.0048309178744, |
|
"grad_norm": 0.0004553549806587398, |
|
"learning_rate": 4.970477724100913e-06, |
|
"loss": 0.0, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 81.00579710144928, |
|
"grad_norm": 0.00046312747872434556, |
|
"learning_rate": 4.959742351046699e-06, |
|
"loss": 0.0001, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 81.00676328502415, |
|
"grad_norm": 0.0009012920781970024, |
|
"learning_rate": 4.949006977992485e-06, |
|
"loss": 0.0001, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 81.00676328502415, |
|
"eval_accuracy": 0.6753246753246753, |
|
"eval_loss": 3.0535547733306885, |
|
"eval_runtime": 14.6889, |
|
"eval_samples_per_second": 5.242, |
|
"eval_steps_per_second": 1.362, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 82.00096618357487, |
|
"grad_norm": 0.000618190853856504, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.0018, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 82.00193236714976, |
|
"grad_norm": 0.0016643240815028548, |
|
"learning_rate": 4.927536231884059e-06, |
|
"loss": 0.1313, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 82.00289855072464, |
|
"grad_norm": 0.10652245581150055, |
|
"learning_rate": 4.9168008588298446e-06, |
|
"loss": 0.6473, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 82.00386473429951, |
|
"grad_norm": 0.0007297329721041024, |
|
"learning_rate": 4.906065485775631e-06, |
|
"loss": 0.3155, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 82.0048309178744, |
|
"grad_norm": 0.0021761939860880375, |
|
"learning_rate": 4.8953301127214175e-06, |
|
"loss": 0.0016, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 82.00579710144928, |
|
"grad_norm": 0.0007605966529808939, |
|
"learning_rate": 4.884594739667204e-06, |
|
"loss": 0.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 82.00676328502415, |
|
"grad_norm": 0.0007947117555886507, |
|
"learning_rate": 4.87385936661299e-06, |
|
"loss": 0.0005, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 82.00676328502415, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 1.5426697731018066, |
|
"eval_runtime": 14.4326, |
|
"eval_samples_per_second": 5.335, |
|
"eval_steps_per_second": 1.386, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 83.00096618357487, |
|
"grad_norm": 0.016618404537439346, |
|
"learning_rate": 4.863123993558777e-06, |
|
"loss": 0.0, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 83.00193236714976, |
|
"grad_norm": 0.0010025931987911463, |
|
"learning_rate": 4.852388620504562e-06, |
|
"loss": 0.0, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 83.00289855072464, |
|
"grad_norm": 0.003782440209761262, |
|
"learning_rate": 4.841653247450349e-06, |
|
"loss": 0.0871, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 83.00386473429951, |
|
"grad_norm": 0.03455811366438866, |
|
"learning_rate": 4.830917874396135e-06, |
|
"loss": 0.0, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 83.0048309178744, |
|
"grad_norm": 0.0011453116312623024, |
|
"learning_rate": 4.820182501341922e-06, |
|
"loss": 0.0, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 83.00579710144928, |
|
"grad_norm": 0.0074026999063789845, |
|
"learning_rate": 4.809447128287709e-06, |
|
"loss": 0.0, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 83.00676328502415, |
|
"grad_norm": 0.0005923007847741246, |
|
"learning_rate": 4.798711755233495e-06, |
|
"loss": 0.0006, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 83.00676328502415, |
|
"eval_accuracy": 0.7402597402597403, |
|
"eval_loss": 1.8892239332199097, |
|
"eval_runtime": 14.285, |
|
"eval_samples_per_second": 5.39, |
|
"eval_steps_per_second": 1.4, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 84.00096618357487, |
|
"grad_norm": 0.0004263128503225744, |
|
"learning_rate": 4.787976382179281e-06, |
|
"loss": 0.0, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 84.00193236714976, |
|
"grad_norm": 0.0024923263117671013, |
|
"learning_rate": 4.7772410091250676e-06, |
|
"loss": 0.1811, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 84.00289855072464, |
|
"grad_norm": 0.0008309787954203784, |
|
"learning_rate": 4.766505636070854e-06, |
|
"loss": 0.0001, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 84.00386473429951, |
|
"grad_norm": 0.0008232109830714762, |
|
"learning_rate": 4.7557702630166405e-06, |
|
"loss": 0.0001, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 84.0048309178744, |
|
"grad_norm": 0.0004610401520039886, |
|
"learning_rate": 4.745034889962427e-06, |
|
"loss": 0.0, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 84.00579710144928, |
|
"grad_norm": 0.0005724320071749389, |
|
"learning_rate": 4.7342995169082125e-06, |
|
"loss": 0.2355, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 84.00676328502415, |
|
"grad_norm": 0.0007560947560705245, |
|
"learning_rate": 4.723564143853999e-06, |
|
"loss": 0.0001, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 84.00676328502415, |
|
"eval_accuracy": 0.7402597402597403, |
|
"eval_loss": 1.9647512435913086, |
|
"eval_runtime": 14.5317, |
|
"eval_samples_per_second": 5.299, |
|
"eval_steps_per_second": 1.376, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 85.00096618357487, |
|
"grad_norm": 0.0017191518563777208, |
|
"learning_rate": 4.712828770799785e-06, |
|
"loss": 0.0, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 85.00193236714976, |
|
"grad_norm": 0.004426192957907915, |
|
"learning_rate": 4.702093397745572e-06, |
|
"loss": 0.0, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 85.00289855072464, |
|
"grad_norm": 0.0018137526931241155, |
|
"learning_rate": 4.691358024691358e-06, |
|
"loss": 0.1938, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 85.00386473429951, |
|
"grad_norm": 0.0018216772004961967, |
|
"learning_rate": 4.680622651637145e-06, |
|
"loss": 0.0137, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 85.0048309178744, |
|
"grad_norm": 0.0010804088087752461, |
|
"learning_rate": 4.669887278582931e-06, |
|
"loss": 0.2, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 85.00579710144928, |
|
"grad_norm": 0.0011872736504301429, |
|
"learning_rate": 4.659151905528718e-06, |
|
"loss": 0.0, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 85.00676328502415, |
|
"grad_norm": 0.0015459536807611585, |
|
"learning_rate": 4.648416532474504e-06, |
|
"loss": 0.0, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 85.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.7624553442001343, |
|
"eval_runtime": 14.3078, |
|
"eval_samples_per_second": 5.382, |
|
"eval_steps_per_second": 1.398, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 86.00096618357487, |
|
"grad_norm": 0.0007234421791508794, |
|
"learning_rate": 4.637681159420291e-06, |
|
"loss": 0.0, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 86.00193236714976, |
|
"grad_norm": 0.001056589768268168, |
|
"learning_rate": 4.626945786366077e-06, |
|
"loss": 0.0, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 86.00289855072464, |
|
"grad_norm": 0.0015586380613967776, |
|
"learning_rate": 4.616210413311863e-06, |
|
"loss": 0.0, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 86.00386473429951, |
|
"grad_norm": 0.0019250113982707262, |
|
"learning_rate": 4.605475040257649e-06, |
|
"loss": 0.0, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 86.0048309178744, |
|
"grad_norm": 0.4438691735267639, |
|
"learning_rate": 4.5947396672034355e-06, |
|
"loss": 0.0001, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 86.00579710144928, |
|
"grad_norm": 0.00045592489186674356, |
|
"learning_rate": 4.584004294149222e-06, |
|
"loss": 0.0, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 86.00676328502415, |
|
"grad_norm": 0.0016952749574556947, |
|
"learning_rate": 4.5732689210950084e-06, |
|
"loss": 0.1655, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 86.00676328502415, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.6750868558883667, |
|
"eval_runtime": 14.1595, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 1.412, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 87.00096618357487, |
|
"grad_norm": 0.00048763351514935493, |
|
"learning_rate": 4.562533548040795e-06, |
|
"loss": 0.0001, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 87.00193236714976, |
|
"grad_norm": 0.0004426055820658803, |
|
"learning_rate": 4.551798174986581e-06, |
|
"loss": 0.0, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 87.00289855072464, |
|
"grad_norm": 0.00048457886441610754, |
|
"learning_rate": 4.541062801932368e-06, |
|
"loss": 0.0, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 87.00386473429951, |
|
"grad_norm": 0.0007973539759404957, |
|
"learning_rate": 4.530327428878154e-06, |
|
"loss": 0.0, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 87.0048309178744, |
|
"grad_norm": 0.004327856469899416, |
|
"learning_rate": 4.519592055823941e-06, |
|
"loss": 0.0, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 87.00579710144928, |
|
"grad_norm": 0.0011828347342088819, |
|
"learning_rate": 4.508856682769726e-06, |
|
"loss": 0.0, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 87.00676328502415, |
|
"grad_norm": 0.0006070521776564419, |
|
"learning_rate": 4.498121309715513e-06, |
|
"loss": 0.0, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 87.00676328502415, |
|
"eval_accuracy": 0.7402597402597403, |
|
"eval_loss": 1.8558989763259888, |
|
"eval_runtime": 17.515, |
|
"eval_samples_per_second": 4.396, |
|
"eval_steps_per_second": 1.142, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 88.00096618357487, |
|
"grad_norm": 0.010230960324406624, |
|
"learning_rate": 4.487385936661299e-06, |
|
"loss": 0.0, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 88.00193236714976, |
|
"grad_norm": 0.0005492049385793507, |
|
"learning_rate": 4.476650563607086e-06, |
|
"loss": 0.0, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 88.00289855072464, |
|
"grad_norm": 0.0004779798910021782, |
|
"learning_rate": 4.465915190552872e-06, |
|
"loss": 0.0, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 88.00386473429951, |
|
"grad_norm": 0.0003807265020441264, |
|
"learning_rate": 4.4551798174986585e-06, |
|
"loss": 0.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 88.0048309178744, |
|
"grad_norm": 0.0005069471662864089, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 88.00579710144928, |
|
"grad_norm": 0.0007103086682036519, |
|
"learning_rate": 4.4337090713902314e-06, |
|
"loss": 0.0, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 88.00676328502415, |
|
"grad_norm": 0.0003308405866846442, |
|
"learning_rate": 4.422973698336018e-06, |
|
"loss": 0.0, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 88.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.8885866403579712, |
|
"eval_runtime": 14.812, |
|
"eval_samples_per_second": 5.198, |
|
"eval_steps_per_second": 1.35, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 89.00096618357487, |
|
"grad_norm": 0.001938937697559595, |
|
"learning_rate": 4.412238325281804e-06, |
|
"loss": 0.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 89.00193236714976, |
|
"grad_norm": 0.0003955487336497754, |
|
"learning_rate": 4.401502952227591e-06, |
|
"loss": 0.0, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 89.00289855072464, |
|
"grad_norm": 0.0007904915837571025, |
|
"learning_rate": 4.390767579173376e-06, |
|
"loss": 0.0, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 89.00386473429951, |
|
"grad_norm": 0.0005482785636559129, |
|
"learning_rate": 4.380032206119163e-06, |
|
"loss": 0.0, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 89.0048309178744, |
|
"grad_norm": 0.0005851537571288645, |
|
"learning_rate": 4.369296833064949e-06, |
|
"loss": 0.0, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 89.00579710144928, |
|
"grad_norm": 0.0003069818776566535, |
|
"learning_rate": 4.358561460010736e-06, |
|
"loss": 0.0, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 89.00676328502415, |
|
"grad_norm": 0.019225388765335083, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 89.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.8957127332687378, |
|
"eval_runtime": 17.6162, |
|
"eval_samples_per_second": 4.371, |
|
"eval_steps_per_second": 1.135, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 90.00096618357487, |
|
"grad_norm": 0.0005754005396738648, |
|
"learning_rate": 4.337090713902309e-06, |
|
"loss": 0.0, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 90.00193236714976, |
|
"grad_norm": 0.0008540857234038413, |
|
"learning_rate": 4.326355340848094e-06, |
|
"loss": 0.0, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 90.00289855072464, |
|
"grad_norm": 0.0004658667021431029, |
|
"learning_rate": 4.315619967793881e-06, |
|
"loss": 0.0, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 90.00386473429951, |
|
"grad_norm": 0.0007123078685253859, |
|
"learning_rate": 4.304884594739668e-06, |
|
"loss": 0.0, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 90.0048309178744, |
|
"grad_norm": 0.0006745746941305697, |
|
"learning_rate": 4.2941492216854545e-06, |
|
"loss": 0.0, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 90.00579710144928, |
|
"grad_norm": 0.0003169019764754921, |
|
"learning_rate": 4.28341384863124e-06, |
|
"loss": 0.0, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 90.00676328502415, |
|
"grad_norm": 0.0005867715808562934, |
|
"learning_rate": 4.2726784755770265e-06, |
|
"loss": 0.0, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 90.00676328502415, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.8181320428848267, |
|
"eval_runtime": 17.5597, |
|
"eval_samples_per_second": 4.385, |
|
"eval_steps_per_second": 1.139, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 91.00096618357487, |
|
"grad_norm": 0.0008447110303677619, |
|
"learning_rate": 4.261943102522813e-06, |
|
"loss": 0.0, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 91.00193236714976, |
|
"grad_norm": 0.00033777134376578033, |
|
"learning_rate": 4.251207729468599e-06, |
|
"loss": 0.0, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 91.00289855072464, |
|
"grad_norm": 0.0013186403084546328, |
|
"learning_rate": 4.240472356414386e-06, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 91.00386473429951, |
|
"grad_norm": 0.0007186243892647326, |
|
"learning_rate": 4.229736983360172e-06, |
|
"loss": 0.0, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 91.0048309178744, |
|
"grad_norm": 0.0007361398893408477, |
|
"learning_rate": 4.219001610305958e-06, |
|
"loss": 0.0, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 91.00579710144928, |
|
"grad_norm": 0.0004776048008352518, |
|
"learning_rate": 4.208266237251744e-06, |
|
"loss": 0.0, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 91.00676328502415, |
|
"grad_norm": 0.0003450379881542176, |
|
"learning_rate": 4.197530864197531e-06, |
|
"loss": 0.0, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 91.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.829852819442749, |
|
"eval_runtime": 17.0231, |
|
"eval_samples_per_second": 4.523, |
|
"eval_steps_per_second": 1.175, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 92.00096618357487, |
|
"grad_norm": 0.001014379202388227, |
|
"learning_rate": 4.186795491143318e-06, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 92.00193236714976, |
|
"grad_norm": 0.006536729633808136, |
|
"learning_rate": 4.1760601180891046e-06, |
|
"loss": 0.2397, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 92.00289855072464, |
|
"grad_norm": 0.0005509128677658737, |
|
"learning_rate": 4.16532474503489e-06, |
|
"loss": 0.0, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 92.00386473429951, |
|
"grad_norm": 0.0008106320165097713, |
|
"learning_rate": 4.154589371980677e-06, |
|
"loss": 0.0, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 92.0048309178744, |
|
"grad_norm": 0.0016772482777014375, |
|
"learning_rate": 4.143853998926463e-06, |
|
"loss": 0.0, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 92.00579710144928, |
|
"grad_norm": 0.0008628064533695579, |
|
"learning_rate": 4.1331186258722495e-06, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 92.00676328502415, |
|
"grad_norm": 0.00043893515248782933, |
|
"learning_rate": 4.122383252818036e-06, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 92.00676328502415, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.518646478652954, |
|
"eval_runtime": 14.5281, |
|
"eval_samples_per_second": 5.3, |
|
"eval_steps_per_second": 1.377, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 93.00096618357487, |
|
"grad_norm": 0.00041715340921655297, |
|
"learning_rate": 4.1116478797638224e-06, |
|
"loss": 0.0, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 93.00193236714976, |
|
"grad_norm": 0.0004664282314479351, |
|
"learning_rate": 4.100912506709608e-06, |
|
"loss": 0.0, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 93.00289855072464, |
|
"grad_norm": 0.000606657937169075, |
|
"learning_rate": 4.0901771336553945e-06, |
|
"loss": 0.0, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 93.00386473429951, |
|
"grad_norm": 0.00048786430852487683, |
|
"learning_rate": 4.079441760601181e-06, |
|
"loss": 0.0, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 93.0048309178744, |
|
"grad_norm": 0.00024992445833049715, |
|
"learning_rate": 4.068706387546967e-06, |
|
"loss": 0.0, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 93.00579710144928, |
|
"grad_norm": 0.0005548360059037805, |
|
"learning_rate": 4.057971014492754e-06, |
|
"loss": 0.0, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 93.00676328502415, |
|
"grad_norm": 0.0003060603339690715, |
|
"learning_rate": 4.04723564143854e-06, |
|
"loss": 0.0393, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 93.00676328502415, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.9233503341674805, |
|
"eval_runtime": 14.3806, |
|
"eval_samples_per_second": 5.354, |
|
"eval_steps_per_second": 1.391, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 94.00096618357487, |
|
"grad_norm": 0.0003109258250333369, |
|
"learning_rate": 4.036500268384327e-06, |
|
"loss": 0.0031, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 94.00193236714976, |
|
"grad_norm": 0.0008100513368844986, |
|
"learning_rate": 4.025764895330113e-06, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 94.00289855072464, |
|
"grad_norm": 0.007947184145450592, |
|
"learning_rate": 4.0150295222759e-06, |
|
"loss": 0.0, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 94.00386473429951, |
|
"grad_norm": 0.0008597071864642203, |
|
"learning_rate": 4.004294149221686e-06, |
|
"loss": 0.0, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 94.0048309178744, |
|
"grad_norm": 0.0004177772789262235, |
|
"learning_rate": 3.9935587761674725e-06, |
|
"loss": 0.0, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 94.00579710144928, |
|
"grad_norm": 0.0028885523788630962, |
|
"learning_rate": 3.982823403113258e-06, |
|
"loss": 0.0, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 94.00676328502415, |
|
"grad_norm": 0.00042609605588950217, |
|
"learning_rate": 3.972088030059045e-06, |
|
"loss": 0.0, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 94.00676328502415, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1198887825012207, |
|
"eval_runtime": 16.0169, |
|
"eval_samples_per_second": 4.807, |
|
"eval_steps_per_second": 1.249, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 95.00096618357487, |
|
"grad_norm": 0.0003437358245719224, |
|
"learning_rate": 3.961352657004831e-06, |
|
"loss": 0.0, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 95.00193236714976, |
|
"grad_norm": 0.0002173613611375913, |
|
"learning_rate": 3.9506172839506175e-06, |
|
"loss": 0.0, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 95.00289855072464, |
|
"grad_norm": 0.0003850398352369666, |
|
"learning_rate": 3.939881910896404e-06, |
|
"loss": 0.0, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 95.00386473429951, |
|
"grad_norm": 0.00038904434768483043, |
|
"learning_rate": 3.92914653784219e-06, |
|
"loss": 0.0, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 95.0048309178744, |
|
"grad_norm": 0.0010339280124753714, |
|
"learning_rate": 3.918411164787977e-06, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 95.00579710144928, |
|
"grad_norm": 0.0012966475915163755, |
|
"learning_rate": 3.907675791733763e-06, |
|
"loss": 0.0, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 95.00676328502415, |
|
"grad_norm": 0.16356423497200012, |
|
"learning_rate": 3.89694041867955e-06, |
|
"loss": 0.0, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 95.00676328502415, |
|
"eval_accuracy": 0.7402597402597403, |
|
"eval_loss": 2.1308841705322266, |
|
"eval_runtime": 18.5425, |
|
"eval_samples_per_second": 4.153, |
|
"eval_steps_per_second": 1.079, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 96.00096618357487, |
|
"grad_norm": 0.000308900693198666, |
|
"learning_rate": 3.886205045625336e-06, |
|
"loss": 0.0, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 96.00193236714976, |
|
"grad_norm": 0.008645855821669102, |
|
"learning_rate": 3.875469672571122e-06, |
|
"loss": 0.0, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 96.00289855072464, |
|
"grad_norm": 0.001571167609654367, |
|
"learning_rate": 3.864734299516908e-06, |
|
"loss": 0.1613, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 96.00386473429951, |
|
"grad_norm": 0.000592782802414149, |
|
"learning_rate": 3.853998926462695e-06, |
|
"loss": 0.1357, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 96.0048309178744, |
|
"grad_norm": 0.0014255845453590155, |
|
"learning_rate": 3.843263553408481e-06, |
|
"loss": 0.0, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 96.00579710144928, |
|
"grad_norm": 0.0014380579814314842, |
|
"learning_rate": 3.832528180354268e-06, |
|
"loss": 0.0, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 96.00676328502415, |
|
"grad_norm": 0.00042985283653251827, |
|
"learning_rate": 3.821792807300054e-06, |
|
"loss": 0.0009, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 96.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 1.931089162826538, |
|
"eval_runtime": 17.8388, |
|
"eval_samples_per_second": 4.316, |
|
"eval_steps_per_second": 1.121, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 97.00096618357487, |
|
"grad_norm": 0.03030485101044178, |
|
"learning_rate": 3.8110574342458405e-06, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 97.00193236714976, |
|
"grad_norm": 0.0003676621417980641, |
|
"learning_rate": 3.800322061191627e-06, |
|
"loss": 0.0015, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 97.00289855072464, |
|
"grad_norm": 0.0006548650562763214, |
|
"learning_rate": 3.7895866881374134e-06, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 97.00386473429951, |
|
"grad_norm": 0.00035164476139470935, |
|
"learning_rate": 3.7788513150831994e-06, |
|
"loss": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 97.0048309178744, |
|
"grad_norm": 0.14484809339046478, |
|
"learning_rate": 3.768115942028986e-06, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 97.00579710144928, |
|
"grad_norm": 0.00030128343496471643, |
|
"learning_rate": 3.7573805689747723e-06, |
|
"loss": 0.0003, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 97.00676328502415, |
|
"grad_norm": 0.0002703612844925374, |
|
"learning_rate": 3.7466451959205584e-06, |
|
"loss": 0.0001, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 97.00676328502415, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.7857838869094849, |
|
"eval_runtime": 17.5204, |
|
"eval_samples_per_second": 4.395, |
|
"eval_steps_per_second": 1.142, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 98.00096618357487, |
|
"grad_norm": 0.0002425261918688193, |
|
"learning_rate": 3.735909822866345e-06, |
|
"loss": 0.0007, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 98.00193236714976, |
|
"grad_norm": 0.0003744879795704037, |
|
"learning_rate": 3.7251744498121313e-06, |
|
"loss": 0.0, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 98.00289855072464, |
|
"grad_norm": 0.0009580018813721836, |
|
"learning_rate": 3.7144390767579173e-06, |
|
"loss": 0.0, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 98.00386473429951, |
|
"grad_norm": 0.00039155595004558563, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.2244, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 98.0048309178744, |
|
"grad_norm": 0.001988824224099517, |
|
"learning_rate": 3.69296833064949e-06, |
|
"loss": 0.0, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 98.00579710144928, |
|
"grad_norm": 0.00045414830674417317, |
|
"learning_rate": 3.682232957595277e-06, |
|
"loss": 0.0294, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 98.00676328502415, |
|
"grad_norm": 0.0010452043497934937, |
|
"learning_rate": 3.6714975845410635e-06, |
|
"loss": 0.0894, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 98.00676328502415, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.5577243566513062, |
|
"eval_runtime": 18.0262, |
|
"eval_samples_per_second": 4.272, |
|
"eval_steps_per_second": 1.109, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 99.00096618357487, |
|
"grad_norm": 0.010575399734079838, |
|
"learning_rate": 3.6607622114868495e-06, |
|
"loss": 0.0002, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 99.00193236714976, |
|
"grad_norm": 0.010563873685896397, |
|
"learning_rate": 3.650026838432636e-06, |
|
"loss": 0.0, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 99.00289855072464, |
|
"grad_norm": 0.00037548152613453567, |
|
"learning_rate": 3.6392914653784224e-06, |
|
"loss": 0.0, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 99.00386473429951, |
|
"grad_norm": 0.0008382857777178288, |
|
"learning_rate": 3.6285560923242085e-06, |
|
"loss": 0.0, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 99.0048309178744, |
|
"grad_norm": 0.000387381121981889, |
|
"learning_rate": 3.617820719269995e-06, |
|
"loss": 0.0, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 99.00579710144928, |
|
"grad_norm": 0.0008243945194408298, |
|
"learning_rate": 3.6070853462157814e-06, |
|
"loss": 0.0001, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 99.00676328502415, |
|
"grad_norm": 0.0006387074827216566, |
|
"learning_rate": 3.5963499731615674e-06, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 99.00676328502415, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.8137568235397339, |
|
"eval_runtime": 17.7771, |
|
"eval_samples_per_second": 4.331, |
|
"eval_steps_per_second": 1.125, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 100.00096618357487, |
|
"grad_norm": 0.0014033769257366657, |
|
"learning_rate": 3.585614600107354e-06, |
|
"loss": 0.105, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 100.00193236714976, |
|
"grad_norm": 0.00042622030014172196, |
|
"learning_rate": 3.5748792270531403e-06, |
|
"loss": 0.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 100.00289855072464, |
|
"grad_norm": 0.0019426278304308653, |
|
"learning_rate": 3.5641438539989263e-06, |
|
"loss": 0.0, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 100.00386473429951, |
|
"grad_norm": 0.0008854815387167037, |
|
"learning_rate": 3.553408480944713e-06, |
|
"loss": 0.0, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 100.0048309178744, |
|
"grad_norm": 0.0004940590006299317, |
|
"learning_rate": 3.5426731078904997e-06, |
|
"loss": 0.0, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 100.00579710144928, |
|
"grad_norm": 0.0007130287704057992, |
|
"learning_rate": 3.531937734836286e-06, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 100.00676328502415, |
|
"grad_norm": 0.0007455471204593778, |
|
"learning_rate": 3.521202361782072e-06, |
|
"loss": 0.0, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 100.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 2.0068461894989014, |
|
"eval_runtime": 15.1443, |
|
"eval_samples_per_second": 5.084, |
|
"eval_steps_per_second": 1.321, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 101.00096618357487, |
|
"grad_norm": 0.0003840252757072449, |
|
"learning_rate": 3.5104669887278586e-06, |
|
"loss": 0.0, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 101.00193236714976, |
|
"grad_norm": 0.00036437122616916895, |
|
"learning_rate": 3.499731615673645e-06, |
|
"loss": 0.0006, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 101.00289855072464, |
|
"grad_norm": 0.001055071479640901, |
|
"learning_rate": 3.488996242619431e-06, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 101.00386473429951, |
|
"grad_norm": 0.04166965186595917, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"loss": 0.174, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 101.0048309178744, |
|
"grad_norm": 0.0005369763821363449, |
|
"learning_rate": 3.467525496511004e-06, |
|
"loss": 0.0004, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 101.00579710144928, |
|
"grad_norm": 0.0004044979577884078, |
|
"learning_rate": 3.4567901234567904e-06, |
|
"loss": 0.0, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 101.00676328502415, |
|
"grad_norm": 0.0003100241592619568, |
|
"learning_rate": 3.4460547504025764e-06, |
|
"loss": 0.0163, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 101.00676328502415, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 1.8340067863464355, |
|
"eval_runtime": 17.1815, |
|
"eval_samples_per_second": 4.482, |
|
"eval_steps_per_second": 1.164, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 102.00096618357487, |
|
"grad_norm": 0.0005807825946249068, |
|
"learning_rate": 3.4353193773483633e-06, |
|
"loss": 0.0015, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 102.00193236714976, |
|
"grad_norm": 0.0004728983039967716, |
|
"learning_rate": 3.4245840042941498e-06, |
|
"loss": 0.0, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 102.00289855072464, |
|
"grad_norm": 0.00022850323875900358, |
|
"learning_rate": 3.4138486312399362e-06, |
|
"loss": 0.0, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 102.00386473429951, |
|
"grad_norm": 0.006223591510206461, |
|
"learning_rate": 3.4031132581857222e-06, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 102.0048309178744, |
|
"grad_norm": 0.00024900989956222475, |
|
"learning_rate": 3.3923778851315087e-06, |
|
"loss": 0.0, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 102.00579710144928, |
|
"grad_norm": 0.0019101315410807729, |
|
"learning_rate": 3.381642512077295e-06, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 102.00676328502415, |
|
"grad_norm": 0.0003227620618417859, |
|
"learning_rate": 3.370907139023081e-06, |
|
"loss": 0.0, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 102.00676328502415, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.3226364850997925, |
|
"eval_runtime": 18.0086, |
|
"eval_samples_per_second": 4.276, |
|
"eval_steps_per_second": 1.111, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 103.00096618357487, |
|
"grad_norm": 0.0004981742240488529, |
|
"learning_rate": 3.3601717659688676e-06, |
|
"loss": 0.0, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 103.00193236714976, |
|
"grad_norm": 0.000562319066375494, |
|
"learning_rate": 3.349436392914654e-06, |
|
"loss": 0.0, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 103.00289855072464, |
|
"grad_norm": 0.0005576030816882849, |
|
"learning_rate": 3.33870101986044e-06, |
|
"loss": 0.0, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 103.00386473429951, |
|
"grad_norm": 0.0009905042825266719, |
|
"learning_rate": 3.3279656468062265e-06, |
|
"loss": 0.0, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 103.0048309178744, |
|
"grad_norm": 0.00032818218460306525, |
|
"learning_rate": 3.317230273752013e-06, |
|
"loss": 0.0833, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 103.00579710144928, |
|
"grad_norm": 0.0029643636662513018, |
|
"learning_rate": 3.3064949006978e-06, |
|
"loss": 0.0, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 103.00676328502415, |
|
"grad_norm": 0.0008541871211491525, |
|
"learning_rate": 3.295759527643586e-06, |
|
"loss": 0.0, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 103.00676328502415, |
|
"eval_accuracy": 0.7532467532467533, |
|
"eval_loss": 2.460693597793579, |
|
"eval_runtime": 17.2217, |
|
"eval_samples_per_second": 4.471, |
|
"eval_steps_per_second": 1.161, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 104.00096618357487, |
|
"grad_norm": 0.0004660151316784322, |
|
"learning_rate": 3.2850241545893724e-06, |
|
"loss": 0.2667, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 104.00193236714976, |
|
"grad_norm": 0.006872973870486021, |
|
"learning_rate": 3.274288781535159e-06, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 104.00289855072464, |
|
"grad_norm": 0.0034734373912215233, |
|
"learning_rate": 3.263553408480945e-06, |
|
"loss": 0.0, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 104.00386473429951, |
|
"grad_norm": 0.002416298259049654, |
|
"learning_rate": 3.2528180354267313e-06, |
|
"loss": 0.0952, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 104.0048309178744, |
|
"grad_norm": 0.00047030055429786444, |
|
"learning_rate": 3.2420826623725177e-06, |
|
"loss": 0.0, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 104.00579710144928, |
|
"grad_norm": 0.002508195349946618, |
|
"learning_rate": 3.231347289318304e-06, |
|
"loss": 0.0, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 104.00676328502415, |
|
"grad_norm": 0.006559464149177074, |
|
"learning_rate": 3.22061191626409e-06, |
|
"loss": 0.0683, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 104.00676328502415, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 1.754974126815796, |
|
"eval_runtime": 14.6171, |
|
"eval_samples_per_second": 5.268, |
|
"eval_steps_per_second": 1.368, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 105.00096618357487, |
|
"grad_norm": 0.0030552211683243513, |
|
"learning_rate": 3.2098765432098767e-06, |
|
"loss": 0.0, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 105.00193236714976, |
|
"grad_norm": 0.000478706177091226, |
|
"learning_rate": 3.199141170155663e-06, |
|
"loss": 0.0002, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 105.00289855072464, |
|
"grad_norm": 0.0003611068823374808, |
|
"learning_rate": 3.188405797101449e-06, |
|
"loss": 0.0, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 105.00386473429951, |
|
"grad_norm": 0.000608165399171412, |
|
"learning_rate": 3.177670424047236e-06, |
|
"loss": 0.0653, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 105.0048309178744, |
|
"grad_norm": 0.0009645195677876472, |
|
"learning_rate": 3.1669350509930225e-06, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 105.00579710144928, |
|
"grad_norm": 0.0010169371962547302, |
|
"learning_rate": 3.156199677938809e-06, |
|
"loss": 0.0, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 105.00676328502415, |
|
"grad_norm": 0.004264978691935539, |
|
"learning_rate": 3.145464304884595e-06, |
|
"loss": 0.0, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 105.00676328502415, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.4899604320526123, |
|
"eval_runtime": 14.5365, |
|
"eval_samples_per_second": 5.297, |
|
"eval_steps_per_second": 1.376, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 106.00096618357487, |
|
"grad_norm": 0.0018262077355757356, |
|
"learning_rate": 3.1347289318303814e-06, |
|
"loss": 0.0, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 106.00193236714976, |
|
"grad_norm": 0.000662806851323694, |
|
"learning_rate": 3.123993558776168e-06, |
|
"loss": 0.0, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 106.00289855072464, |
|
"grad_norm": 0.0005241535254754126, |
|
"learning_rate": 3.113258185721954e-06, |
|
"loss": 0.198, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 106.00386473429951, |
|
"grad_norm": 0.0009639605414122343, |
|
"learning_rate": 3.1025228126677403e-06, |
|
"loss": 0.0, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 106.0048309178744, |
|
"grad_norm": 0.0003563823411241174, |
|
"learning_rate": 3.0917874396135268e-06, |
|
"loss": 0.0008, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 106.00579710144928, |
|
"grad_norm": 0.00036870595067739487, |
|
"learning_rate": 3.081052066559313e-06, |
|
"loss": 0.0, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 106.00676328502415, |
|
"grad_norm": 0.00026721181347966194, |
|
"learning_rate": 3.0703166935050992e-06, |
|
"loss": 0.0, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 106.00676328502415, |
|
"eval_accuracy": 0.7662337662337663, |
|
"eval_loss": 1.5684095621109009, |
|
"eval_runtime": 16.9217, |
|
"eval_samples_per_second": 4.55, |
|
"eval_steps_per_second": 1.182, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 107.00096618357487, |
|
"grad_norm": 624.6681518554688, |
|
"learning_rate": 3.059581320450886e-06, |
|
"loss": 0.0527, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 107.00193236714976, |
|
"grad_norm": 0.0003674910985864699, |
|
"learning_rate": 3.0488459473966726e-06, |
|
"loss": 0.0, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 107.00289855072464, |
|
"grad_norm": 0.0003636605979409069, |
|
"learning_rate": 3.038110574342459e-06, |
|
"loss": 0.0, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 107.00386473429951, |
|
"grad_norm": 0.0015479899011552334, |
|
"learning_rate": 3.027375201288245e-06, |
|
"loss": 0.0, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 107.0048309178744, |
|
"grad_norm": 0.0007740533910691738, |
|
"learning_rate": 3.0166398282340315e-06, |
|
"loss": 0.0, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 107.00579710144928, |
|
"grad_norm": 0.0003727949515450746, |
|
"learning_rate": 3.005904455179818e-06, |
|
"loss": 0.0001, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 107.00676328502415, |
|
"grad_norm": 0.00039142067544162273, |
|
"learning_rate": 2.995169082125604e-06, |
|
"loss": 0.0, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 107.00676328502415, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.7333017587661743, |
|
"eval_runtime": 16.6874, |
|
"eval_samples_per_second": 4.614, |
|
"eval_steps_per_second": 1.199, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 108.00096618357487, |
|
"grad_norm": 0.0002302713255630806, |
|
"learning_rate": 2.9844337090713904e-06, |
|
"loss": 0.0, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 108.00193236714976, |
|
"grad_norm": 0.00029453245224431157, |
|
"learning_rate": 2.973698336017177e-06, |
|
"loss": 0.0001, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 108.00289855072464, |
|
"grad_norm": 0.0043038977310061455, |
|
"learning_rate": 2.962962962962963e-06, |
|
"loss": 0.0, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 108.00386473429951, |
|
"grad_norm": 292.66595458984375, |
|
"learning_rate": 2.9522275899087494e-06, |
|
"loss": 0.0253, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 108.0048309178744, |
|
"grad_norm": 0.0004467540711630136, |
|
"learning_rate": 2.941492216854536e-06, |
|
"loss": 0.0, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 108.00579710144928, |
|
"grad_norm": 0.0002925453591160476, |
|
"learning_rate": 2.9307568438003227e-06, |
|
"loss": 0.3068, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 108.00676328502415, |
|
"grad_norm": 0.0020885635167360306, |
|
"learning_rate": 2.9200214707461087e-06, |
|
"loss": 0.0, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 108.00676328502415, |
|
"eval_accuracy": 0.7922077922077922, |
|
"eval_loss": 1.423266887664795, |
|
"eval_runtime": 14.9029, |
|
"eval_samples_per_second": 5.167, |
|
"eval_steps_per_second": 1.342, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 109.00096618357487, |
|
"grad_norm": 0.0003770174225792289, |
|
"learning_rate": 2.909286097691895e-06, |
|
"loss": 0.0, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 109.00193236714976, |
|
"grad_norm": 319.81683349609375, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"loss": 0.2953, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 109.00289855072464, |
|
"grad_norm": 0.000277810642728582, |
|
"learning_rate": 2.8878153515834676e-06, |
|
"loss": 0.0, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 109.00386473429951, |
|
"grad_norm": 0.0017440535593777895, |
|
"learning_rate": 2.877079978529254e-06, |
|
"loss": 0.0, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 109.0048309178744, |
|
"grad_norm": 0.004614552017301321, |
|
"learning_rate": 2.8663446054750405e-06, |
|
"loss": 0.0, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 109.00579710144928, |
|
"grad_norm": 0.0003517542500048876, |
|
"learning_rate": 2.8556092324208266e-06, |
|
"loss": 0.0, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 109.00676328502415, |
|
"grad_norm": 0.0006136648589745164, |
|
"learning_rate": 2.844873859366613e-06, |
|
"loss": 0.0001, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 109.00676328502415, |
|
"eval_accuracy": 0.7792207792207793, |
|
"eval_loss": 1.7541829347610474, |
|
"eval_runtime": 14.6741, |
|
"eval_samples_per_second": 5.247, |
|
"eval_steps_per_second": 1.363, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 110.00096618357487, |
|
"grad_norm": 0.003632753388956189, |
|
"learning_rate": 2.8341384863123995e-06, |
|
"loss": 0.0, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 110.00193236714976, |
|
"grad_norm": 0.0011343782534822822, |
|
"learning_rate": 2.823403113258186e-06, |
|
"loss": 0.0, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 110.00289855072464, |
|
"grad_norm": 0.00032457453198730946, |
|
"learning_rate": 2.812667740203972e-06, |
|
"loss": 0.0172, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 110.00386473429951, |
|
"grad_norm": 0.0008219513692893088, |
|
"learning_rate": 2.801932367149759e-06, |
|
"loss": 0.0, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 110.0048309178744, |
|
"grad_norm": 0.00046765225124545395, |
|
"learning_rate": 2.7911969940955453e-06, |
|
"loss": 0.0, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 110.00579710144928, |
|
"grad_norm": 0.006037019193172455, |
|
"learning_rate": 2.7804616210413317e-06, |
|
"loss": 0.0, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 110.00676328502415, |
|
"grad_norm": 0.00038607208989560604, |
|
"learning_rate": 2.7697262479871177e-06, |
|
"loss": 0.0, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 110.00676328502415, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.4554383754730225, |
|
"eval_runtime": 18.1834, |
|
"eval_samples_per_second": 4.235, |
|
"eval_steps_per_second": 1.1, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 111.00096618357487, |
|
"grad_norm": 0.003369076643139124, |
|
"learning_rate": 2.758990874932904e-06, |
|
"loss": 0.0, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 111.00193236714976, |
|
"grad_norm": 0.00028180310619063675, |
|
"learning_rate": 2.7482555018786907e-06, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 111.00289855072464, |
|
"grad_norm": 0.00040791460196487606, |
|
"learning_rate": 2.7375201288244767e-06, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 111.00386473429951, |
|
"grad_norm": 0.2944580912590027, |
|
"learning_rate": 2.726784755770263e-06, |
|
"loss": 0.0001, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 111.0048309178744, |
|
"grad_norm": 0.000506663927808404, |
|
"learning_rate": 2.7160493827160496e-06, |
|
"loss": 0.0, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 111.00579710144928, |
|
"grad_norm": 0.0006107607623562217, |
|
"learning_rate": 2.7053140096618356e-06, |
|
"loss": 0.0, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 111.00676328502415, |
|
"grad_norm": 0.0007616582443006337, |
|
"learning_rate": 2.694578636607622e-06, |
|
"loss": 0.0, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 111.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.3537590503692627, |
|
"eval_runtime": 25.5207, |
|
"eval_samples_per_second": 3.017, |
|
"eval_steps_per_second": 0.784, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 112.00096618357487, |
|
"grad_norm": 0.00020077962835785002, |
|
"learning_rate": 2.683843263553409e-06, |
|
"loss": 0.0, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 112.00193236714976, |
|
"grad_norm": 0.0007396721048280597, |
|
"learning_rate": 2.6731078904991954e-06, |
|
"loss": 0.0, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 112.00289855072464, |
|
"grad_norm": 0.001002350589260459, |
|
"learning_rate": 2.6623725174449814e-06, |
|
"loss": 0.0, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 112.00386473429951, |
|
"grad_norm": 0.0007556098862551153, |
|
"learning_rate": 2.651637144390768e-06, |
|
"loss": 0.0, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 112.0048309178744, |
|
"grad_norm": 0.0019177094800397754, |
|
"learning_rate": 2.6409017713365543e-06, |
|
"loss": 0.0, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 112.00579710144928, |
|
"grad_norm": 0.004157165996730328, |
|
"learning_rate": 2.6301663982823408e-06, |
|
"loss": 0.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 112.00676328502415, |
|
"grad_norm": 0.0004525023396126926, |
|
"learning_rate": 2.6194310252281268e-06, |
|
"loss": 0.0, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 112.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.4164801836013794, |
|
"eval_runtime": 14.6287, |
|
"eval_samples_per_second": 5.264, |
|
"eval_steps_per_second": 1.367, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 113.00096618357487, |
|
"grad_norm": 0.0003341760893817991, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"loss": 0.0, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 113.00193236714976, |
|
"grad_norm": 0.0007722051814198494, |
|
"learning_rate": 2.5979602791196997e-06, |
|
"loss": 0.0, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 113.00289855072464, |
|
"grad_norm": 0.0003543299390003085, |
|
"learning_rate": 2.5872249060654857e-06, |
|
"loss": 0.0, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 113.00386473429951, |
|
"grad_norm": 0.003534851362928748, |
|
"learning_rate": 2.576489533011272e-06, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 113.0048309178744, |
|
"grad_norm": 0.0003302747500129044, |
|
"learning_rate": 2.5657541599570586e-06, |
|
"loss": 0.0, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 113.00579710144928, |
|
"grad_norm": 0.00041915668407455087, |
|
"learning_rate": 2.5550187869028455e-06, |
|
"loss": 0.0, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 113.00676328502415, |
|
"grad_norm": 0.02611609548330307, |
|
"learning_rate": 2.5442834138486315e-06, |
|
"loss": 0.0, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 113.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.4228990077972412, |
|
"eval_runtime": 14.244, |
|
"eval_samples_per_second": 5.406, |
|
"eval_steps_per_second": 1.404, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 114.00096618357487, |
|
"grad_norm": 0.0005513859214261174, |
|
"learning_rate": 2.533548040794418e-06, |
|
"loss": 0.0, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 114.00193236714976, |
|
"grad_norm": 0.0006181516218930483, |
|
"learning_rate": 2.5228126677402044e-06, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 114.00289855072464, |
|
"grad_norm": 0.0005718721658922732, |
|
"learning_rate": 2.5120772946859904e-06, |
|
"loss": 0.0, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 114.00386473429951, |
|
"grad_norm": 0.0015514239203184843, |
|
"learning_rate": 2.501341921631777e-06, |
|
"loss": 0.0, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 114.0048309178744, |
|
"grad_norm": 0.0005155864637345076, |
|
"learning_rate": 2.4906065485775633e-06, |
|
"loss": 0.0, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 114.00579710144928, |
|
"grad_norm": 0.0005761452484875917, |
|
"learning_rate": 2.4798711755233494e-06, |
|
"loss": 0.0, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 114.00676328502415, |
|
"grad_norm": 0.0003508743247948587, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.0, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 114.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.4191442728042603, |
|
"eval_runtime": 14.3381, |
|
"eval_samples_per_second": 5.37, |
|
"eval_steps_per_second": 1.395, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 115.00096618357487, |
|
"grad_norm": 0.00038109711022116244, |
|
"learning_rate": 2.4584004294149223e-06, |
|
"loss": 0.0, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 115.00193236714976, |
|
"grad_norm": 0.0004090418224222958, |
|
"learning_rate": 2.4476650563607087e-06, |
|
"loss": 0.0, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 115.00289855072464, |
|
"grad_norm": 0.00023632495140191168, |
|
"learning_rate": 2.436929683306495e-06, |
|
"loss": 0.0, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 115.00386473429951, |
|
"grad_norm": 0.00041399948531761765, |
|
"learning_rate": 2.426194310252281e-06, |
|
"loss": 0.0, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 115.0048309178744, |
|
"grad_norm": 0.0004869647673331201, |
|
"learning_rate": 2.4154589371980677e-06, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 115.00579710144928, |
|
"grad_norm": 0.0002684770443011075, |
|
"learning_rate": 2.4047235641438545e-06, |
|
"loss": 0.0, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 115.00676328502415, |
|
"grad_norm": 0.005061628762632608, |
|
"learning_rate": 2.3939881910896406e-06, |
|
"loss": 0.0, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 115.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.4363770484924316, |
|
"eval_runtime": 14.3464, |
|
"eval_samples_per_second": 5.367, |
|
"eval_steps_per_second": 1.394, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 116.00096618357487, |
|
"grad_norm": 0.00040192605229094625, |
|
"learning_rate": 2.383252818035427e-06, |
|
"loss": 0.0, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 116.00193236714976, |
|
"grad_norm": 0.0004934113239869475, |
|
"learning_rate": 2.3725174449812135e-06, |
|
"loss": 0.0, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 116.00289855072464, |
|
"grad_norm": 0.0008514419896528125, |
|
"learning_rate": 2.3617820719269995e-06, |
|
"loss": 0.0, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 116.00386473429951, |
|
"grad_norm": 0.01015880610793829, |
|
"learning_rate": 2.351046698872786e-06, |
|
"loss": 0.0, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 116.0048309178744, |
|
"grad_norm": 0.0004901864449493587, |
|
"learning_rate": 2.3403113258185724e-06, |
|
"loss": 0.0, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 116.00579710144928, |
|
"grad_norm": 0.00022043399803806096, |
|
"learning_rate": 2.329575952764359e-06, |
|
"loss": 0.0, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 116.00676328502415, |
|
"grad_norm": 0.00031955583835951984, |
|
"learning_rate": 2.3188405797101453e-06, |
|
"loss": 0.0, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 116.00676328502415, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.457450032234192, |
|
"eval_runtime": 16.0943, |
|
"eval_samples_per_second": 4.784, |
|
"eval_steps_per_second": 1.243, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 117.00096618357487, |
|
"grad_norm": 0.0003374749212525785, |
|
"learning_rate": 2.3081052066559313e-06, |
|
"loss": 0.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 117.00193236714976, |
|
"grad_norm": 0.0002894297649618238, |
|
"learning_rate": 2.2973698336017178e-06, |
|
"loss": 0.0, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 117.00289855072464, |
|
"grad_norm": 0.00022455077851191163, |
|
"learning_rate": 2.2866344605475042e-06, |
|
"loss": 0.0, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 117.00386473429951, |
|
"grad_norm": 0.00020910402236040682, |
|
"learning_rate": 2.2758990874932907e-06, |
|
"loss": 0.0, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 117.0048309178744, |
|
"grad_norm": 0.00024087671772576869, |
|
"learning_rate": 2.265163714439077e-06, |
|
"loss": 0.0, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 117.00579710144928, |
|
"grad_norm": 0.000636860728263855, |
|
"learning_rate": 2.254428341384863e-06, |
|
"loss": 0.0, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 117.00676328502415, |
|
"grad_norm": 0.0007749819196760654, |
|
"learning_rate": 2.2436929683306496e-06, |
|
"loss": 0.0, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 117.00676328502415, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.4639664888381958, |
|
"eval_runtime": 15.7612, |
|
"eval_samples_per_second": 4.885, |
|
"eval_steps_per_second": 1.269, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 118.00096618357487, |
|
"grad_norm": 0.0007536153425462544, |
|
"learning_rate": 2.232957595276436e-06, |
|
"loss": 0.0, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 118.00193236714976, |
|
"grad_norm": 0.00032429906423203647, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 118.00289855072464, |
|
"grad_norm": 0.0006189732230268419, |
|
"learning_rate": 2.211486849168009e-06, |
|
"loss": 0.0, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 118.00386473429951, |
|
"grad_norm": 0.0018710630247369409, |
|
"learning_rate": 2.2007514761137954e-06, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 118.0048309178744, |
|
"grad_norm": 0.00020172024960629642, |
|
"learning_rate": 2.1900161030595814e-06, |
|
"loss": 0.0, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 118.00579710144928, |
|
"grad_norm": 0.0002452440094202757, |
|
"learning_rate": 2.179280730005368e-06, |
|
"loss": 0.0, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 118.00676328502415, |
|
"grad_norm": 0.0005612657987512648, |
|
"learning_rate": 2.1685453569511543e-06, |
|
"loss": 0.0, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 118.00676328502415, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.480674386024475, |
|
"eval_runtime": 16.8552, |
|
"eval_samples_per_second": 4.568, |
|
"eval_steps_per_second": 1.187, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 119.00096618357487, |
|
"grad_norm": 0.004412544891238213, |
|
"learning_rate": 2.1578099838969404e-06, |
|
"loss": 0.0, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 119.00193236714976, |
|
"grad_norm": 0.0001875559682957828, |
|
"learning_rate": 2.1470746108427272e-06, |
|
"loss": 0.0, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 119.00289855072464, |
|
"grad_norm": 0.00025120144709944725, |
|
"learning_rate": 2.1363392377885133e-06, |
|
"loss": 0.0, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 119.00386473429951, |
|
"grad_norm": 0.0002451414766255766, |
|
"learning_rate": 2.1256038647342997e-06, |
|
"loss": 0.0, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 119.0048309178744, |
|
"grad_norm": 0.00071542157093063, |
|
"learning_rate": 2.114868491680086e-06, |
|
"loss": 0.0, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 119.00579710144928, |
|
"grad_norm": 0.0005497967358678579, |
|
"learning_rate": 2.104133118625872e-06, |
|
"loss": 0.0, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 119.00676328502415, |
|
"grad_norm": 0.00022419539163820446, |
|
"learning_rate": 2.093397745571659e-06, |
|
"loss": 0.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 119.00676328502415, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.5029710531234741, |
|
"eval_runtime": 16.8065, |
|
"eval_samples_per_second": 4.582, |
|
"eval_steps_per_second": 1.19, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 120.00096618357487, |
|
"grad_norm": 0.00019987497944384813, |
|
"learning_rate": 2.082662372517445e-06, |
|
"loss": 0.0, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 120.00193236714976, |
|
"grad_norm": 0.00037460497696883976, |
|
"learning_rate": 2.0719269994632315e-06, |
|
"loss": 0.0, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 120.00289855072464, |
|
"grad_norm": 0.0003980303881689906, |
|
"learning_rate": 2.061191626409018e-06, |
|
"loss": 0.0, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 120.00386473429951, |
|
"grad_norm": 0.00016923666407819837, |
|
"learning_rate": 2.050456253354804e-06, |
|
"loss": 0.0, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 120.0048309178744, |
|
"grad_norm": 0.0003269107255619019, |
|
"learning_rate": 2.0397208803005905e-06, |
|
"loss": 0.0, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 120.00579710144928, |
|
"grad_norm": 0.0008120771963149309, |
|
"learning_rate": 2.028985507246377e-06, |
|
"loss": 0.0, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 120.00676328502415, |
|
"grad_norm": 0.0005975480889901519, |
|
"learning_rate": 2.0182501341921634e-06, |
|
"loss": 0.0, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 120.00676328502415, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.518750786781311, |
|
"eval_runtime": 17.4768, |
|
"eval_samples_per_second": 4.406, |
|
"eval_steps_per_second": 1.144, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 121.00096618357487, |
|
"grad_norm": 0.00044110111775808036, |
|
"learning_rate": 2.00751476113795e-06, |
|
"loss": 0.0, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 121.00193236714976, |
|
"grad_norm": 0.00020110349578317255, |
|
"learning_rate": 1.9967793880837363e-06, |
|
"loss": 0.0, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 121.00289855072464, |
|
"grad_norm": 0.000270154036115855, |
|
"learning_rate": 1.9860440150295223e-06, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 121.00386473429951, |
|
"grad_norm": 0.0002893579949159175, |
|
"learning_rate": 1.9753086419753087e-06, |
|
"loss": 0.0, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 121.0048309178744, |
|
"grad_norm": 0.001373679144307971, |
|
"learning_rate": 1.964573268921095e-06, |
|
"loss": 0.0, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 121.00579710144928, |
|
"grad_norm": 0.0003092541010119021, |
|
"learning_rate": 1.9538378958668816e-06, |
|
"loss": 0.0, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 121.00676328502415, |
|
"grad_norm": 0.00024617547751404345, |
|
"learning_rate": 1.943102522812668e-06, |
|
"loss": 0.0, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 121.00676328502415, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.5642110109329224, |
|
"eval_runtime": 16.6862, |
|
"eval_samples_per_second": 4.615, |
|
"eval_steps_per_second": 1.199, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 122.00096618357487, |
|
"grad_norm": 0.00046582010691054165, |
|
"learning_rate": 1.932367149758454e-06, |
|
"loss": 0.0, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 122.00193236714976, |
|
"grad_norm": 0.00022211925534065813, |
|
"learning_rate": 1.9216317767042406e-06, |
|
"loss": 0.0, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 122.00289855072464, |
|
"grad_norm": 0.0002465721336193383, |
|
"learning_rate": 1.910896403650027e-06, |
|
"loss": 0.0, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 122.00386473429951, |
|
"grad_norm": 0.0007177837542258203, |
|
"learning_rate": 1.9001610305958135e-06, |
|
"loss": 0.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 122.0048309178744, |
|
"grad_norm": 0.0001443681976525113, |
|
"learning_rate": 1.8894256575415997e-06, |
|
"loss": 0.0, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 122.00579710144928, |
|
"grad_norm": 0.00030018811230547726, |
|
"learning_rate": 1.8786902844873862e-06, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 122.00676328502415, |
|
"grad_norm": 0.00017983531870413572, |
|
"learning_rate": 1.8679549114331724e-06, |
|
"loss": 0.0, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 122.00676328502415, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.5662660598754883, |
|
"eval_runtime": 16.7576, |
|
"eval_samples_per_second": 4.595, |
|
"eval_steps_per_second": 1.193, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 123.00096618357487, |
|
"grad_norm": 0.00021314997866284102, |
|
"learning_rate": 1.8572195383789586e-06, |
|
"loss": 0.0, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 123.00193236714976, |
|
"grad_norm": 0.00018588421517051756, |
|
"learning_rate": 1.846484165324745e-06, |
|
"loss": 0.0, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 123.00289855072464, |
|
"grad_norm": 0.0005342851509340107, |
|
"learning_rate": 1.8357487922705318e-06, |
|
"loss": 0.0, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 123.00386473429951, |
|
"grad_norm": 0.00028879634919576347, |
|
"learning_rate": 1.825013419216318e-06, |
|
"loss": 0.0, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 123.0048309178744, |
|
"grad_norm": 0.0004933462478220463, |
|
"learning_rate": 1.8142780461621042e-06, |
|
"loss": 0.0, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 123.00579710144928, |
|
"grad_norm": 0.0003157542960252613, |
|
"learning_rate": 1.8035426731078907e-06, |
|
"loss": 0.0, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 123.00676328502415, |
|
"grad_norm": 0.00021309992007445544, |
|
"learning_rate": 1.792807300053677e-06, |
|
"loss": 0.0, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 123.00676328502415, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.5686160326004028, |
|
"eval_runtime": 17.2629, |
|
"eval_samples_per_second": 4.46, |
|
"eval_steps_per_second": 1.159, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 124.00096618357487, |
|
"grad_norm": 0.24094708263874054, |
|
"learning_rate": 1.7820719269994632e-06, |
|
"loss": 0.0, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 124.00193236714976, |
|
"grad_norm": 0.0003956431755796075, |
|
"learning_rate": 1.7713365539452498e-06, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 124.00289855072464, |
|
"grad_norm": 0.00026967411395162344, |
|
"learning_rate": 1.760601180891036e-06, |
|
"loss": 0.0, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 124.00386473429951, |
|
"grad_norm": 0.0006772116175852716, |
|
"learning_rate": 1.7498658078368225e-06, |
|
"loss": 0.0, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 124.0048309178744, |
|
"grad_norm": 0.0004103895334992558, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"loss": 0.0, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 124.00579710144928, |
|
"grad_norm": 0.00021756005298811942, |
|
"learning_rate": 1.7283950617283952e-06, |
|
"loss": 0.0, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 124.00676328502415, |
|
"grad_norm": 0.0003636313776951283, |
|
"learning_rate": 1.7176596886741817e-06, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 124.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.4284155368804932, |
|
"eval_runtime": 14.6131, |
|
"eval_samples_per_second": 5.269, |
|
"eval_steps_per_second": 1.369, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 125.00096618357487, |
|
"grad_norm": 0.0002465183206368238, |
|
"learning_rate": 1.7069243156199681e-06, |
|
"loss": 0.0, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 125.00193236714976, |
|
"grad_norm": 0.0002711952547542751, |
|
"learning_rate": 1.6961889425657543e-06, |
|
"loss": 0.0, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 125.00289855072464, |
|
"grad_norm": 0.0005906698643229902, |
|
"learning_rate": 1.6854535695115406e-06, |
|
"loss": 0.0, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 125.00386473429951, |
|
"grad_norm": 0.0008631067466922104, |
|
"learning_rate": 1.674718196457327e-06, |
|
"loss": 0.0, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 125.0048309178744, |
|
"grad_norm": 0.00018012075452134013, |
|
"learning_rate": 1.6639828234031133e-06, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 125.00579710144928, |
|
"grad_norm": 0.00032579831895418465, |
|
"learning_rate": 1.6532474503489e-06, |
|
"loss": 0.0, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 125.00676328502415, |
|
"grad_norm": 0.00019309454364702106, |
|
"learning_rate": 1.6425120772946862e-06, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 125.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.4352055788040161, |
|
"eval_runtime": 15.1216, |
|
"eval_samples_per_second": 5.092, |
|
"eval_steps_per_second": 1.323, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 126.00096618357487, |
|
"grad_norm": 0.00021101922902744263, |
|
"learning_rate": 1.6317767042404724e-06, |
|
"loss": 0.0, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 126.00193236714976, |
|
"grad_norm": 0.00024815279175527394, |
|
"learning_rate": 1.6210413311862589e-06, |
|
"loss": 0.0, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 126.00289855072464, |
|
"grad_norm": 0.00025953681324608624, |
|
"learning_rate": 1.610305958132045e-06, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 126.00386473429951, |
|
"grad_norm": 0.00018137147708330303, |
|
"learning_rate": 1.5995705850778316e-06, |
|
"loss": 0.0, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 126.0048309178744, |
|
"grad_norm": 0.0001805800711736083, |
|
"learning_rate": 1.588835212023618e-06, |
|
"loss": 0.0, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 126.00579710144928, |
|
"grad_norm": 0.0001789192610885948, |
|
"learning_rate": 1.5780998389694045e-06, |
|
"loss": 0.0, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 126.00676328502415, |
|
"grad_norm": 0.0029380114283412695, |
|
"learning_rate": 1.5673644659151907e-06, |
|
"loss": 0.0, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 126.00676328502415, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 1.4391511678695679, |
|
"eval_runtime": 14.8548, |
|
"eval_samples_per_second": 5.184, |
|
"eval_steps_per_second": 1.346, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 127.00096618357487, |
|
"grad_norm": 0.00032747775549069047, |
|
"learning_rate": 1.556629092860977e-06, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 127.00193236714976, |
|
"grad_norm": 0.0001772013056324795, |
|
"learning_rate": 1.5458937198067634e-06, |
|
"loss": 0.0, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 127.00289855072464, |
|
"grad_norm": 0.0002507749304641038, |
|
"learning_rate": 1.5351583467525496e-06, |
|
"loss": 0.0, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 127.00386473429951, |
|
"grad_norm": 0.00020645925542339683, |
|
"learning_rate": 1.5244229736983363e-06, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 127.0048309178744, |
|
"grad_norm": 0.0002455931098666042, |
|
"learning_rate": 1.5136876006441225e-06, |
|
"loss": 0.0, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 127.00579710144928, |
|
"grad_norm": 0.0001611367188161239, |
|
"learning_rate": 1.502952227589909e-06, |
|
"loss": 0.0, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 127.00676328502415, |
|
"grad_norm": 0.00029621319845318794, |
|
"learning_rate": 1.4922168545356952e-06, |
|
"loss": 0.0, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 127.00676328502415, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5199832916259766, |
|
"eval_runtime": 17.1653, |
|
"eval_samples_per_second": 4.486, |
|
"eval_steps_per_second": 1.165, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 128.0009661835749, |
|
"grad_norm": 0.00026790890842676163, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.0, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 128.00193236714975, |
|
"grad_norm": 0.0002922885469160974, |
|
"learning_rate": 1.470746108427268e-06, |
|
"loss": 0.0, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 128.00289855072464, |
|
"grad_norm": 0.00022152680321596563, |
|
"learning_rate": 1.4600107353730544e-06, |
|
"loss": 0.0, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 128.00386473429953, |
|
"grad_norm": 0.0002079719997709617, |
|
"learning_rate": 1.4492753623188408e-06, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 128.0048309178744, |
|
"grad_norm": 0.0004370302485767752, |
|
"learning_rate": 1.438539989264627e-06, |
|
"loss": 0.0, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 128.00579710144928, |
|
"grad_norm": 0.00025471995468251407, |
|
"learning_rate": 1.4278046162104133e-06, |
|
"loss": 0.0, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 128.00676328502416, |
|
"grad_norm": 0.00036597729194909334, |
|
"learning_rate": 1.4170692431561997e-06, |
|
"loss": 0.0, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 128.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5243946313858032, |
|
"eval_runtime": 19.1328, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 1.045, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 129.0009661835749, |
|
"grad_norm": 0.0020494377240538597, |
|
"learning_rate": 1.406333870101986e-06, |
|
"loss": 0.0, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 129.00193236714975, |
|
"grad_norm": 0.0005171003867872059, |
|
"learning_rate": 1.3955984970477726e-06, |
|
"loss": 0.0, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 129.00289855072464, |
|
"grad_norm": 0.0006602657376788557, |
|
"learning_rate": 1.3848631239935589e-06, |
|
"loss": 0.0, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 129.00386473429953, |
|
"grad_norm": 0.000498250825330615, |
|
"learning_rate": 1.3741277509393453e-06, |
|
"loss": 0.0, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 129.0048309178744, |
|
"grad_norm": 0.00011794278543675318, |
|
"learning_rate": 1.3633923778851316e-06, |
|
"loss": 0.0, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 129.00579710144928, |
|
"grad_norm": 0.00023995035735424608, |
|
"learning_rate": 1.3526570048309178e-06, |
|
"loss": 0.0, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 129.00676328502416, |
|
"grad_norm": 0.00020481545652728528, |
|
"learning_rate": 1.3419216317767045e-06, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 129.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5282416343688965, |
|
"eval_runtime": 20.8777, |
|
"eval_samples_per_second": 3.688, |
|
"eval_steps_per_second": 0.958, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 130.0009661835749, |
|
"grad_norm": 0.00041437524487264454, |
|
"learning_rate": 1.3311862587224907e-06, |
|
"loss": 0.0, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 130.00193236714975, |
|
"grad_norm": 0.00026965918368659914, |
|
"learning_rate": 1.3204508856682772e-06, |
|
"loss": 0.0, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 130.00289855072464, |
|
"grad_norm": 0.00022876296134199947, |
|
"learning_rate": 1.3097155126140634e-06, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 130.00386473429953, |
|
"grad_norm": 0.0004432128043845296, |
|
"learning_rate": 1.2989801395598498e-06, |
|
"loss": 0.0, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 130.0048309178744, |
|
"grad_norm": 0.00015545626229140908, |
|
"learning_rate": 1.288244766505636e-06, |
|
"loss": 0.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 130.00579710144928, |
|
"grad_norm": 0.00017510508769191802, |
|
"learning_rate": 1.2775093934514227e-06, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 130.00676328502416, |
|
"grad_norm": 0.00014436942001339048, |
|
"learning_rate": 1.266774020397209e-06, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 130.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5338480472564697, |
|
"eval_runtime": 21.879, |
|
"eval_samples_per_second": 3.519, |
|
"eval_steps_per_second": 0.914, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 131.0009661835749, |
|
"grad_norm": 0.0001512864837422967, |
|
"learning_rate": 1.2560386473429952e-06, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 131.00193236714975, |
|
"grad_norm": 0.0008248549420386553, |
|
"learning_rate": 1.2453032742887817e-06, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 131.00289855072464, |
|
"grad_norm": 0.00021860776178073138, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 131.00386473429953, |
|
"grad_norm": 0.00015075539704412222, |
|
"learning_rate": 1.2238325281803544e-06, |
|
"loss": 0.0, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 131.0048309178744, |
|
"grad_norm": 0.00016689079347997904, |
|
"learning_rate": 1.2130971551261406e-06, |
|
"loss": 0.0, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 131.00579710144928, |
|
"grad_norm": 0.00032013689633458853, |
|
"learning_rate": 1.2023617820719273e-06, |
|
"loss": 0.0, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 131.00676328502416, |
|
"grad_norm": 0.0002864132111426443, |
|
"learning_rate": 1.1916264090177135e-06, |
|
"loss": 0.0, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 131.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5488603115081787, |
|
"eval_runtime": 33.1783, |
|
"eval_samples_per_second": 2.321, |
|
"eval_steps_per_second": 0.603, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 132.0009661835749, |
|
"grad_norm": 0.00016558170318603516, |
|
"learning_rate": 1.1808910359634997e-06, |
|
"loss": 0.0, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 132.00193236714975, |
|
"grad_norm": 0.0001805421052267775, |
|
"learning_rate": 1.1701556629092862e-06, |
|
"loss": 0.0, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 132.00289855072464, |
|
"grad_norm": 0.00022684381110593677, |
|
"learning_rate": 1.1594202898550726e-06, |
|
"loss": 0.0, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 132.00386473429953, |
|
"grad_norm": 0.00018865799938794225, |
|
"learning_rate": 1.1486849168008589e-06, |
|
"loss": 0.0, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 132.0048309178744, |
|
"grad_norm": 0.0002286021481268108, |
|
"learning_rate": 1.1379495437466453e-06, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 132.00579710144928, |
|
"grad_norm": 0.0002894134959205985, |
|
"learning_rate": 1.1272141706924316e-06, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 132.00676328502416, |
|
"grad_norm": 0.00023053883342072368, |
|
"learning_rate": 1.116478797638218e-06, |
|
"loss": 0.0, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 132.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5529676675796509, |
|
"eval_runtime": 22.5575, |
|
"eval_samples_per_second": 3.413, |
|
"eval_steps_per_second": 0.887, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 133.0009661835749, |
|
"grad_norm": 0.00038470380241051316, |
|
"learning_rate": 1.1057434245840045e-06, |
|
"loss": 0.0, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 133.00193236714975, |
|
"grad_norm": 0.00019789740326814353, |
|
"learning_rate": 1.0950080515297907e-06, |
|
"loss": 0.0, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 133.00289855072464, |
|
"grad_norm": 0.00023541324480902404, |
|
"learning_rate": 1.0842726784755772e-06, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 133.00386473429953, |
|
"grad_norm": 0.0025287040043622255, |
|
"learning_rate": 1.0735373054213636e-06, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 133.0048309178744, |
|
"grad_norm": 0.0004253005899954587, |
|
"learning_rate": 1.0628019323671499e-06, |
|
"loss": 0.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 133.00579710144928, |
|
"grad_norm": 0.0001564955891808495, |
|
"learning_rate": 1.052066559312936e-06, |
|
"loss": 0.0, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 133.00676328502416, |
|
"grad_norm": 0.0002814004838000983, |
|
"learning_rate": 1.0413311862587225e-06, |
|
"loss": 0.0, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 133.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5586316585540771, |
|
"eval_runtime": 21.6913, |
|
"eval_samples_per_second": 3.55, |
|
"eval_steps_per_second": 0.922, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 134.0009661835749, |
|
"grad_norm": 0.00016957947809714824, |
|
"learning_rate": 1.030595813204509e-06, |
|
"loss": 0.0, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 134.00193236714975, |
|
"grad_norm": 0.0001453941804356873, |
|
"learning_rate": 1.0198604401502952e-06, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 134.00289855072464, |
|
"grad_norm": 0.0002459472743794322, |
|
"learning_rate": 1.0091250670960817e-06, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 134.00386473429953, |
|
"grad_norm": 0.0001978687650989741, |
|
"learning_rate": 9.983896940418681e-07, |
|
"loss": 0.0, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 134.0048309178744, |
|
"grad_norm": 0.0002101514401147142, |
|
"learning_rate": 9.876543209876544e-07, |
|
"loss": 0.0, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 134.00579710144928, |
|
"grad_norm": 0.00020504761778283864, |
|
"learning_rate": 9.769189479334408e-07, |
|
"loss": 0.0, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 134.00676328502416, |
|
"grad_norm": 0.00021766134887002409, |
|
"learning_rate": 9.66183574879227e-07, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 134.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.564234733581543, |
|
"eval_runtime": 20.8788, |
|
"eval_samples_per_second": 3.688, |
|
"eval_steps_per_second": 0.958, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 135.0009661835749, |
|
"grad_norm": 0.00020511502225417644, |
|
"learning_rate": 9.554482018250135e-07, |
|
"loss": 0.0, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 135.00193236714975, |
|
"grad_norm": 0.001212193281389773, |
|
"learning_rate": 9.447128287707999e-07, |
|
"loss": 0.0, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 135.00289855072464, |
|
"grad_norm": 0.0002920062397606671, |
|
"learning_rate": 9.339774557165862e-07, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 135.00386473429953, |
|
"grad_norm": 0.0001773640251485631, |
|
"learning_rate": 9.232420826623725e-07, |
|
"loss": 0.0, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 135.0048309178744, |
|
"grad_norm": 0.0002293359866598621, |
|
"learning_rate": 9.12506709608159e-07, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 135.00579710144928, |
|
"grad_norm": 0.00026728856028057635, |
|
"learning_rate": 9.017713365539453e-07, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 135.00676328502416, |
|
"grad_norm": 0.00027180055622011423, |
|
"learning_rate": 8.910359634997316e-07, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 135.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5596168041229248, |
|
"eval_runtime": 21.4836, |
|
"eval_samples_per_second": 3.584, |
|
"eval_steps_per_second": 0.931, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 136.0009661835749, |
|
"grad_norm": 0.0005629212246276438, |
|
"learning_rate": 8.80300590445518e-07, |
|
"loss": 0.0, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 136.00193236714975, |
|
"grad_norm": 0.000151635889778845, |
|
"learning_rate": 8.695652173913044e-07, |
|
"loss": 0.0, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 136.00289855072464, |
|
"grad_norm": 0.0002448921441100538, |
|
"learning_rate": 8.588298443370908e-07, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 136.00386473429953, |
|
"grad_norm": 0.008513822220265865, |
|
"learning_rate": 8.480944712828772e-07, |
|
"loss": 0.0, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 136.0048309178744, |
|
"grad_norm": 0.00017195107648149133, |
|
"learning_rate": 8.373590982286635e-07, |
|
"loss": 0.0, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 136.00579710144928, |
|
"grad_norm": 0.00014387645933311433, |
|
"learning_rate": 8.2662372517445e-07, |
|
"loss": 0.0, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 136.00676328502416, |
|
"grad_norm": 0.0002819636429194361, |
|
"learning_rate": 8.158883521202362e-07, |
|
"loss": 0.0, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 136.00676328502416, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 1.5680683851242065, |
|
"eval_runtime": 19.9494, |
|
"eval_samples_per_second": 3.86, |
|
"eval_steps_per_second": 1.003, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 137.0009661835749, |
|
"grad_norm": 0.00016729481285437942, |
|
"learning_rate": 8.051529790660226e-07, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 137.00193236714975, |
|
"grad_norm": 0.0003245656844228506, |
|
"learning_rate": 7.94417606011809e-07, |
|
"loss": 0.0, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 137.00289855072464, |
|
"grad_norm": 0.00014796361210756004, |
|
"learning_rate": 7.836822329575953e-07, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 137.00386473429953, |
|
"grad_norm": 633.5186767578125, |
|
"learning_rate": 7.729468599033817e-07, |
|
"loss": 0.0272, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 137.0048309178744, |
|
"grad_norm": 0.0002833566104527563, |
|
"learning_rate": 7.622114868491681e-07, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 137.00579710144928, |
|
"grad_norm": 0.00018804871069733053, |
|
"learning_rate": 7.514761137949545e-07, |
|
"loss": 0.0, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 137.00676328502416, |
|
"grad_norm": 0.0010792305693030357, |
|
"learning_rate": 7.407407407407407e-07, |
|
"loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 137.00676328502416, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.449820637702942, |
|
"eval_runtime": 20.7119, |
|
"eval_samples_per_second": 3.718, |
|
"eval_steps_per_second": 0.966, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 138.0009661835749, |
|
"grad_norm": 0.00046943960478529334, |
|
"learning_rate": 7.300053676865272e-07, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 138.00193236714975, |
|
"grad_norm": 0.002076608594506979, |
|
"learning_rate": 7.192699946323135e-07, |
|
"loss": 0.0, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 138.00289855072464, |
|
"grad_norm": 0.0034116620663553476, |
|
"learning_rate": 7.085346215780999e-07, |
|
"loss": 0.0001, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 138.00386473429953, |
|
"grad_norm": 0.00017700823082122952, |
|
"learning_rate": 6.977992485238863e-07, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 138.0048309178744, |
|
"grad_norm": 0.0004598258819896728, |
|
"learning_rate": 6.870638754696727e-07, |
|
"loss": 0.0, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 138.00579710144928, |
|
"grad_norm": 0.0019175253110006452, |
|
"learning_rate": 6.763285024154589e-07, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 138.00676328502416, |
|
"grad_norm": 0.00016673437494318932, |
|
"learning_rate": 6.655931293612454e-07, |
|
"loss": 0.0, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 138.00676328502416, |
|
"eval_accuracy": 0.8311688311688312, |
|
"eval_loss": 1.6159300804138184, |
|
"eval_runtime": 19.9379, |
|
"eval_samples_per_second": 3.862, |
|
"eval_steps_per_second": 1.003, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 139.0009661835749, |
|
"grad_norm": 0.00019458989845588803, |
|
"learning_rate": 6.548577563070317e-07, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 139.00193236714975, |
|
"grad_norm": 0.0003470699302852154, |
|
"learning_rate": 6.44122383252818e-07, |
|
"loss": 0.0, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 139.00289855072464, |
|
"grad_norm": 0.00048812173190526664, |
|
"learning_rate": 6.333870101986045e-07, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 139.00386473429953, |
|
"grad_norm": 0.0002671309339348227, |
|
"learning_rate": 6.226516371443908e-07, |
|
"loss": 0.0, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 139.0048309178744, |
|
"grad_norm": 0.0003757915983442217, |
|
"learning_rate": 6.119162640901772e-07, |
|
"loss": 0.0, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 139.00579710144928, |
|
"grad_norm": 0.0009131430997513235, |
|
"learning_rate": 6.011808910359636e-07, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 139.00676328502416, |
|
"grad_norm": 0.0003928108490072191, |
|
"learning_rate": 5.904455179817499e-07, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 139.00676328502416, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.6949611902236938, |
|
"eval_runtime": 20.4828, |
|
"eval_samples_per_second": 3.759, |
|
"eval_steps_per_second": 0.976, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 140.0009661835749, |
|
"grad_norm": 0.00022178050130605698, |
|
"learning_rate": 5.797101449275363e-07, |
|
"loss": 0.0, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 140.00193236714975, |
|
"grad_norm": 0.00025778423878364265, |
|
"learning_rate": 5.689747718733227e-07, |
|
"loss": 0.0, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 140.00289855072464, |
|
"grad_norm": 0.0012736802455037832, |
|
"learning_rate": 5.58239398819109e-07, |
|
"loss": 0.0, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 140.00386473429953, |
|
"grad_norm": 0.00027229098486714065, |
|
"learning_rate": 5.475040257648954e-07, |
|
"loss": 0.0, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 140.0048309178744, |
|
"grad_norm": 0.00017557268438395113, |
|
"learning_rate": 5.367686527106818e-07, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 140.00579710144928, |
|
"grad_norm": 0.0002299802436027676, |
|
"learning_rate": 5.26033279656468e-07, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 140.00676328502416, |
|
"grad_norm": 0.00015179581532720476, |
|
"learning_rate": 5.152979066022545e-07, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 140.00676328502416, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.6978023052215576, |
|
"eval_runtime": 17.5663, |
|
"eval_samples_per_second": 4.383, |
|
"eval_steps_per_second": 1.139, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 141.0009661835749, |
|
"grad_norm": 0.0007337291608564556, |
|
"learning_rate": 5.045625335480408e-07, |
|
"loss": 0.0, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 141.00193236714975, |
|
"grad_norm": 0.00023553297796752304, |
|
"learning_rate": 4.938271604938272e-07, |
|
"loss": 0.0, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 141.00289855072464, |
|
"grad_norm": 0.00022139857173897326, |
|
"learning_rate": 4.830917874396135e-07, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 141.00386473429953, |
|
"grad_norm": 0.0001859077747212723, |
|
"learning_rate": 4.7235641438539993e-07, |
|
"loss": 0.0, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 141.0048309178744, |
|
"grad_norm": 0.00024027664039749652, |
|
"learning_rate": 4.616210413311863e-07, |
|
"loss": 0.0, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 141.00579710144928, |
|
"grad_norm": 0.00047888257540762424, |
|
"learning_rate": 4.5088566827697267e-07, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 141.00676328502416, |
|
"grad_norm": 0.00018929631914943457, |
|
"learning_rate": 4.40150295222759e-07, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 141.00676328502416, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.6985359191894531, |
|
"eval_runtime": 14.7785, |
|
"eval_samples_per_second": 5.21, |
|
"eval_steps_per_second": 1.353, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 142.0009661835749, |
|
"grad_norm": 0.0006224442622624338, |
|
"learning_rate": 4.294149221685454e-07, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 142.00193236714975, |
|
"grad_norm": 0.00015741850074846298, |
|
"learning_rate": 4.1867954911433176e-07, |
|
"loss": 0.0, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 142.00289855072464, |
|
"grad_norm": 0.0006889476208016276, |
|
"learning_rate": 4.079441760601181e-07, |
|
"loss": 0.0, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 142.00386473429953, |
|
"grad_norm": 0.0005071698105894029, |
|
"learning_rate": 3.972088030059045e-07, |
|
"loss": 0.0, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 142.0048309178744, |
|
"grad_norm": 0.00013677505194209516, |
|
"learning_rate": 3.8647342995169085e-07, |
|
"loss": 0.0, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 142.00579710144928, |
|
"grad_norm": 0.0006237445049919188, |
|
"learning_rate": 3.7573805689747724e-07, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 142.00676328502416, |
|
"grad_norm": 0.00034121310454793274, |
|
"learning_rate": 3.650026838432636e-07, |
|
"loss": 0.0, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 142.00676328502416, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.6995044946670532, |
|
"eval_runtime": 15.0633, |
|
"eval_samples_per_second": 5.112, |
|
"eval_steps_per_second": 1.328, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 143.0009661835749, |
|
"grad_norm": 0.00015302658721338958, |
|
"learning_rate": 3.5426731078904993e-07, |
|
"loss": 0.0, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 143.00193236714975, |
|
"grad_norm": 0.00034528967808000743, |
|
"learning_rate": 3.4353193773483633e-07, |
|
"loss": 0.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 143.00289855072464, |
|
"grad_norm": 0.00026275747222825885, |
|
"learning_rate": 3.327965646806227e-07, |
|
"loss": 0.0, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 143.00386473429953, |
|
"grad_norm": 0.0006553785642609, |
|
"learning_rate": 3.22061191626409e-07, |
|
"loss": 0.0, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 143.0048309178744, |
|
"grad_norm": 0.0002892035699915141, |
|
"learning_rate": 3.113258185721954e-07, |
|
"loss": 0.0, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 143.00579710144928, |
|
"grad_norm": 0.0003178605402354151, |
|
"learning_rate": 3.005904455179818e-07, |
|
"loss": 0.0, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 143.00676328502416, |
|
"grad_norm": 0.00017081064288504422, |
|
"learning_rate": 2.8985507246376816e-07, |
|
"loss": 0.0, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 143.00676328502416, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.7036974430084229, |
|
"eval_runtime": 14.5168, |
|
"eval_samples_per_second": 5.304, |
|
"eval_steps_per_second": 1.378, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 144.0009661835749, |
|
"grad_norm": 0.00016073655569925904, |
|
"learning_rate": 2.791196994095545e-07, |
|
"loss": 0.0, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 144.00193236714975, |
|
"grad_norm": 0.0004204573924653232, |
|
"learning_rate": 2.683843263553409e-07, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 144.00289855072464, |
|
"grad_norm": 0.0003274252521805465, |
|
"learning_rate": 2.5764895330112725e-07, |
|
"loss": 0.0, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 144.00386473429953, |
|
"grad_norm": 0.0001679103879723698, |
|
"learning_rate": 2.469135802469136e-07, |
|
"loss": 0.0, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 144.0048309178744, |
|
"grad_norm": 0.00028845362248830497, |
|
"learning_rate": 2.3617820719269996e-07, |
|
"loss": 0.0, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 144.00579710144928, |
|
"grad_norm": 0.00044526177225634456, |
|
"learning_rate": 2.2544283413848634e-07, |
|
"loss": 0.0, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 144.00676328502416, |
|
"grad_norm": 0.0007641853298991919, |
|
"learning_rate": 2.147074610842727e-07, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 144.00676328502416, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.7055643796920776, |
|
"eval_runtime": 16.0115, |
|
"eval_samples_per_second": 4.809, |
|
"eval_steps_per_second": 1.249, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 145.0009661835749, |
|
"grad_norm": 0.0006160350749269128, |
|
"learning_rate": 2.0397208803005905e-07, |
|
"loss": 0.0, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 145.00193236714975, |
|
"grad_norm": 0.0005327651742845774, |
|
"learning_rate": 1.9323671497584542e-07, |
|
"loss": 0.0, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 145.00289855072464, |
|
"grad_norm": 0.00017193098028656095, |
|
"learning_rate": 1.825013419216318e-07, |
|
"loss": 0.0, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 145.00386473429953, |
|
"grad_norm": 0.00017320859478786588, |
|
"learning_rate": 1.7176596886741817e-07, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 145.0048309178744, |
|
"grad_norm": 0.0001653715589782223, |
|
"learning_rate": 1.610305958132045e-07, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 145.00579710144928, |
|
"grad_norm": 0.0001977159408852458, |
|
"learning_rate": 1.502952227589909e-07, |
|
"loss": 0.0, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 145.00676328502416, |
|
"grad_norm": 0.001278374344110489, |
|
"learning_rate": 1.3955984970477725e-07, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 145.00676328502416, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.7054346799850464, |
|
"eval_runtime": 16.0538, |
|
"eval_samples_per_second": 4.796, |
|
"eval_steps_per_second": 1.246, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 146.0009661835749, |
|
"grad_norm": 0.0002044195425696671, |
|
"learning_rate": 1.2882447665056362e-07, |
|
"loss": 0.0, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 146.00193236714975, |
|
"grad_norm": 0.00018759335216600448, |
|
"learning_rate": 1.1808910359634998e-07, |
|
"loss": 0.0, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 146.00289855072464, |
|
"grad_norm": 0.0003087896038778126, |
|
"learning_rate": 1.0735373054213635e-07, |
|
"loss": 0.0, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 146.00386473429953, |
|
"grad_norm": 0.0002801946538966149, |
|
"learning_rate": 9.661835748792271e-08, |
|
"loss": 0.0, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 146.0048309178744, |
|
"grad_norm": 0.00012826151214540005, |
|
"learning_rate": 8.588298443370908e-08, |
|
"loss": 0.0, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 146.00579710144928, |
|
"grad_norm": 0.0002482128038536757, |
|
"learning_rate": 7.514761137949545e-08, |
|
"loss": 0.0, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 146.00676328502416, |
|
"grad_norm": 0.0003177243925165385, |
|
"learning_rate": 6.441223832528181e-08, |
|
"loss": 0.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 146.00676328502416, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.7054239511489868, |
|
"eval_runtime": 18.0904, |
|
"eval_samples_per_second": 4.256, |
|
"eval_steps_per_second": 1.106, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 147.0009661835749, |
|
"grad_norm": 0.0007799161248840392, |
|
"learning_rate": 5.367686527106818e-08, |
|
"loss": 0.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 147.00193236714975, |
|
"grad_norm": 0.00712132453918457, |
|
"learning_rate": 4.294149221685454e-08, |
|
"loss": 0.0, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 147.00289855072464, |
|
"grad_norm": 0.00016804441111162305, |
|
"learning_rate": 3.2206119162640906e-08, |
|
"loss": 0.0, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 147.00386473429953, |
|
"grad_norm": 0.00015434020315296948, |
|
"learning_rate": 2.147074610842727e-08, |
|
"loss": 0.0, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 147.0048309178744, |
|
"grad_norm": 0.4991607367992401, |
|
"learning_rate": 1.0735373054213635e-08, |
|
"loss": 0.0, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"grad_norm": 0.00018625461962074041, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"eval_accuracy": 0.8051948051948052, |
|
"eval_loss": 1.7040551900863647, |
|
"eval_runtime": 18.6543, |
|
"eval_samples_per_second": 4.128, |
|
"eval_steps_per_second": 1.072, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"step": 10350, |
|
"total_flos": 1.8049752771658895e+20, |
|
"train_loss": 0.08939714265258908, |
|
"train_runtime": 20289.1727, |
|
"train_samples_per_second": 2.04, |
|
"train_steps_per_second": 0.51 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 1.1517490148544312, |
|
"eval_runtime": 20.1404, |
|
"eval_samples_per_second": 3.823, |
|
"eval_steps_per_second": 0.993, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 1.1517488956451416, |
|
"eval_runtime": 18.4764, |
|
"eval_samples_per_second": 4.167, |
|
"eval_steps_per_second": 1.082, |
|
"step": 10350 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8049752771658895e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|