|
{ |
|
"best_metric": 0.7837837837837838, |
|
"best_model_checkpoint": "MAE-CT-M1N0-M12_v8_split1_v3/checkpoint-568", |
|
"epoch": 147.006, |
|
"eval_steps": 500, |
|
"global_step": 10500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009523809523809524, |
|
"grad_norm": 1.495897650718689, |
|
"learning_rate": 9.523809523809525e-08, |
|
"loss": 0.6806, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0019047619047619048, |
|
"grad_norm": 2.5130839347839355, |
|
"learning_rate": 1.904761904761905e-07, |
|
"loss": 0.6831, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002857142857142857, |
|
"grad_norm": 1.683809518814087, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.6792, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0038095238095238095, |
|
"grad_norm": 3.095991849899292, |
|
"learning_rate": 3.80952380952381e-07, |
|
"loss": 0.6764, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004761904761904762, |
|
"grad_norm": 2.464594841003418, |
|
"learning_rate": 4.7619047619047623e-07, |
|
"loss": 0.6689, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005714285714285714, |
|
"grad_norm": 2.2026219367980957, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.6583, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.006666666666666667, |
|
"grad_norm": 1.6293625831604004, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.6862, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0067619047619047615, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 0.6570932269096375, |
|
"eval_runtime": 17.5734, |
|
"eval_samples_per_second": 4.211, |
|
"eval_steps_per_second": 1.081, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.000857142857143, |
|
"grad_norm": 4.512332439422607, |
|
"learning_rate": 7.61904761904762e-07, |
|
"loss": 0.6868, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0018095238095237, |
|
"grad_norm": 4.540114402770996, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.6768, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0027619047619047, |
|
"grad_norm": 2.093035936355591, |
|
"learning_rate": 9.523809523809525e-07, |
|
"loss": 0.6615, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0037142857142858, |
|
"grad_norm": 3.182893991470337, |
|
"learning_rate": 1.0476190476190478e-06, |
|
"loss": 0.6468, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0046666666666666, |
|
"grad_norm": 2.688915491104126, |
|
"learning_rate": 1.142857142857143e-06, |
|
"loss": 0.6293, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0056190476190476, |
|
"grad_norm": 4.570864200592041, |
|
"learning_rate": 1.2380952380952382e-06, |
|
"loss": 0.643, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0065714285714287, |
|
"grad_norm": 5.001130104064941, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.665, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0067619047619047, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 0.6369683742523193, |
|
"eval_runtime": 17.5533, |
|
"eval_samples_per_second": 4.216, |
|
"eval_steps_per_second": 1.082, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.0007619047619047, |
|
"grad_norm": 3.4650774002075195, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.6296, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.001714285714286, |
|
"grad_norm": 6.552379131317139, |
|
"learning_rate": 1.523809523809524e-06, |
|
"loss": 0.6634, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.002666666666667, |
|
"grad_norm": 6.5488715171813965, |
|
"learning_rate": 1.6190476190476193e-06, |
|
"loss": 0.6709, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0036190476190474, |
|
"grad_norm": 9.810216903686523, |
|
"learning_rate": 1.7142857142857145e-06, |
|
"loss": 0.6067, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.0045714285714284, |
|
"grad_norm": 12.946127891540527, |
|
"learning_rate": 1.8095238095238097e-06, |
|
"loss": 0.6301, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0055238095238095, |
|
"grad_norm": 17.473913192749023, |
|
"learning_rate": 1.904761904761905e-06, |
|
"loss": 0.5936, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0064761904761905, |
|
"grad_norm": 5.558756351470947, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.7033, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.006761904761905, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 0.6253643035888672, |
|
"eval_runtime": 17.8289, |
|
"eval_samples_per_second": 4.151, |
|
"eval_steps_per_second": 1.066, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.0006666666666666, |
|
"grad_norm": 4.486657619476318, |
|
"learning_rate": 2.0952380952380955e-06, |
|
"loss": 0.6552, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0016190476190476, |
|
"grad_norm": 6.97869348526001, |
|
"learning_rate": 2.1904761904761908e-06, |
|
"loss": 0.5872, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0025714285714287, |
|
"grad_norm": 7.152403354644775, |
|
"learning_rate": 2.285714285714286e-06, |
|
"loss": 0.6369, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.0035238095238097, |
|
"grad_norm": 7.3148298263549805, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 0.5187, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.0044761904761903, |
|
"grad_norm": 7.124256610870361, |
|
"learning_rate": 2.4761904761904764e-06, |
|
"loss": 0.6672, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.0054285714285713, |
|
"grad_norm": 6.500947952270508, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.6748, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.0063809523809524, |
|
"grad_norm": 10.486983299255371, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.6524, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.006761904761905, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 0.6090587377548218, |
|
"eval_runtime": 16.4336, |
|
"eval_samples_per_second": 4.503, |
|
"eval_steps_per_second": 1.156, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.000571428571429, |
|
"grad_norm": 10.162851333618164, |
|
"learning_rate": 2.7619047619047625e-06, |
|
"loss": 0.6116, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.0015238095238095, |
|
"grad_norm": 6.945367336273193, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.6209, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.00247619047619, |
|
"grad_norm": 8.757481575012207, |
|
"learning_rate": 2.9523809523809525e-06, |
|
"loss": 0.595, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.003428571428572, |
|
"grad_norm": 6.791728496551514, |
|
"learning_rate": 3.047619047619048e-06, |
|
"loss": 0.5939, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.004380952380952, |
|
"grad_norm": 11.641304969787598, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.7177, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.005333333333334, |
|
"grad_norm": 8.894887924194336, |
|
"learning_rate": 3.2380952380952385e-06, |
|
"loss": 0.5451, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.006285714285714, |
|
"grad_norm": 10.148953437805176, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.5611, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.0067619047619045, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 0.5565000176429749, |
|
"eval_runtime": 16.1354, |
|
"eval_samples_per_second": 4.586, |
|
"eval_steps_per_second": 1.178, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 5.00047619047619, |
|
"grad_norm": 16.05340576171875, |
|
"learning_rate": 3.428571428571429e-06, |
|
"loss": 0.5934, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.001428571428572, |
|
"grad_norm": 21.09893798828125, |
|
"learning_rate": 3.523809523809524e-06, |
|
"loss": 0.599, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.002380952380952, |
|
"grad_norm": 18.13679313659668, |
|
"learning_rate": 3.6190476190476194e-06, |
|
"loss": 0.5421, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.003333333333333, |
|
"grad_norm": 12.51164722442627, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.5152, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.0042857142857144, |
|
"grad_norm": 23.869234085083008, |
|
"learning_rate": 3.80952380952381e-06, |
|
"loss": 0.4998, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.005238095238095, |
|
"grad_norm": 14.866220474243164, |
|
"learning_rate": 3.9047619047619055e-06, |
|
"loss": 0.5252, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.0061904761904765, |
|
"grad_norm": 9.281255722045898, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.4274, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.0067619047619045, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 0.5154068470001221, |
|
"eval_runtime": 16.3563, |
|
"eval_samples_per_second": 4.524, |
|
"eval_steps_per_second": 1.162, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 6.000380952380953, |
|
"grad_norm": 8.937905311584473, |
|
"learning_rate": 4.095238095238096e-06, |
|
"loss": 0.6578, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.001333333333333, |
|
"grad_norm": 10.171916007995605, |
|
"learning_rate": 4.190476190476191e-06, |
|
"loss": 0.5081, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.002285714285715, |
|
"grad_norm": 23.720195770263672, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.5327, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.003238095238095, |
|
"grad_norm": 19.935134887695312, |
|
"learning_rate": 4.3809523809523815e-06, |
|
"loss": 0.4616, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.004190476190476, |
|
"grad_norm": 13.191886901855469, |
|
"learning_rate": 4.476190476190477e-06, |
|
"loss": 0.3997, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.005142857142857, |
|
"grad_norm": 34.281044006347656, |
|
"learning_rate": 4.571428571428572e-06, |
|
"loss": 0.5115, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.006095238095238, |
|
"grad_norm": 15.229226112365723, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.4797, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.0067619047619045, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 0.5644029378890991, |
|
"eval_runtime": 15.9353, |
|
"eval_samples_per_second": 4.644, |
|
"eval_steps_per_second": 1.192, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 7.000285714285714, |
|
"grad_norm": 12.772289276123047, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 0.3397, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0012380952380955, |
|
"grad_norm": 30.241724014282227, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.4096, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.002190476190476, |
|
"grad_norm": 28.286476135253906, |
|
"learning_rate": 4.952380952380953e-06, |
|
"loss": 0.4814, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.003142857142858, |
|
"grad_norm": 43.063167572021484, |
|
"learning_rate": 5.047619047619048e-06, |
|
"loss": 0.4013, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.004095238095238, |
|
"grad_norm": 12.676410675048828, |
|
"learning_rate": 5.142857142857142e-06, |
|
"loss": 0.5215, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.005047619047619, |
|
"grad_norm": 33.401485443115234, |
|
"learning_rate": 5.2380952380952384e-06, |
|
"loss": 0.3229, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.006, |
|
"grad_norm": 29.031524658203125, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.3758, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.0067619047619045, |
|
"eval_accuracy": 0.7837837837837838, |
|
"eval_loss": 0.49420419335365295, |
|
"eval_runtime": 15.2009, |
|
"eval_samples_per_second": 4.868, |
|
"eval_steps_per_second": 1.25, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 8.000190476190475, |
|
"grad_norm": 60.28892517089844, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.3936, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.001142857142858, |
|
"grad_norm": 0.988732635974884, |
|
"learning_rate": 5.523809523809525e-06, |
|
"loss": 0.3985, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.002095238095238, |
|
"grad_norm": 22.0004940032959, |
|
"learning_rate": 5.619047619047619e-06, |
|
"loss": 0.8432, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.003047619047619, |
|
"grad_norm": 6.368250846862793, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.3863, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.004, |
|
"grad_norm": 44.51639175415039, |
|
"learning_rate": 5.8095238095238106e-06, |
|
"loss": 0.2897, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.00495238095238, |
|
"grad_norm": 48.01280975341797, |
|
"learning_rate": 5.904761904761905e-06, |
|
"loss": 0.2577, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.005904761904763, |
|
"grad_norm": 37.27678680419922, |
|
"learning_rate": 6e-06, |
|
"loss": 0.4243, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 8.006761904761905, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 0.525236189365387, |
|
"eval_runtime": 15.1435, |
|
"eval_samples_per_second": 4.887, |
|
"eval_steps_per_second": 1.255, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 9.000095238095238, |
|
"grad_norm": 15.225837707519531, |
|
"learning_rate": 6.095238095238096e-06, |
|
"loss": 0.3945, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.001047619047618, |
|
"grad_norm": 15.552347183227539, |
|
"learning_rate": 6.1904761904761914e-06, |
|
"loss": 0.4775, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.002, |
|
"grad_norm": 47.491615295410156, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.3006, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.002952380952381, |
|
"grad_norm": 37.66072463989258, |
|
"learning_rate": 6.380952380952381e-06, |
|
"loss": 0.5796, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.003904761904762, |
|
"grad_norm": 53.462364196777344, |
|
"learning_rate": 6.476190476190477e-06, |
|
"loss": 0.473, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.004857142857142, |
|
"grad_norm": 5.171151638031006, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.2178, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.005809523809523, |
|
"grad_norm": 41.881038665771484, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.5717, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.006761904761905, |
|
"grad_norm": 319.6321716308594, |
|
"learning_rate": 6.761904761904763e-06, |
|
"loss": 0.5133, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 9.006761904761905, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 0.6872884631156921, |
|
"eval_runtime": 15.7818, |
|
"eval_samples_per_second": 4.689, |
|
"eval_steps_per_second": 1.204, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.00095238095238, |
|
"grad_norm": 32.255435943603516, |
|
"learning_rate": 6.857142857142858e-06, |
|
"loss": 0.3737, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.001904761904761, |
|
"grad_norm": 16.93897819519043, |
|
"learning_rate": 6.952380952380952e-06, |
|
"loss": 0.211, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.002857142857144, |
|
"grad_norm": 59.26026153564453, |
|
"learning_rate": 7.047619047619048e-06, |
|
"loss": 0.3487, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.003809523809524, |
|
"grad_norm": 1.3887319564819336, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.1528, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.004761904761905, |
|
"grad_norm": 4.799934387207031, |
|
"learning_rate": 7.238095238095239e-06, |
|
"loss": 0.3359, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 10.005714285714285, |
|
"grad_norm": 60.12313461303711, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.4078, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 10.006666666666666, |
|
"grad_norm": 86.35945892333984, |
|
"learning_rate": 7.428571428571429e-06, |
|
"loss": 0.3709, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 10.006761904761905, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 0.6554831862449646, |
|
"eval_runtime": 15.3209, |
|
"eval_samples_per_second": 4.83, |
|
"eval_steps_per_second": 1.24, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 11.000857142857143, |
|
"grad_norm": 6.803892612457275, |
|
"learning_rate": 7.523809523809524e-06, |
|
"loss": 0.1958, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.001809523809523, |
|
"grad_norm": 101.62811279296875, |
|
"learning_rate": 7.61904761904762e-06, |
|
"loss": 0.205, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.002761904761904, |
|
"grad_norm": 41.35865783691406, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.3161, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.003714285714286, |
|
"grad_norm": 5.086019992828369, |
|
"learning_rate": 7.809523809523811e-06, |
|
"loss": 0.2116, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 11.004666666666667, |
|
"grad_norm": 79.2950210571289, |
|
"learning_rate": 7.904761904761904e-06, |
|
"loss": 0.3454, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 11.005619047619048, |
|
"grad_norm": 6.022214412689209, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.1427, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 11.006571428571428, |
|
"grad_norm": 5.330665588378906, |
|
"learning_rate": 8.095238095238097e-06, |
|
"loss": 0.2793, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 11.006761904761905, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 0.7139844298362732, |
|
"eval_runtime": 14.3175, |
|
"eval_samples_per_second": 5.169, |
|
"eval_steps_per_second": 1.327, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 12.000761904761905, |
|
"grad_norm": 3.918455123901367, |
|
"learning_rate": 8.190476190476192e-06, |
|
"loss": 0.1305, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 12.001714285714286, |
|
"grad_norm": 208.3162384033203, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.2397, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 12.002666666666666, |
|
"grad_norm": 90.90999603271484, |
|
"learning_rate": 8.380952380952382e-06, |
|
"loss": 0.1608, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 12.003619047619047, |
|
"grad_norm": 106.05206298828125, |
|
"learning_rate": 8.476190476190477e-06, |
|
"loss": 0.673, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 12.00457142857143, |
|
"grad_norm": 72.45829772949219, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.5077, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.00552380952381, |
|
"grad_norm": 124.6170425415039, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.2456, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 12.00647619047619, |
|
"grad_norm": 36.30199432373047, |
|
"learning_rate": 8.761904761904763e-06, |
|
"loss": 0.6153, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 12.006761904761905, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 1.3005902767181396, |
|
"eval_runtime": 13.5492, |
|
"eval_samples_per_second": 5.462, |
|
"eval_steps_per_second": 1.402, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 13.000666666666667, |
|
"grad_norm": 0.5888111591339111, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.1448, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.001619047619048, |
|
"grad_norm": 72.55310821533203, |
|
"learning_rate": 8.952380952380953e-06, |
|
"loss": 0.5699, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 13.002571428571429, |
|
"grad_norm": 21.676172256469727, |
|
"learning_rate": 9.047619047619049e-06, |
|
"loss": 0.5474, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.00352380952381, |
|
"grad_norm": 14.727178573608398, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.2944, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 13.00447619047619, |
|
"grad_norm": 62.19205856323242, |
|
"learning_rate": 9.238095238095239e-06, |
|
"loss": 0.5067, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 13.005428571428572, |
|
"grad_norm": 36.74239730834961, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.1512, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 13.006380952380953, |
|
"grad_norm": 162.0737762451172, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.7185, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 13.006761904761905, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 1.6663051843643188, |
|
"eval_runtime": 13.7203, |
|
"eval_samples_per_second": 5.393, |
|
"eval_steps_per_second": 1.385, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 14.000571428571428, |
|
"grad_norm": 4.640463352203369, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 0.508, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.00152380952381, |
|
"grad_norm": 99.73812103271484, |
|
"learning_rate": 9.61904761904762e-06, |
|
"loss": 0.4121, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 14.002476190476191, |
|
"grad_norm": 82.50711822509766, |
|
"learning_rate": 9.714285714285715e-06, |
|
"loss": 0.5418, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 14.003428571428572, |
|
"grad_norm": 8.718976974487305, |
|
"learning_rate": 9.80952380952381e-06, |
|
"loss": 0.3846, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 14.004380952380952, |
|
"grad_norm": 0.05186394974589348, |
|
"learning_rate": 9.904761904761906e-06, |
|
"loss": 0.084, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 14.005333333333333, |
|
"grad_norm": 149.41354370117188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5234, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 14.006285714285715, |
|
"grad_norm": 28.37025260925293, |
|
"learning_rate": 9.989417989417989e-06, |
|
"loss": 0.4609, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 14.006761904761905, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 1.3522089719772339, |
|
"eval_runtime": 13.8555, |
|
"eval_samples_per_second": 5.341, |
|
"eval_steps_per_second": 1.371, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 15.00047619047619, |
|
"grad_norm": 3.3527989387512207, |
|
"learning_rate": 9.97883597883598e-06, |
|
"loss": 0.0314, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 15.001428571428571, |
|
"grad_norm": 2.5399253368377686, |
|
"learning_rate": 9.968253968253969e-06, |
|
"loss": 0.6274, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 15.002380952380953, |
|
"grad_norm": 73.98735046386719, |
|
"learning_rate": 9.957671957671959e-06, |
|
"loss": 0.2495, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 15.003333333333334, |
|
"grad_norm": 0.07959811389446259, |
|
"learning_rate": 9.947089947089947e-06, |
|
"loss": 0.2372, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.004285714285714, |
|
"grad_norm": 0.023227743804454803, |
|
"learning_rate": 9.936507936507937e-06, |
|
"loss": 0.0695, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 15.005238095238095, |
|
"grad_norm": 0.45850658416748047, |
|
"learning_rate": 9.925925925925927e-06, |
|
"loss": 0.3362, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 15.006190476190476, |
|
"grad_norm": 110.30821228027344, |
|
"learning_rate": 9.915343915343916e-06, |
|
"loss": 0.236, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 15.006761904761905, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.2227578163146973, |
|
"eval_runtime": 13.4678, |
|
"eval_samples_per_second": 5.495, |
|
"eval_steps_per_second": 1.411, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 16.00038095238095, |
|
"grad_norm": 0.26170384883880615, |
|
"learning_rate": 9.904761904761906e-06, |
|
"loss": 0.1766, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 16.001333333333335, |
|
"grad_norm": 0.05248340964317322, |
|
"learning_rate": 9.894179894179896e-06, |
|
"loss": 0.1428, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 16.002285714285716, |
|
"grad_norm": 0.059930965304374695, |
|
"learning_rate": 9.883597883597884e-06, |
|
"loss": 0.1355, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 16.003238095238096, |
|
"grad_norm": 0.021113038063049316, |
|
"learning_rate": 9.873015873015874e-06, |
|
"loss": 0.3233, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 16.004190476190477, |
|
"grad_norm": 0.026357360184192657, |
|
"learning_rate": 9.862433862433864e-06, |
|
"loss": 0.463, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 16.005142857142857, |
|
"grad_norm": 0.10321231931447983, |
|
"learning_rate": 9.851851851851852e-06, |
|
"loss": 0.0118, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 16.006095238095238, |
|
"grad_norm": 0.0149616077542305, |
|
"learning_rate": 9.841269841269842e-06, |
|
"loss": 0.0519, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.006761904761905, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 1.0972933769226074, |
|
"eval_runtime": 13.5211, |
|
"eval_samples_per_second": 5.473, |
|
"eval_steps_per_second": 1.405, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 17.000285714285713, |
|
"grad_norm": 0.028757641091942787, |
|
"learning_rate": 9.830687830687832e-06, |
|
"loss": 0.259, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 17.001238095238094, |
|
"grad_norm": 20.525066375732422, |
|
"learning_rate": 9.82010582010582e-06, |
|
"loss": 0.037, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 17.002190476190478, |
|
"grad_norm": 0.12178980559110641, |
|
"learning_rate": 9.80952380952381e-06, |
|
"loss": 0.2156, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 17.00314285714286, |
|
"grad_norm": 25.35184097290039, |
|
"learning_rate": 9.7989417989418e-06, |
|
"loss": 0.181, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 17.00409523809524, |
|
"grad_norm": 27.569835662841797, |
|
"learning_rate": 9.788359788359789e-06, |
|
"loss": 0.2445, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 17.00504761904762, |
|
"grad_norm": 0.11105721443891525, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 0.2643, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 17.006, |
|
"grad_norm": 3.3096115589141846, |
|
"learning_rate": 9.767195767195769e-06, |
|
"loss": 0.0026, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 17.006761904761905, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 1.4475635290145874, |
|
"eval_runtime": 13.5861, |
|
"eval_samples_per_second": 5.447, |
|
"eval_steps_per_second": 1.398, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 18.000190476190475, |
|
"grad_norm": 206.65469360351562, |
|
"learning_rate": 9.756613756613757e-06, |
|
"loss": 0.4347, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 18.001142857142856, |
|
"grad_norm": 0.15358828008174896, |
|
"learning_rate": 9.746031746031747e-06, |
|
"loss": 0.0009, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 18.002095238095237, |
|
"grad_norm": 109.42031860351562, |
|
"learning_rate": 9.735449735449735e-06, |
|
"loss": 0.1551, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 18.00304761904762, |
|
"grad_norm": 0.013478557579219341, |
|
"learning_rate": 9.724867724867725e-06, |
|
"loss": 0.1445, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.004, |
|
"grad_norm": 0.012817839160561562, |
|
"learning_rate": 9.714285714285715e-06, |
|
"loss": 0.0023, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 18.004952380952382, |
|
"grad_norm": 0.17729417979717255, |
|
"learning_rate": 9.703703703703703e-06, |
|
"loss": 0.0118, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 18.005904761904763, |
|
"grad_norm": 0.5205228328704834, |
|
"learning_rate": 9.693121693121693e-06, |
|
"loss": 0.357, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 18.006761904761905, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 1.448710560798645, |
|
"eval_runtime": 13.6656, |
|
"eval_samples_per_second": 5.415, |
|
"eval_steps_per_second": 1.39, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 19.000095238095238, |
|
"grad_norm": 0.02125757932662964, |
|
"learning_rate": 9.682539682539683e-06, |
|
"loss": 0.0994, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 19.00104761904762, |
|
"grad_norm": 0.006553493440151215, |
|
"learning_rate": 9.671957671957672e-06, |
|
"loss": 0.3126, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 19.002, |
|
"grad_norm": 0.07267153263092041, |
|
"learning_rate": 9.661375661375663e-06, |
|
"loss": 0.4631, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 19.00295238095238, |
|
"grad_norm": 0.1298568695783615, |
|
"learning_rate": 9.650793650793652e-06, |
|
"loss": 0.1857, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 19.003904761904764, |
|
"grad_norm": 163.23121643066406, |
|
"learning_rate": 9.64021164021164e-06, |
|
"loss": 0.0433, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 19.004857142857144, |
|
"grad_norm": 187.11117553710938, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 0.1836, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 19.005809523809525, |
|
"grad_norm": 193.7169189453125, |
|
"learning_rate": 9.61904761904762e-06, |
|
"loss": 0.104, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 19.006761904761905, |
|
"grad_norm": 0.1640291064977646, |
|
"learning_rate": 9.60846560846561e-06, |
|
"loss": 0.4262, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 19.006761904761905, |
|
"eval_accuracy": 0.7837837837837838, |
|
"eval_loss": 1.1604030132293701, |
|
"eval_runtime": 14.6483, |
|
"eval_samples_per_second": 5.052, |
|
"eval_steps_per_second": 1.297, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 20.00095238095238, |
|
"grad_norm": 0.09950070828199387, |
|
"learning_rate": 9.597883597883598e-06, |
|
"loss": 0.282, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 20.00190476190476, |
|
"grad_norm": 0.9464425444602966, |
|
"learning_rate": 9.587301587301588e-06, |
|
"loss": 0.283, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 20.002857142857142, |
|
"grad_norm": 0.007702260743826628, |
|
"learning_rate": 9.576719576719578e-06, |
|
"loss": 0.191, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 20.003809523809522, |
|
"grad_norm": 0.018454020842909813, |
|
"learning_rate": 9.566137566137567e-06, |
|
"loss": 0.023, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 20.004761904761907, |
|
"grad_norm": 0.07745273411273956, |
|
"learning_rate": 9.555555555555556e-06, |
|
"loss": 0.1185, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 20.005714285714287, |
|
"grad_norm": 0.00807939562946558, |
|
"learning_rate": 9.544973544973546e-06, |
|
"loss": 0.0006, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 20.006666666666668, |
|
"grad_norm": 0.011443381197750568, |
|
"learning_rate": 9.534391534391535e-06, |
|
"loss": 0.0021, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 20.006761904761905, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 1.771959900856018, |
|
"eval_runtime": 14.7868, |
|
"eval_samples_per_second": 5.004, |
|
"eval_steps_per_second": 1.285, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 21.000857142857143, |
|
"grad_norm": 1.0574586391448975, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 0.0004, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 21.001809523809523, |
|
"grad_norm": 0.1903315633535385, |
|
"learning_rate": 9.513227513227515e-06, |
|
"loss": 0.2308, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 21.002761904761904, |
|
"grad_norm": 0.011690843850374222, |
|
"learning_rate": 9.502645502645503e-06, |
|
"loss": 0.0002, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 21.003714285714285, |
|
"grad_norm": 0.03922194615006447, |
|
"learning_rate": 9.492063492063493e-06, |
|
"loss": 0.2452, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 21.004666666666665, |
|
"grad_norm": 0.013964025303721428, |
|
"learning_rate": 9.481481481481483e-06, |
|
"loss": 0.0625, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 21.005619047619046, |
|
"grad_norm": 0.022155698388814926, |
|
"learning_rate": 9.470899470899471e-06, |
|
"loss": 0.0013, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 21.00657142857143, |
|
"grad_norm": 0.0031711491756141186, |
|
"learning_rate": 9.460317460317461e-06, |
|
"loss": 0.0132, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 21.006761904761905, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.7387584447860718, |
|
"eval_runtime": 14.2802, |
|
"eval_samples_per_second": 5.182, |
|
"eval_steps_per_second": 1.331, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 22.000761904761905, |
|
"grad_norm": 0.003926535602658987, |
|
"learning_rate": 9.449735449735451e-06, |
|
"loss": 0.0005, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 22.001714285714286, |
|
"grad_norm": 0.6359225511550903, |
|
"learning_rate": 9.43915343915344e-06, |
|
"loss": 0.0741, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 22.002666666666666, |
|
"grad_norm": 0.010339989326894283, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.1656, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 22.003619047619047, |
|
"grad_norm": 0.007642359938472509, |
|
"learning_rate": 9.417989417989418e-06, |
|
"loss": 0.0215, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 22.004571428571428, |
|
"grad_norm": 0.006055203732103109, |
|
"learning_rate": 9.407407407407408e-06, |
|
"loss": 0.0006, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 22.005523809523808, |
|
"grad_norm": 0.01843923330307007, |
|
"learning_rate": 9.396825396825398e-06, |
|
"loss": 0.2335, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 22.00647619047619, |
|
"grad_norm": 0.009593057446181774, |
|
"learning_rate": 9.386243386243386e-06, |
|
"loss": 0.1451, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 22.006761904761905, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 1.7954270839691162, |
|
"eval_runtime": 14.1525, |
|
"eval_samples_per_second": 5.229, |
|
"eval_steps_per_second": 1.343, |
|
"step": 1633 |
|
}, |
|
{ |
|
"epoch": 23.000666666666667, |
|
"grad_norm": 0.0035956420470029116, |
|
"learning_rate": 9.375661375661376e-06, |
|
"loss": 0.0002, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 23.001619047619048, |
|
"grad_norm": 0.006967821158468723, |
|
"learning_rate": 9.365079365079366e-06, |
|
"loss": 0.0006, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 23.00257142857143, |
|
"grad_norm": 0.00322016142308712, |
|
"learning_rate": 9.354497354497354e-06, |
|
"loss": 0.0927, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 23.00352380952381, |
|
"grad_norm": 81.37845611572266, |
|
"learning_rate": 9.343915343915344e-06, |
|
"loss": 0.0083, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 23.00447619047619, |
|
"grad_norm": 0.010911311022937298, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.0152, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 23.00542857142857, |
|
"grad_norm": 181.4998779296875, |
|
"learning_rate": 9.322751322751323e-06, |
|
"loss": 0.0175, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 23.00638095238095, |
|
"grad_norm": 0.01505933329463005, |
|
"learning_rate": 9.312169312169313e-06, |
|
"loss": 0.0099, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 23.006761904761905, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.161924123764038, |
|
"eval_runtime": 14.9741, |
|
"eval_samples_per_second": 4.942, |
|
"eval_steps_per_second": 1.269, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 24.00057142857143, |
|
"grad_norm": 0.12847761809825897, |
|
"learning_rate": 9.301587301587303e-06, |
|
"loss": 0.1637, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 24.00152380952381, |
|
"grad_norm": 0.03343566879630089, |
|
"learning_rate": 9.291005291005291e-06, |
|
"loss": 0.0007, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 24.00247619047619, |
|
"grad_norm": 0.0027026699390262365, |
|
"learning_rate": 9.280423280423281e-06, |
|
"loss": 0.0002, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 24.00342857142857, |
|
"grad_norm": 0.008595957420766354, |
|
"learning_rate": 9.26984126984127e-06, |
|
"loss": 0.3226, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 24.004380952380952, |
|
"grad_norm": 0.004393964074552059, |
|
"learning_rate": 9.25925925925926e-06, |
|
"loss": 0.0001, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 24.005333333333333, |
|
"grad_norm": 0.0036259551998227835, |
|
"learning_rate": 9.248677248677249e-06, |
|
"loss": 0.0002, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 24.006285714285713, |
|
"grad_norm": 0.00916161946952343, |
|
"learning_rate": 9.238095238095239e-06, |
|
"loss": 0.0001, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 24.006761904761905, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.6523562669754028, |
|
"eval_runtime": 14.6247, |
|
"eval_samples_per_second": 5.06, |
|
"eval_steps_per_second": 1.299, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 25.000476190476192, |
|
"grad_norm": 0.005314418114721775, |
|
"learning_rate": 9.227513227513229e-06, |
|
"loss": 0.0001, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 25.001428571428573, |
|
"grad_norm": 0.003808894893154502, |
|
"learning_rate": 9.216931216931217e-06, |
|
"loss": 0.2069, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 25.002380952380953, |
|
"grad_norm": 0.0019496126333251595, |
|
"learning_rate": 9.206349206349207e-06, |
|
"loss": 0.1818, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 25.003333333333334, |
|
"grad_norm": 55.36027908325195, |
|
"learning_rate": 9.195767195767197e-06, |
|
"loss": 0.2119, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 25.004285714285714, |
|
"grad_norm": 0.05727091431617737, |
|
"learning_rate": 9.185185185185186e-06, |
|
"loss": 0.0027, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 25.005238095238095, |
|
"grad_norm": 0.006919063627719879, |
|
"learning_rate": 9.174603174603176e-06, |
|
"loss": 0.058, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 25.006190476190476, |
|
"grad_norm": 0.41044607758522034, |
|
"learning_rate": 9.164021164021166e-06, |
|
"loss": 0.0005, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 25.006761904761905, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 1.849861741065979, |
|
"eval_runtime": 14.846, |
|
"eval_samples_per_second": 4.985, |
|
"eval_steps_per_second": 1.28, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 26.00038095238095, |
|
"grad_norm": 0.007502442691475153, |
|
"learning_rate": 9.153439153439154e-06, |
|
"loss": 0.0048, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 26.001333333333335, |
|
"grad_norm": 194.1666259765625, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.2684, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 26.002285714285716, |
|
"grad_norm": 0.04846418648958206, |
|
"learning_rate": 9.132275132275134e-06, |
|
"loss": 0.0003, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 26.003238095238096, |
|
"grad_norm": 0.0025523772928863764, |
|
"learning_rate": 9.121693121693122e-06, |
|
"loss": 0.0312, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 26.004190476190477, |
|
"grad_norm": 0.003015185473486781, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 0.0001, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 26.005142857142857, |
|
"grad_norm": 29.853809356689453, |
|
"learning_rate": 9.1005291005291e-06, |
|
"loss": 0.0028, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 26.006095238095238, |
|
"grad_norm": 0.006052908953279257, |
|
"learning_rate": 9.08994708994709e-06, |
|
"loss": 0.0388, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 26.006761904761905, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 1.8791685104370117, |
|
"eval_runtime": 14.4914, |
|
"eval_samples_per_second": 5.106, |
|
"eval_steps_per_second": 1.311, |
|
"step": 1917 |
|
}, |
|
{ |
|
"epoch": 27.000285714285713, |
|
"grad_norm": 0.003093892941251397, |
|
"learning_rate": 9.07936507936508e-06, |
|
"loss": 0.4347, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 27.001238095238094, |
|
"grad_norm": 0.006142797879874706, |
|
"learning_rate": 9.068783068783069e-06, |
|
"loss": 0.0019, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 27.002190476190478, |
|
"grad_norm": 272.3396301269531, |
|
"learning_rate": 9.058201058201059e-06, |
|
"loss": 0.2855, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 27.00314285714286, |
|
"grad_norm": 0.0017996703973039985, |
|
"learning_rate": 9.047619047619049e-06, |
|
"loss": 0.2704, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 27.00409523809524, |
|
"grad_norm": 0.03826872631907463, |
|
"learning_rate": 9.037037037037037e-06, |
|
"loss": 0.1214, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 27.00504761904762, |
|
"grad_norm": 15.44973087310791, |
|
"learning_rate": 9.026455026455027e-06, |
|
"loss": 0.17, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 27.006, |
|
"grad_norm": 2.6301112174987793, |
|
"learning_rate": 9.015873015873017e-06, |
|
"loss": 0.1798, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 27.006761904761905, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 1.2950594425201416, |
|
"eval_runtime": 14.8518, |
|
"eval_samples_per_second": 4.983, |
|
"eval_steps_per_second": 1.279, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 28.000190476190475, |
|
"grad_norm": 0.3684341013431549, |
|
"learning_rate": 9.005291005291005e-06, |
|
"loss": 0.0012, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 28.001142857142856, |
|
"grad_norm": 0.007811195217072964, |
|
"learning_rate": 8.994708994708995e-06, |
|
"loss": 0.0002, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.002095238095237, |
|
"grad_norm": 0.016496405005455017, |
|
"learning_rate": 8.984126984126985e-06, |
|
"loss": 0.0557, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 28.00304761904762, |
|
"grad_norm": 0.014805680140852928, |
|
"learning_rate": 8.973544973544973e-06, |
|
"loss": 0.0223, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 28.004, |
|
"grad_norm": 16.467445373535156, |
|
"learning_rate": 8.962962962962963e-06, |
|
"loss": 0.0051, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 28.004952380952382, |
|
"grad_norm": 237.7123260498047, |
|
"learning_rate": 8.952380952380953e-06, |
|
"loss": 0.9651, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 28.005904761904763, |
|
"grad_norm": 0.03144896402955055, |
|
"learning_rate": 8.941798941798942e-06, |
|
"loss": 0.2354, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 28.006761904761905, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.5408201217651367, |
|
"eval_runtime": 15.1773, |
|
"eval_samples_per_second": 4.876, |
|
"eval_steps_per_second": 1.252, |
|
"step": 2059 |
|
}, |
|
{ |
|
"epoch": 29.000095238095238, |
|
"grad_norm": 0.04290107265114784, |
|
"learning_rate": 8.931216931216932e-06, |
|
"loss": 0.0665, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 29.00104761904762, |
|
"grad_norm": 0.009639314375817776, |
|
"learning_rate": 8.920634920634922e-06, |
|
"loss": 0.1078, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 29.002, |
|
"grad_norm": 0.004252797923982143, |
|
"learning_rate": 8.910052910052912e-06, |
|
"loss": 0.1316, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 29.00295238095238, |
|
"grad_norm": 0.16179914772510529, |
|
"learning_rate": 8.8994708994709e-06, |
|
"loss": 0.0026, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 29.003904761904764, |
|
"grad_norm": 0.24769946932792664, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.0009, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 29.004857142857144, |
|
"grad_norm": 0.16514120995998383, |
|
"learning_rate": 8.87830687830688e-06, |
|
"loss": 0.4554, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 29.005809523809525, |
|
"grad_norm": 0.010989787988364697, |
|
"learning_rate": 8.867724867724868e-06, |
|
"loss": 0.0976, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 29.006761904761905, |
|
"grad_norm": 0.005337660200893879, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.0024, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 29.006761904761905, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 1.9223977327346802, |
|
"eval_runtime": 15.8736, |
|
"eval_samples_per_second": 4.662, |
|
"eval_steps_per_second": 1.197, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 30.00095238095238, |
|
"grad_norm": 0.07220576703548431, |
|
"learning_rate": 8.846560846560848e-06, |
|
"loss": 0.0922, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 30.00190476190476, |
|
"grad_norm": 0.008359185419976711, |
|
"learning_rate": 8.835978835978837e-06, |
|
"loss": 0.1279, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 30.002857142857142, |
|
"grad_norm": 0.016845321282744408, |
|
"learning_rate": 8.825396825396827e-06, |
|
"loss": 0.0001, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 30.003809523809522, |
|
"grad_norm": 0.08929844200611115, |
|
"learning_rate": 8.814814814814817e-06, |
|
"loss": 0.0452, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 30.004761904761907, |
|
"grad_norm": 0.004775241948664188, |
|
"learning_rate": 8.804232804232805e-06, |
|
"loss": 0.0145, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 30.005714285714287, |
|
"grad_norm": 0.00836123526096344, |
|
"learning_rate": 8.793650793650795e-06, |
|
"loss": 0.0796, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 30.006666666666668, |
|
"grad_norm": 0.0014215363189578056, |
|
"learning_rate": 8.783068783068783e-06, |
|
"loss": 0.0018, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 30.006761904761905, |
|
"eval_accuracy": 0.6486486486486487, |
|
"eval_loss": 2.5244226455688477, |
|
"eval_runtime": 15.1839, |
|
"eval_samples_per_second": 4.874, |
|
"eval_steps_per_second": 1.251, |
|
"step": 2201 |
|
}, |
|
{ |
|
"epoch": 31.000857142857143, |
|
"grad_norm": 0.007338838651776314, |
|
"learning_rate": 8.772486772486773e-06, |
|
"loss": 0.0004, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 31.001809523809523, |
|
"grad_norm": 0.010591942816972733, |
|
"learning_rate": 8.761904761904763e-06, |
|
"loss": 0.0001, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 31.002761904761904, |
|
"grad_norm": 0.034093666821718216, |
|
"learning_rate": 8.751322751322751e-06, |
|
"loss": 0.0002, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 31.003714285714285, |
|
"grad_norm": 0.02081795036792755, |
|
"learning_rate": 8.740740740740741e-06, |
|
"loss": 0.0001, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 31.004666666666665, |
|
"grad_norm": 1.6572245359420776, |
|
"learning_rate": 8.730158730158731e-06, |
|
"loss": 0.1236, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 31.005619047619046, |
|
"grad_norm": 0.013071013614535332, |
|
"learning_rate": 8.71957671957672e-06, |
|
"loss": 0.1374, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 31.00657142857143, |
|
"grad_norm": 0.10946688055992126, |
|
"learning_rate": 8.70899470899471e-06, |
|
"loss": 0.1072, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 31.006761904761905, |
|
"eval_accuracy": 0.6486486486486487, |
|
"eval_loss": 2.8444180488586426, |
|
"eval_runtime": 15.0199, |
|
"eval_samples_per_second": 4.927, |
|
"eval_steps_per_second": 1.265, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 32.0007619047619, |
|
"grad_norm": 0.8098803758621216, |
|
"learning_rate": 8.6984126984127e-06, |
|
"loss": 0.1896, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 32.001714285714286, |
|
"grad_norm": 0.003531635971739888, |
|
"learning_rate": 8.687830687830688e-06, |
|
"loss": 0.1047, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 32.00266666666667, |
|
"grad_norm": 0.003388685407117009, |
|
"learning_rate": 8.677248677248678e-06, |
|
"loss": 0.0001, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 32.00361904761905, |
|
"grad_norm": 0.0055401683785021305, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.0003, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 32.00457142857143, |
|
"grad_norm": 0.019029097631573677, |
|
"learning_rate": 8.656084656084656e-06, |
|
"loss": 0.0004, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 32.00552380952381, |
|
"grad_norm": 0.0014148615300655365, |
|
"learning_rate": 8.645502645502646e-06, |
|
"loss": 0.0041, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 32.00647619047619, |
|
"grad_norm": 0.005223044194281101, |
|
"learning_rate": 8.634920634920636e-06, |
|
"loss": 0.0664, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 32.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.8276509046554565, |
|
"eval_runtime": 16.0726, |
|
"eval_samples_per_second": 4.604, |
|
"eval_steps_per_second": 1.182, |
|
"step": 2343 |
|
}, |
|
{ |
|
"epoch": 33.00066666666667, |
|
"grad_norm": 0.0018431965727359056, |
|
"learning_rate": 8.624338624338624e-06, |
|
"loss": 0.026, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 33.001619047619045, |
|
"grad_norm": 0.003196166828274727, |
|
"learning_rate": 8.613756613756614e-06, |
|
"loss": 0.0002, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 33.00257142857143, |
|
"grad_norm": 0.008334203623235226, |
|
"learning_rate": 8.603174603174604e-06, |
|
"loss": 0.0006, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 33.00352380952381, |
|
"grad_norm": 0.0023803089279681444, |
|
"learning_rate": 8.592592592592593e-06, |
|
"loss": 0.0003, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 33.00447619047619, |
|
"grad_norm": 365.64215087890625, |
|
"learning_rate": 8.582010582010583e-06, |
|
"loss": 0.1007, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 33.005428571428574, |
|
"grad_norm": 0.0017724215285852551, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.1023, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 33.00638095238095, |
|
"grad_norm": 0.002012843731790781, |
|
"learning_rate": 8.560846560846563e-06, |
|
"loss": 0.0122, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 33.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.1148488521575928, |
|
"eval_runtime": 17.6577, |
|
"eval_samples_per_second": 4.191, |
|
"eval_steps_per_second": 1.076, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 34.000571428571426, |
|
"grad_norm": 0.0028000962920486927, |
|
"learning_rate": 8.550264550264551e-06, |
|
"loss": 0.0001, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 34.00152380952381, |
|
"grad_norm": 0.00524574751034379, |
|
"learning_rate": 8.53968253968254e-06, |
|
"loss": 0.2539, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 34.00247619047619, |
|
"grad_norm": 0.0048956056125462055, |
|
"learning_rate": 8.529100529100531e-06, |
|
"loss": 0.1583, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 34.00342857142857, |
|
"grad_norm": 0.0051225321367383, |
|
"learning_rate": 8.518518518518519e-06, |
|
"loss": 0.4901, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 34.004380952380956, |
|
"grad_norm": 2.2200734615325928, |
|
"learning_rate": 8.507936507936509e-06, |
|
"loss": 0.0005, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 34.00533333333333, |
|
"grad_norm": 0.007252862676978111, |
|
"learning_rate": 8.497354497354499e-06, |
|
"loss": 0.1284, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 34.00628571428572, |
|
"grad_norm": 6.2736735343933105, |
|
"learning_rate": 8.486772486772487e-06, |
|
"loss": 0.1118, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 34.0067619047619, |
|
"eval_accuracy": 0.7702702702702703, |
|
"eval_loss": 1.5536247491836548, |
|
"eval_runtime": 15.8753, |
|
"eval_samples_per_second": 4.661, |
|
"eval_steps_per_second": 1.197, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 35.00047619047619, |
|
"grad_norm": 0.0035065035335719585, |
|
"learning_rate": 8.476190476190477e-06, |
|
"loss": 0.0003, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 35.00142857142857, |
|
"grad_norm": 0.0031883029732853174, |
|
"learning_rate": 8.465608465608466e-06, |
|
"loss": 0.0003, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 35.00238095238095, |
|
"grad_norm": 1.8205444812774658, |
|
"learning_rate": 8.455026455026456e-06, |
|
"loss": 0.0798, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 35.00333333333333, |
|
"grad_norm": 0.006734704598784447, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 0.0001, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 35.004285714285714, |
|
"grad_norm": 0.004560893401503563, |
|
"learning_rate": 8.433862433862434e-06, |
|
"loss": 0.0127, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 35.0052380952381, |
|
"grad_norm": 0.012678610160946846, |
|
"learning_rate": 8.423280423280424e-06, |
|
"loss": 0.0024, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 35.006190476190476, |
|
"grad_norm": 62.18834686279297, |
|
"learning_rate": 8.412698412698414e-06, |
|
"loss": 0.1987, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 35.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.292334794998169, |
|
"eval_runtime": 16.0954, |
|
"eval_samples_per_second": 4.598, |
|
"eval_steps_per_second": 1.18, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 36.00038095238095, |
|
"grad_norm": 0.032790932804346085, |
|
"learning_rate": 8.402116402116402e-06, |
|
"loss": 0.0001, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 36.001333333333335, |
|
"grad_norm": 0.0441834032535553, |
|
"learning_rate": 8.391534391534392e-06, |
|
"loss": 0.2269, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 36.00228571428571, |
|
"grad_norm": 0.006388077512383461, |
|
"learning_rate": 8.380952380952382e-06, |
|
"loss": 0.0498, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 36.003238095238096, |
|
"grad_norm": 0.02494201622903347, |
|
"learning_rate": 8.37037037037037e-06, |
|
"loss": 0.0001, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 36.00419047619047, |
|
"grad_norm": 0.08396976441144943, |
|
"learning_rate": 8.35978835978836e-06, |
|
"loss": 0.0001, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 36.00514285714286, |
|
"grad_norm": 0.0030845170840620995, |
|
"learning_rate": 8.34920634920635e-06, |
|
"loss": 0.0001, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 36.00609523809524, |
|
"grad_norm": 0.7375411987304688, |
|
"learning_rate": 8.338624338624339e-06, |
|
"loss": 0.0012, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 36.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.6784675121307373, |
|
"eval_runtime": 15.9991, |
|
"eval_samples_per_second": 4.625, |
|
"eval_steps_per_second": 1.188, |
|
"step": 2627 |
|
}, |
|
{ |
|
"epoch": 37.00028571428572, |
|
"grad_norm": 0.00671563483774662, |
|
"learning_rate": 8.328042328042329e-06, |
|
"loss": 0.0001, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 37.001238095238094, |
|
"grad_norm": 0.030820587649941444, |
|
"learning_rate": 8.317460317460319e-06, |
|
"loss": 0.1668, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 37.00219047619048, |
|
"grad_norm": 0.0018743366235867143, |
|
"learning_rate": 8.306878306878307e-06, |
|
"loss": 0.0001, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 37.003142857142855, |
|
"grad_norm": 0.0010029770201072097, |
|
"learning_rate": 8.296296296296297e-06, |
|
"loss": 0.0001, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 37.00409523809524, |
|
"grad_norm": 0.0015597708988934755, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.0001, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 37.005047619047616, |
|
"grad_norm": 0.001962661510333419, |
|
"learning_rate": 8.275132275132275e-06, |
|
"loss": 0.0001, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 37.006, |
|
"grad_norm": 0.025595329701900482, |
|
"learning_rate": 8.264550264550265e-06, |
|
"loss": 0.0027, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 37.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.240028142929077, |
|
"eval_runtime": 16.2638, |
|
"eval_samples_per_second": 4.55, |
|
"eval_steps_per_second": 1.168, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 38.000190476190475, |
|
"grad_norm": 0.0010542930103838444, |
|
"learning_rate": 8.253968253968254e-06, |
|
"loss": 0.0012, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 38.00114285714286, |
|
"grad_norm": 0.0016928648110479116, |
|
"learning_rate": 8.243386243386245e-06, |
|
"loss": 0.0522, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 38.00209523809524, |
|
"grad_norm": 129.03274536132812, |
|
"learning_rate": 8.232804232804234e-06, |
|
"loss": 0.1343, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 38.00304761904762, |
|
"grad_norm": 0.1395167112350464, |
|
"learning_rate": 8.222222222222222e-06, |
|
"loss": 0.0005, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 38.004, |
|
"grad_norm": 0.011270579881966114, |
|
"learning_rate": 8.211640211640213e-06, |
|
"loss": 0.1077, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 38.00495238095238, |
|
"grad_norm": 0.1653362661600113, |
|
"learning_rate": 8.201058201058202e-06, |
|
"loss": 0.1599, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 38.00590476190476, |
|
"grad_norm": 0.005356424022465944, |
|
"learning_rate": 8.190476190476192e-06, |
|
"loss": 0.0002, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 38.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.2459213733673096, |
|
"eval_runtime": 16.4119, |
|
"eval_samples_per_second": 4.509, |
|
"eval_steps_per_second": 1.158, |
|
"step": 2769 |
|
}, |
|
{ |
|
"epoch": 39.00009523809524, |
|
"grad_norm": 0.0011994903907179832, |
|
"learning_rate": 8.179894179894182e-06, |
|
"loss": 0.4059, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 39.00104761904762, |
|
"grad_norm": 135.89295959472656, |
|
"learning_rate": 8.16931216931217e-06, |
|
"loss": 0.0199, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 39.002, |
|
"grad_norm": 0.005463339388370514, |
|
"learning_rate": 8.15873015873016e-06, |
|
"loss": 0.0001, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 39.00295238095238, |
|
"grad_norm": 0.002280671149492264, |
|
"learning_rate": 8.148148148148148e-06, |
|
"loss": 0.0124, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 39.003904761904764, |
|
"grad_norm": 0.0023753687273710966, |
|
"learning_rate": 8.137566137566138e-06, |
|
"loss": 0.0001, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 39.00485714285714, |
|
"grad_norm": 0.05094405636191368, |
|
"learning_rate": 8.126984126984128e-06, |
|
"loss": 0.0002, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 39.005809523809525, |
|
"grad_norm": 0.003550964640453458, |
|
"learning_rate": 8.116402116402117e-06, |
|
"loss": 0.0936, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 39.0067619047619, |
|
"grad_norm": 0.3809496760368347, |
|
"learning_rate": 8.105820105820107e-06, |
|
"loss": 0.0099, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 39.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.360102415084839, |
|
"eval_runtime": 182.5763, |
|
"eval_samples_per_second": 0.405, |
|
"eval_steps_per_second": 0.104, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 40.000952380952384, |
|
"grad_norm": 0.0014301723567768931, |
|
"learning_rate": 8.095238095238097e-06, |
|
"loss": 0.0041, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 40.00190476190476, |
|
"grad_norm": 0.005376802291721106, |
|
"learning_rate": 8.084656084656085e-06, |
|
"loss": 0.0001, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 40.002857142857145, |
|
"grad_norm": 315.20269775390625, |
|
"learning_rate": 8.074074074074075e-06, |
|
"loss": 0.0808, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 40.00380952380952, |
|
"grad_norm": 0.033404335379600525, |
|
"learning_rate": 8.063492063492065e-06, |
|
"loss": 0.0503, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 40.00476190476191, |
|
"grad_norm": 0.002346677239984274, |
|
"learning_rate": 8.052910052910053e-06, |
|
"loss": 0.0001, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 40.005714285714284, |
|
"grad_norm": 0.004127295687794685, |
|
"learning_rate": 8.042328042328043e-06, |
|
"loss": 0.0002, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 40.00666666666667, |
|
"grad_norm": 0.0013346931664273143, |
|
"learning_rate": 8.031746031746033e-06, |
|
"loss": 0.0071, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 40.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.0561344623565674, |
|
"eval_runtime": 16.8361, |
|
"eval_samples_per_second": 4.395, |
|
"eval_steps_per_second": 1.129, |
|
"step": 2911 |
|
}, |
|
{ |
|
"epoch": 41.00085714285714, |
|
"grad_norm": 0.0015484824543818831, |
|
"learning_rate": 8.021164021164021e-06, |
|
"loss": 0.0, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 41.00180952380953, |
|
"grad_norm": 0.0011268400121480227, |
|
"learning_rate": 8.010582010582011e-06, |
|
"loss": 0.0302, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 41.002761904761904, |
|
"grad_norm": 0.002251312369480729, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.2746, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 41.00371428571429, |
|
"grad_norm": 0.0010610901517793536, |
|
"learning_rate": 7.98941798941799e-06, |
|
"loss": 0.2491, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 41.004666666666665, |
|
"grad_norm": 0.003384027164429426, |
|
"learning_rate": 7.97883597883598e-06, |
|
"loss": 0.1268, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 41.00561904761905, |
|
"grad_norm": 0.01891510747373104, |
|
"learning_rate": 7.968253968253968e-06, |
|
"loss": 0.0234, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 41.00657142857143, |
|
"grad_norm": 48.88753128051758, |
|
"learning_rate": 7.957671957671958e-06, |
|
"loss": 0.0086, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 41.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.189833641052246, |
|
"eval_runtime": 18.7226, |
|
"eval_samples_per_second": 3.952, |
|
"eval_steps_per_second": 1.015, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 42.0007619047619, |
|
"grad_norm": 0.0049520800821483135, |
|
"learning_rate": 7.947089947089948e-06, |
|
"loss": 0.2009, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 42.001714285714286, |
|
"grad_norm": 0.0075964052230119705, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.0002, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 42.00266666666667, |
|
"grad_norm": 0.172276109457016, |
|
"learning_rate": 7.925925925925926e-06, |
|
"loss": 0.0001, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 42.00361904761905, |
|
"grad_norm": 112.24790954589844, |
|
"learning_rate": 7.915343915343916e-06, |
|
"loss": 0.251, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 42.00457142857143, |
|
"grad_norm": 0.9104766845703125, |
|
"learning_rate": 7.904761904761904e-06, |
|
"loss": 0.0002, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 42.00552380952381, |
|
"grad_norm": 0.05150100961327553, |
|
"learning_rate": 7.894179894179896e-06, |
|
"loss": 0.0002, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 42.00647619047619, |
|
"grad_norm": 0.032936934381723404, |
|
"learning_rate": 7.883597883597884e-06, |
|
"loss": 0.0131, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 42.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.6086132526397705, |
|
"eval_runtime": 38.9907, |
|
"eval_samples_per_second": 1.898, |
|
"eval_steps_per_second": 0.487, |
|
"step": 3053 |
|
}, |
|
{ |
|
"epoch": 43.00066666666667, |
|
"grad_norm": 0.007421289570629597, |
|
"learning_rate": 7.873015873015873e-06, |
|
"loss": 0.0003, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 43.001619047619045, |
|
"grad_norm": 0.005742072127759457, |
|
"learning_rate": 7.862433862433863e-06, |
|
"loss": 0.0946, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 43.00257142857143, |
|
"grad_norm": 0.00301670515909791, |
|
"learning_rate": 7.851851851851853e-06, |
|
"loss": 0.0103, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 43.00352380952381, |
|
"grad_norm": 62.63003158569336, |
|
"learning_rate": 7.841269841269843e-06, |
|
"loss": 0.2432, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 43.00447619047619, |
|
"grad_norm": 0.16569894552230835, |
|
"learning_rate": 7.830687830687831e-06, |
|
"loss": 0.0008, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 43.005428571428574, |
|
"grad_norm": 0.008355499245226383, |
|
"learning_rate": 7.820105820105821e-06, |
|
"loss": 0.0087, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 43.00638095238095, |
|
"grad_norm": 0.015924755483865738, |
|
"learning_rate": 7.809523809523811e-06, |
|
"loss": 0.0002, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 43.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.140007257461548, |
|
"eval_runtime": 17.6835, |
|
"eval_samples_per_second": 4.185, |
|
"eval_steps_per_second": 1.074, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 44.000571428571426, |
|
"grad_norm": 0.0032034190371632576, |
|
"learning_rate": 7.7989417989418e-06, |
|
"loss": 0.0001, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 44.00152380952381, |
|
"grad_norm": 0.0018916918197646737, |
|
"learning_rate": 7.78835978835979e-06, |
|
"loss": 0.0001, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 44.00247619047619, |
|
"grad_norm": 0.0014891589526087046, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.002, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 44.00342857142857, |
|
"grad_norm": 0.12912853062152863, |
|
"learning_rate": 7.767195767195767e-06, |
|
"loss": 0.0001, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 44.004380952380956, |
|
"grad_norm": 0.0015116170980036259, |
|
"learning_rate": 7.756613756613757e-06, |
|
"loss": 0.0001, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 44.00533333333333, |
|
"grad_norm": 0.0008536073728464544, |
|
"learning_rate": 7.746031746031747e-06, |
|
"loss": 0.0001, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 44.00628571428572, |
|
"grad_norm": 0.003453182987868786, |
|
"learning_rate": 7.735449735449736e-06, |
|
"loss": 0.0001, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 44.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.260770320892334, |
|
"eval_runtime": 17.2523, |
|
"eval_samples_per_second": 4.289, |
|
"eval_steps_per_second": 1.101, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 45.00047619047619, |
|
"grad_norm": 0.008949621580541134, |
|
"learning_rate": 7.724867724867726e-06, |
|
"loss": 0.0001, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 45.00142857142857, |
|
"grad_norm": 0.007500027772039175, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.0655, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 45.00238095238095, |
|
"grad_norm": 0.0022459605243057013, |
|
"learning_rate": 7.703703703703704e-06, |
|
"loss": 0.0006, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 45.00333333333333, |
|
"grad_norm": 0.002970959758386016, |
|
"learning_rate": 7.693121693121694e-06, |
|
"loss": 0.0, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 45.004285714285714, |
|
"grad_norm": 0.0018117021536454558, |
|
"learning_rate": 7.682539682539684e-06, |
|
"loss": 0.0004, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 45.0052380952381, |
|
"grad_norm": 0.01580795831978321, |
|
"learning_rate": 7.671957671957672e-06, |
|
"loss": 0.0001, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 45.006190476190476, |
|
"grad_norm": 0.0050562042742967606, |
|
"learning_rate": 7.661375661375662e-06, |
|
"loss": 0.0549, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 45.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.012903928756714, |
|
"eval_runtime": 20.0668, |
|
"eval_samples_per_second": 3.688, |
|
"eval_steps_per_second": 0.947, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 46.00038095238095, |
|
"grad_norm": 0.0019852747209370136, |
|
"learning_rate": 7.65079365079365e-06, |
|
"loss": 0.0001, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 46.001333333333335, |
|
"grad_norm": 0.0013264709850773215, |
|
"learning_rate": 7.64021164021164e-06, |
|
"loss": 0.2312, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 46.00228571428571, |
|
"grad_norm": 0.0050531113520264626, |
|
"learning_rate": 7.62962962962963e-06, |
|
"loss": 0.0002, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 46.003238095238096, |
|
"grad_norm": 0.002699656877666712, |
|
"learning_rate": 7.61904761904762e-06, |
|
"loss": 0.0001, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 46.00419047619047, |
|
"grad_norm": 0.0009981177281588316, |
|
"learning_rate": 7.60846560846561e-06, |
|
"loss": 0.0001, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 46.00514285714286, |
|
"grad_norm": 0.009648758918046951, |
|
"learning_rate": 7.597883597883599e-06, |
|
"loss": 0.0, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 46.00609523809524, |
|
"grad_norm": 0.004754742607474327, |
|
"learning_rate": 7.587301587301588e-06, |
|
"loss": 0.0, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 46.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.001845598220825, |
|
"eval_runtime": 15.5897, |
|
"eval_samples_per_second": 4.747, |
|
"eval_steps_per_second": 1.219, |
|
"step": 3337 |
|
}, |
|
{ |
|
"epoch": 47.00028571428572, |
|
"grad_norm": 0.018193760886788368, |
|
"learning_rate": 7.576719576719578e-06, |
|
"loss": 0.0, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 47.001238095238094, |
|
"grad_norm": 0.0028342902660369873, |
|
"learning_rate": 7.566137566137567e-06, |
|
"loss": 0.0001, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 47.00219047619048, |
|
"grad_norm": 0.001791008049622178, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 0.0, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 47.003142857142855, |
|
"grad_norm": 0.05041654407978058, |
|
"learning_rate": 7.544973544973545e-06, |
|
"loss": 0.0833, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 47.00409523809524, |
|
"grad_norm": 0.0015246650436893106, |
|
"learning_rate": 7.534391534391535e-06, |
|
"loss": 0.0011, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 47.005047619047616, |
|
"grad_norm": 0.004255190957337618, |
|
"learning_rate": 7.523809523809524e-06, |
|
"loss": 0.2029, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 47.006, |
|
"grad_norm": 0.007833059877157211, |
|
"learning_rate": 7.5132275132275136e-06, |
|
"loss": 0.0001, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 47.0067619047619, |
|
"eval_accuracy": 0.7837837837837838, |
|
"eval_loss": 1.7209464311599731, |
|
"eval_runtime": 16.3867, |
|
"eval_samples_per_second": 4.516, |
|
"eval_steps_per_second": 1.159, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 48.000190476190475, |
|
"grad_norm": 0.0008374308235943317, |
|
"learning_rate": 7.5026455026455035e-06, |
|
"loss": 0.0001, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 48.00114285714286, |
|
"grad_norm": 0.0011125396704301238, |
|
"learning_rate": 7.492063492063493e-06, |
|
"loss": 0.0006, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 48.00209523809524, |
|
"grad_norm": 52.88506317138672, |
|
"learning_rate": 7.481481481481482e-06, |
|
"loss": 0.2376, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 48.00304761904762, |
|
"grad_norm": 0.004130581393837929, |
|
"learning_rate": 7.470899470899472e-06, |
|
"loss": 0.0001, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 48.004, |
|
"grad_norm": 0.004906717222183943, |
|
"learning_rate": 7.460317460317461e-06, |
|
"loss": 0.0001, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 48.00495238095238, |
|
"grad_norm": 0.002401293022558093, |
|
"learning_rate": 7.44973544973545e-06, |
|
"loss": 0.0001, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 48.00590476190476, |
|
"grad_norm": 5.829977512359619, |
|
"learning_rate": 7.439153439153439e-06, |
|
"loss": 0.31, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 48.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.196157693862915, |
|
"eval_runtime": 15.4779, |
|
"eval_samples_per_second": 4.781, |
|
"eval_steps_per_second": 1.228, |
|
"step": 3479 |
|
}, |
|
{ |
|
"epoch": 49.00009523809524, |
|
"grad_norm": 500.178466796875, |
|
"learning_rate": 7.428571428571429e-06, |
|
"loss": 0.4734, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 49.00104761904762, |
|
"grad_norm": 0.0016342108137905598, |
|
"learning_rate": 7.417989417989418e-06, |
|
"loss": 0.0098, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 49.002, |
|
"grad_norm": 0.002513843821361661, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 49.00295238095238, |
|
"grad_norm": 0.00166873331181705, |
|
"learning_rate": 7.3968253968253975e-06, |
|
"loss": 0.0002, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 49.003904761904764, |
|
"grad_norm": 0.005831919610500336, |
|
"learning_rate": 7.386243386243387e-06, |
|
"loss": 0.0001, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 49.00485714285714, |
|
"grad_norm": 0.0014722439227625728, |
|
"learning_rate": 7.375661375661376e-06, |
|
"loss": 0.149, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 49.005809523809525, |
|
"grad_norm": 0.01229450386017561, |
|
"learning_rate": 7.3650793650793666e-06, |
|
"loss": 0.0001, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 49.0067619047619, |
|
"grad_norm": 0.23085883259773254, |
|
"learning_rate": 7.354497354497355e-06, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 49.0067619047619, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 1.6649614572525024, |
|
"eval_runtime": 16.2384, |
|
"eval_samples_per_second": 4.557, |
|
"eval_steps_per_second": 1.17, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 50.000952380952384, |
|
"grad_norm": 0.011244424618780613, |
|
"learning_rate": 7.343915343915344e-06, |
|
"loss": 0.0001, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 50.00190476190476, |
|
"grad_norm": 0.006904429290443659, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.0001, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 50.002857142857145, |
|
"grad_norm": 0.008236641064286232, |
|
"learning_rate": 7.322751322751324e-06, |
|
"loss": 0.1987, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 50.00380952380952, |
|
"grad_norm": 0.001221312559209764, |
|
"learning_rate": 7.312169312169313e-06, |
|
"loss": 0.0, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 50.00476190476191, |
|
"grad_norm": 0.0464463084936142, |
|
"learning_rate": 7.301587301587301e-06, |
|
"loss": 0.0001, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 50.005714285714284, |
|
"grad_norm": 0.002336872974410653, |
|
"learning_rate": 7.291005291005292e-06, |
|
"loss": 0.0, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 50.00666666666667, |
|
"grad_norm": 0.002747748512774706, |
|
"learning_rate": 7.280423280423281e-06, |
|
"loss": 0.0, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 50.0067619047619, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 1.884304165840149, |
|
"eval_runtime": 16.5472, |
|
"eval_samples_per_second": 4.472, |
|
"eval_steps_per_second": 1.148, |
|
"step": 3621 |
|
}, |
|
{ |
|
"epoch": 51.00085714285714, |
|
"grad_norm": 0.0009597976459190249, |
|
"learning_rate": 7.2698412698412705e-06, |
|
"loss": 0.0002, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 51.00180952380953, |
|
"grad_norm": 0.1065516397356987, |
|
"learning_rate": 7.2592592592592605e-06, |
|
"loss": 0.0001, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 51.002761904761904, |
|
"grad_norm": 0.022926034405827522, |
|
"learning_rate": 7.24867724867725e-06, |
|
"loss": 0.0002, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 51.00371428571429, |
|
"grad_norm": 0.001492603332735598, |
|
"learning_rate": 7.238095238095239e-06, |
|
"loss": 0.0001, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 51.004666666666665, |
|
"grad_norm": 0.000893523043487221, |
|
"learning_rate": 7.227513227513228e-06, |
|
"loss": 0.0004, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 51.00561904761905, |
|
"grad_norm": 0.0012513647088781, |
|
"learning_rate": 7.216931216931218e-06, |
|
"loss": 0.0005, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 51.00657142857143, |
|
"grad_norm": 0.001891888095997274, |
|
"learning_rate": 7.206349206349207e-06, |
|
"loss": 0.0, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 51.0067619047619, |
|
"eval_accuracy": 0.7702702702702703, |
|
"eval_loss": 1.9397999048233032, |
|
"eval_runtime": 16.7047, |
|
"eval_samples_per_second": 4.43, |
|
"eval_steps_per_second": 1.137, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 52.0007619047619, |
|
"grad_norm": 0.0006127876695245504, |
|
"learning_rate": 7.195767195767196e-06, |
|
"loss": 0.1558, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 52.001714285714286, |
|
"grad_norm": 0.0021178224124014378, |
|
"learning_rate": 7.185185185185186e-06, |
|
"loss": 0.0, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 52.00266666666667, |
|
"grad_norm": 0.0013722680741921067, |
|
"learning_rate": 7.174603174603175e-06, |
|
"loss": 0.0007, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 52.00361904761905, |
|
"grad_norm": 0.05817762389779091, |
|
"learning_rate": 7.1640211640211644e-06, |
|
"loss": 0.0002, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 52.00457142857143, |
|
"grad_norm": 0.0018984224880114198, |
|
"learning_rate": 7.1534391534391544e-06, |
|
"loss": 0.0, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 52.00552380952381, |
|
"grad_norm": 0.0462646409869194, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.2027, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 52.00647619047619, |
|
"grad_norm": 0.0014900796813890338, |
|
"learning_rate": 7.132275132275133e-06, |
|
"loss": 0.0, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 52.0067619047619, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 1.7851362228393555, |
|
"eval_runtime": 16.9023, |
|
"eval_samples_per_second": 4.378, |
|
"eval_steps_per_second": 1.124, |
|
"step": 3763 |
|
}, |
|
{ |
|
"epoch": 53.00066666666667, |
|
"grad_norm": 0.005552320275455713, |
|
"learning_rate": 7.121693121693122e-06, |
|
"loss": 0.523, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 53.001619047619045, |
|
"grad_norm": 0.0013297455152496696, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.0, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 53.00257142857143, |
|
"grad_norm": 0.0035004790406674147, |
|
"learning_rate": 7.100529100529101e-06, |
|
"loss": 0.0035, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 53.00352380952381, |
|
"grad_norm": 0.0012106123613193631, |
|
"learning_rate": 7.08994708994709e-06, |
|
"loss": 0.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 53.00447619047619, |
|
"grad_norm": 0.001144262496381998, |
|
"learning_rate": 7.07936507936508e-06, |
|
"loss": 0.0001, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 53.005428571428574, |
|
"grad_norm": 0.0017582399304956198, |
|
"learning_rate": 7.068783068783069e-06, |
|
"loss": 0.0, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 53.00638095238095, |
|
"grad_norm": 2.3698835372924805, |
|
"learning_rate": 7.058201058201058e-06, |
|
"loss": 0.0001, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 53.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 1.9573771953582764, |
|
"eval_runtime": 17.5172, |
|
"eval_samples_per_second": 4.224, |
|
"eval_steps_per_second": 1.085, |
|
"step": 3834 |
|
}, |
|
{ |
|
"epoch": 54.000571428571426, |
|
"grad_norm": 0.001897272071801126, |
|
"learning_rate": 7.047619047619048e-06, |
|
"loss": 0.2065, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 54.00152380952381, |
|
"grad_norm": 0.003666786476969719, |
|
"learning_rate": 7.0370370370370375e-06, |
|
"loss": 0.0001, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 54.00247619047619, |
|
"grad_norm": 0.001953072496689856, |
|
"learning_rate": 7.026455026455027e-06, |
|
"loss": 0.0, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 54.00342857142857, |
|
"grad_norm": 159.1454315185547, |
|
"learning_rate": 7.015873015873016e-06, |
|
"loss": 0.1996, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 54.004380952380956, |
|
"grad_norm": 0.006261242087930441, |
|
"learning_rate": 7.005291005291006e-06, |
|
"loss": 0.0037, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 54.00533333333333, |
|
"grad_norm": 0.0007964144460856915, |
|
"learning_rate": 6.994708994708995e-06, |
|
"loss": 0.0, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 54.00628571428572, |
|
"grad_norm": 2.6199288368225098, |
|
"learning_rate": 6.984126984126984e-06, |
|
"loss": 0.0002, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 54.0067619047619, |
|
"eval_accuracy": 0.6351351351351351, |
|
"eval_loss": 2.6199848651885986, |
|
"eval_runtime": 19.2121, |
|
"eval_samples_per_second": 3.852, |
|
"eval_steps_per_second": 0.989, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 55.00047619047619, |
|
"grad_norm": 0.002870640717446804, |
|
"learning_rate": 6.973544973544975e-06, |
|
"loss": 0.0191, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 55.00142857142857, |
|
"grad_norm": 0.0009840029524639249, |
|
"learning_rate": 6.962962962962964e-06, |
|
"loss": 0.0, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 55.00238095238095, |
|
"grad_norm": 0.006166788749396801, |
|
"learning_rate": 6.952380952380952e-06, |
|
"loss": 0.0003, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 55.00333333333333, |
|
"grad_norm": 11.922721862792969, |
|
"learning_rate": 6.941798941798943e-06, |
|
"loss": 0.0008, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 55.004285714285714, |
|
"grad_norm": 0.000661016209051013, |
|
"learning_rate": 6.931216931216932e-06, |
|
"loss": 0.0051, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 55.0052380952381, |
|
"grad_norm": 0.4277324676513672, |
|
"learning_rate": 6.920634920634921e-06, |
|
"loss": 0.2065, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 55.006190476190476, |
|
"grad_norm": 0.0036654549185186625, |
|
"learning_rate": 6.9100529100529105e-06, |
|
"loss": 0.0, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 55.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.233295440673828, |
|
"eval_runtime": 19.0212, |
|
"eval_samples_per_second": 3.89, |
|
"eval_steps_per_second": 0.999, |
|
"step": 3976 |
|
}, |
|
{ |
|
"epoch": 56.00038095238095, |
|
"grad_norm": 0.008012962527573109, |
|
"learning_rate": 6.8994708994709005e-06, |
|
"loss": 0.0003, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 56.001333333333335, |
|
"grad_norm": 0.2828165888786316, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 0.0004, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 56.00228571428571, |
|
"grad_norm": 379.705322265625, |
|
"learning_rate": 6.878306878306879e-06, |
|
"loss": 0.2678, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 56.003238095238096, |
|
"grad_norm": 0.005230509676039219, |
|
"learning_rate": 6.867724867724869e-06, |
|
"loss": 0.0001, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 56.00419047619047, |
|
"grad_norm": 0.0008635453414171934, |
|
"learning_rate": 6.857142857142858e-06, |
|
"loss": 0.0, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 56.00514285714286, |
|
"grad_norm": 0.008102879859507084, |
|
"learning_rate": 6.846560846560847e-06, |
|
"loss": 0.0, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 56.00609523809524, |
|
"grad_norm": 0.0062914155423641205, |
|
"learning_rate": 6.835978835978837e-06, |
|
"loss": 0.0001, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 56.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.7799017429351807, |
|
"eval_runtime": 18.9614, |
|
"eval_samples_per_second": 3.903, |
|
"eval_steps_per_second": 1.002, |
|
"step": 4047 |
|
}, |
|
{ |
|
"epoch": 57.00028571428572, |
|
"grad_norm": 0.0031750891357660294, |
|
"learning_rate": 6.825396825396826e-06, |
|
"loss": 0.0, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 57.001238095238094, |
|
"grad_norm": 0.0014056439977139235, |
|
"learning_rate": 6.814814814814815e-06, |
|
"loss": 0.0, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 57.00219047619048, |
|
"grad_norm": 0.0033215824514627457, |
|
"learning_rate": 6.8042328042328045e-06, |
|
"loss": 0.0, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 57.003142857142855, |
|
"grad_norm": 0.0037562695797532797, |
|
"learning_rate": 6.7936507936507944e-06, |
|
"loss": 0.0, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 57.00409523809524, |
|
"grad_norm": 0.001404767157509923, |
|
"learning_rate": 6.783068783068784e-06, |
|
"loss": 0.2013, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 57.005047619047616, |
|
"grad_norm": 0.002498056972399354, |
|
"learning_rate": 6.772486772486773e-06, |
|
"loss": 0.1808, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 57.006, |
|
"grad_norm": 0.006040900945663452, |
|
"learning_rate": 6.761904761904763e-06, |
|
"loss": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 57.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.193466901779175, |
|
"eval_runtime": 21.025, |
|
"eval_samples_per_second": 3.52, |
|
"eval_steps_per_second": 0.904, |
|
"step": 4118 |
|
}, |
|
{ |
|
"epoch": 58.000190476190475, |
|
"grad_norm": 0.001317343907430768, |
|
"learning_rate": 6.751322751322752e-06, |
|
"loss": 0.0001, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 58.00114285714286, |
|
"grad_norm": 0.017525408416986465, |
|
"learning_rate": 6.740740740740741e-06, |
|
"loss": 0.0001, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 58.00209523809524, |
|
"grad_norm": 0.004452765453606844, |
|
"learning_rate": 6.730158730158731e-06, |
|
"loss": 0.0001, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 58.00304761904762, |
|
"grad_norm": 0.0021483676973730326, |
|
"learning_rate": 6.71957671957672e-06, |
|
"loss": 0.0001, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 58.004, |
|
"grad_norm": 0.00330311874859035, |
|
"learning_rate": 6.708994708994709e-06, |
|
"loss": 0.0, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 58.00495238095238, |
|
"grad_norm": 0.004011472221463919, |
|
"learning_rate": 6.698412698412698e-06, |
|
"loss": 0.066, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 58.00590476190476, |
|
"grad_norm": 0.0005767460679635406, |
|
"learning_rate": 6.687830687830688e-06, |
|
"loss": 0.0188, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 58.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.227210760116577, |
|
"eval_runtime": 17.7138, |
|
"eval_samples_per_second": 4.178, |
|
"eval_steps_per_second": 1.073, |
|
"step": 4189 |
|
}, |
|
{ |
|
"epoch": 59.00009523809524, |
|
"grad_norm": 0.0010956120677292347, |
|
"learning_rate": 6.6772486772486775e-06, |
|
"loss": 0.0001, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 59.00104761904762, |
|
"grad_norm": 22.01203727722168, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.001, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 59.002, |
|
"grad_norm": 0.0007579278899356723, |
|
"learning_rate": 6.656084656084657e-06, |
|
"loss": 0.0001, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 59.00295238095238, |
|
"grad_norm": 0.00103019701782614, |
|
"learning_rate": 6.645502645502646e-06, |
|
"loss": 0.0, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 59.003904761904764, |
|
"grad_norm": 0.7090355753898621, |
|
"learning_rate": 6.634920634920635e-06, |
|
"loss": 0.0023, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 59.00485714285714, |
|
"grad_norm": 0.0037675583735108376, |
|
"learning_rate": 6.624338624338626e-06, |
|
"loss": 0.0001, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 59.005809523809525, |
|
"grad_norm": 0.016517408192157745, |
|
"learning_rate": 6.613756613756615e-06, |
|
"loss": 0.4185, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 59.0067619047619, |
|
"grad_norm": 0.00235711014829576, |
|
"learning_rate": 6.603174603174603e-06, |
|
"loss": 0.1013, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 59.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.3606715202331543, |
|
"eval_runtime": 17.3702, |
|
"eval_samples_per_second": 4.26, |
|
"eval_steps_per_second": 1.094, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 60.000952380952384, |
|
"grad_norm": 0.003475839737802744, |
|
"learning_rate": 6.592592592592592e-06, |
|
"loss": 0.0001, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 60.00190476190476, |
|
"grad_norm": 0.0010961840162053704, |
|
"learning_rate": 6.582010582010583e-06, |
|
"loss": 0.0015, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 60.002857142857145, |
|
"grad_norm": 0.001646665041334927, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.1763, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 60.00380952380952, |
|
"grad_norm": 0.00105653319042176, |
|
"learning_rate": 6.560846560846561e-06, |
|
"loss": 0.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 60.00476190476191, |
|
"grad_norm": 0.0037881555035710335, |
|
"learning_rate": 6.550264550264551e-06, |
|
"loss": 0.2071, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 60.005714285714284, |
|
"grad_norm": 0.0010041279019787908, |
|
"learning_rate": 6.5396825396825405e-06, |
|
"loss": 0.0, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 60.00666666666667, |
|
"grad_norm": 0.002599923172965646, |
|
"learning_rate": 6.52910052910053e-06, |
|
"loss": 0.0001, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 60.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.1222922801971436, |
|
"eval_runtime": 17.4695, |
|
"eval_samples_per_second": 4.236, |
|
"eval_steps_per_second": 1.088, |
|
"step": 4331 |
|
}, |
|
{ |
|
"epoch": 61.00085714285714, |
|
"grad_norm": 0.0010362501488998532, |
|
"learning_rate": 6.51851851851852e-06, |
|
"loss": 0.0, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 61.00180952380953, |
|
"grad_norm": 0.0007642352138645947, |
|
"learning_rate": 6.507936507936509e-06, |
|
"loss": 0.1222, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 61.002761904761904, |
|
"grad_norm": 0.000809229037258774, |
|
"learning_rate": 6.497354497354498e-06, |
|
"loss": 0.2521, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 61.00371428571429, |
|
"grad_norm": 0.23083215951919556, |
|
"learning_rate": 6.486772486772487e-06, |
|
"loss": 0.0782, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 61.004666666666665, |
|
"grad_norm": 0.29525506496429443, |
|
"learning_rate": 6.476190476190477e-06, |
|
"loss": 0.0516, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 61.00561904761905, |
|
"grad_norm": 0.0027424772270023823, |
|
"learning_rate": 6.465608465608466e-06, |
|
"loss": 0.1534, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 61.00657142857143, |
|
"grad_norm": 0.0015537068247795105, |
|
"learning_rate": 6.455026455026455e-06, |
|
"loss": 0.0026, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 61.0067619047619, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 1.9220436811447144, |
|
"eval_runtime": 18.9971, |
|
"eval_samples_per_second": 3.895, |
|
"eval_steps_per_second": 1.0, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 62.0007619047619, |
|
"grad_norm": 0.0010604149429127574, |
|
"learning_rate": 6.444444444444445e-06, |
|
"loss": 0.1698, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 62.001714285714286, |
|
"grad_norm": 0.0010832214029505849, |
|
"learning_rate": 6.4338624338624345e-06, |
|
"loss": 0.0, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 62.00266666666667, |
|
"grad_norm": 0.02589496225118637, |
|
"learning_rate": 6.423280423280424e-06, |
|
"loss": 0.0001, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 62.00361904761905, |
|
"grad_norm": 0.0012127620866522193, |
|
"learning_rate": 6.412698412698414e-06, |
|
"loss": 0.0146, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 62.00457142857143, |
|
"grad_norm": 0.00289982371032238, |
|
"learning_rate": 6.402116402116403e-06, |
|
"loss": 0.0, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 62.00552380952381, |
|
"grad_norm": 0.0035972294863313437, |
|
"learning_rate": 6.391534391534392e-06, |
|
"loss": 0.1764, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 62.00647619047619, |
|
"grad_norm": 0.0023234295658767223, |
|
"learning_rate": 6.380952380952381e-06, |
|
"loss": 0.193, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 62.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.22542405128479, |
|
"eval_runtime": 17.4236, |
|
"eval_samples_per_second": 4.247, |
|
"eval_steps_per_second": 1.09, |
|
"step": 4473 |
|
}, |
|
{ |
|
"epoch": 63.00066666666667, |
|
"grad_norm": 0.010563456453382969, |
|
"learning_rate": 6.370370370370371e-06, |
|
"loss": 0.0, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 63.001619047619045, |
|
"grad_norm": 0.0025069634430110455, |
|
"learning_rate": 6.35978835978836e-06, |
|
"loss": 0.0, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 63.00257142857143, |
|
"grad_norm": 0.0028519683983176947, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 0.0797, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 63.00352380952381, |
|
"grad_norm": 0.0033148368820548058, |
|
"learning_rate": 6.338624338624339e-06, |
|
"loss": 0.0003, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 63.00447619047619, |
|
"grad_norm": 0.008310235105454922, |
|
"learning_rate": 6.328042328042328e-06, |
|
"loss": 0.4209, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 63.005428571428574, |
|
"grad_norm": 0.43833351135253906, |
|
"learning_rate": 6.3174603174603175e-06, |
|
"loss": 0.002, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 63.00638095238095, |
|
"grad_norm": 0.001988427247852087, |
|
"learning_rate": 6.3068783068783075e-06, |
|
"loss": 0.0002, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 63.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.2681643962860107, |
|
"eval_runtime": 17.5739, |
|
"eval_samples_per_second": 4.211, |
|
"eval_steps_per_second": 1.081, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 64.00057142857143, |
|
"grad_norm": 0.0018558625597506762, |
|
"learning_rate": 6.296296296296297e-06, |
|
"loss": 0.0, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 64.0015238095238, |
|
"grad_norm": 0.00036164221819490194, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.0605, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 64.00247619047619, |
|
"grad_norm": 0.001508195884525776, |
|
"learning_rate": 6.275132275132275e-06, |
|
"loss": 0.0007, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 64.00342857142857, |
|
"grad_norm": 0.01874958910048008, |
|
"learning_rate": 6.264550264550266e-06, |
|
"loss": 0.0391, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 64.00438095238096, |
|
"grad_norm": 0.0006183416116982698, |
|
"learning_rate": 6.253968253968254e-06, |
|
"loss": 0.1875, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 64.00533333333334, |
|
"grad_norm": 0.001555442693643272, |
|
"learning_rate": 6.243386243386243e-06, |
|
"loss": 0.0007, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 64.00628571428571, |
|
"grad_norm": 0.0006395932286977768, |
|
"learning_rate": 6.232804232804234e-06, |
|
"loss": 0.0, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 64.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.685673475265503, |
|
"eval_runtime": 17.963, |
|
"eval_samples_per_second": 4.12, |
|
"eval_steps_per_second": 1.058, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 65.00047619047619, |
|
"grad_norm": 0.002215348416939378, |
|
"learning_rate": 6.222222222222223e-06, |
|
"loss": 0.0, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 65.00142857142858, |
|
"grad_norm": 0.001222651218995452, |
|
"learning_rate": 6.211640211640212e-06, |
|
"loss": 0.0004, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 65.00238095238095, |
|
"grad_norm": 0.0009166031959466636, |
|
"learning_rate": 6.201058201058202e-06, |
|
"loss": 0.2341, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 65.00333333333333, |
|
"grad_norm": 0.0009040706208907068, |
|
"learning_rate": 6.1904761904761914e-06, |
|
"loss": 0.0013, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 65.00428571428571, |
|
"grad_norm": 0.003010386601090431, |
|
"learning_rate": 6.1798941798941806e-06, |
|
"loss": 0.0001, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 65.0052380952381, |
|
"grad_norm": 0.018778080120682716, |
|
"learning_rate": 6.16931216931217e-06, |
|
"loss": 0.0001, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 65.00619047619048, |
|
"grad_norm": 0.0012031777296215296, |
|
"learning_rate": 6.15873015873016e-06, |
|
"loss": 0.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 65.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.379077672958374, |
|
"eval_runtime": 17.2414, |
|
"eval_samples_per_second": 4.292, |
|
"eval_steps_per_second": 1.102, |
|
"step": 4686 |
|
}, |
|
{ |
|
"epoch": 66.00038095238095, |
|
"grad_norm": 0.0038914321921765804, |
|
"learning_rate": 6.148148148148149e-06, |
|
"loss": 0.0, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 66.00133333333333, |
|
"grad_norm": 0.0008610020740889013, |
|
"learning_rate": 6.137566137566138e-06, |
|
"loss": 0.0001, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 66.00228571428572, |
|
"grad_norm": 0.00043805301538668573, |
|
"learning_rate": 6.126984126984128e-06, |
|
"loss": 0.3381, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 66.00323809523809, |
|
"grad_norm": 7.665761947631836, |
|
"learning_rate": 6.116402116402117e-06, |
|
"loss": 0.0306, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 66.00419047619047, |
|
"grad_norm": 0.006835469510406256, |
|
"learning_rate": 6.105820105820106e-06, |
|
"loss": 0.0002, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 66.00514285714286, |
|
"grad_norm": 0.0009099821327254176, |
|
"learning_rate": 6.095238095238096e-06, |
|
"loss": 0.0, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 66.00609523809524, |
|
"grad_norm": 0.0022552493028342724, |
|
"learning_rate": 6.084656084656085e-06, |
|
"loss": 0.0076, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 66.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.8393194675445557, |
|
"eval_runtime": 13.8869, |
|
"eval_samples_per_second": 5.329, |
|
"eval_steps_per_second": 1.368, |
|
"step": 4757 |
|
}, |
|
{ |
|
"epoch": 67.00028571428571, |
|
"grad_norm": 0.13802634179592133, |
|
"learning_rate": 6.0740740740740745e-06, |
|
"loss": 0.0001, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 67.0012380952381, |
|
"grad_norm": 0.003053755732253194, |
|
"learning_rate": 6.063492063492064e-06, |
|
"loss": 0.2751, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 67.00219047619048, |
|
"grad_norm": 0.060533616691827774, |
|
"learning_rate": 6.052910052910054e-06, |
|
"loss": 0.0002, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 67.00314285714286, |
|
"grad_norm": 0.003102941671386361, |
|
"learning_rate": 6.042328042328043e-06, |
|
"loss": 0.0, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 67.00409523809523, |
|
"grad_norm": 0.0014726222725585103, |
|
"learning_rate": 6.031746031746032e-06, |
|
"loss": 0.1433, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 67.00504761904762, |
|
"grad_norm": 0.012411821633577347, |
|
"learning_rate": 6.021164021164022e-06, |
|
"loss": 0.0953, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 67.006, |
|
"grad_norm": 0.003996816463768482, |
|
"learning_rate": 6.010582010582011e-06, |
|
"loss": 0.0043, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 67.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 1.9305188655853271, |
|
"eval_runtime": 13.8404, |
|
"eval_samples_per_second": 5.347, |
|
"eval_steps_per_second": 1.373, |
|
"step": 4828 |
|
}, |
|
{ |
|
"epoch": 68.00019047619048, |
|
"grad_norm": 0.0005873045884072781, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0001, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 68.00114285714285, |
|
"grad_norm": 0.028566883876919746, |
|
"learning_rate": 5.989417989417989e-06, |
|
"loss": 0.0, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 68.00209523809524, |
|
"grad_norm": 0.0005609308718703687, |
|
"learning_rate": 5.978835978835979e-06, |
|
"loss": 0.0, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 68.00304761904762, |
|
"grad_norm": 0.001683125738054514, |
|
"learning_rate": 5.968253968253968e-06, |
|
"loss": 0.0001, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 68.004, |
|
"grad_norm": 0.0006265339907258749, |
|
"learning_rate": 5.9576719576719576e-06, |
|
"loss": 0.0001, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 68.00495238095237, |
|
"grad_norm": 0.0011454337509348989, |
|
"learning_rate": 5.9470899470899475e-06, |
|
"loss": 0.0, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 68.00590476190476, |
|
"grad_norm": 3.5152859687805176, |
|
"learning_rate": 5.936507936507937e-06, |
|
"loss": 0.0003, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 68.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.9944276809692383, |
|
"eval_runtime": 15.8087, |
|
"eval_samples_per_second": 4.681, |
|
"eval_steps_per_second": 1.202, |
|
"step": 4899 |
|
}, |
|
{ |
|
"epoch": 69.00009523809524, |
|
"grad_norm": 0.0005341055803000927, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 69.00104761904763, |
|
"grad_norm": 0.0011177296983078122, |
|
"learning_rate": 5.915343915343917e-06, |
|
"loss": 0.0919, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 69.002, |
|
"grad_norm": 0.0007625820580869913, |
|
"learning_rate": 5.904761904761905e-06, |
|
"loss": 0.0, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 69.00295238095238, |
|
"grad_norm": 0.0006928302464075387, |
|
"learning_rate": 5.894179894179894e-06, |
|
"loss": 0.0, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 69.00390476190476, |
|
"grad_norm": 0.002256699139252305, |
|
"learning_rate": 5.883597883597883e-06, |
|
"loss": 0.0, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 69.00485714285715, |
|
"grad_norm": 0.0004958523204550147, |
|
"learning_rate": 5.873015873015874e-06, |
|
"loss": 0.0, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 69.00580952380952, |
|
"grad_norm": 0.00038925904664210975, |
|
"learning_rate": 5.862433862433863e-06, |
|
"loss": 0.0, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 69.0067619047619, |
|
"grad_norm": 0.002490544691681862, |
|
"learning_rate": 5.8518518518518515e-06, |
|
"loss": 0.0, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 69.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.58418345451355, |
|
"eval_runtime": 15.9651, |
|
"eval_samples_per_second": 4.635, |
|
"eval_steps_per_second": 1.19, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 70.00095238095238, |
|
"grad_norm": 0.0006307671428658068, |
|
"learning_rate": 5.841269841269842e-06, |
|
"loss": 0.0029, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 70.00190476190477, |
|
"grad_norm": 0.0025409061927348375, |
|
"learning_rate": 5.8306878306878314e-06, |
|
"loss": 0.0502, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 70.00285714285714, |
|
"grad_norm": 0.0017973057692870498, |
|
"learning_rate": 5.820105820105821e-06, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 70.00380952380952, |
|
"grad_norm": 0.0004459419578779489, |
|
"learning_rate": 5.8095238095238106e-06, |
|
"loss": 0.0, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 70.0047619047619, |
|
"grad_norm": 0.0011419616639614105, |
|
"learning_rate": 5.7989417989418e-06, |
|
"loss": 0.0, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 70.00571428571429, |
|
"grad_norm": 0.0005456113140098751, |
|
"learning_rate": 5.788359788359789e-06, |
|
"loss": 0.0001, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 70.00666666666666, |
|
"grad_norm": 0.0008488246239721775, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 70.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.650306463241577, |
|
"eval_runtime": 17.1662, |
|
"eval_samples_per_second": 4.311, |
|
"eval_steps_per_second": 1.107, |
|
"step": 5041 |
|
}, |
|
{ |
|
"epoch": 71.00085714285714, |
|
"grad_norm": 0.07173417508602142, |
|
"learning_rate": 5.767195767195768e-06, |
|
"loss": 0.0, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 71.00180952380953, |
|
"grad_norm": 0.0004941129591315985, |
|
"learning_rate": 5.756613756613757e-06, |
|
"loss": 0.0, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 71.00276190476191, |
|
"grad_norm": 0.0032803788781166077, |
|
"learning_rate": 5.746031746031746e-06, |
|
"loss": 0.0, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 71.00371428571428, |
|
"grad_norm": 0.0007884973310865462, |
|
"learning_rate": 5.735449735449736e-06, |
|
"loss": 0.0, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 71.00466666666667, |
|
"grad_norm": 0.00035243743332102895, |
|
"learning_rate": 5.724867724867725e-06, |
|
"loss": 0.0, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 71.00561904761905, |
|
"grad_norm": 0.001297764596529305, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 71.00657142857143, |
|
"grad_norm": 0.0007165080169215798, |
|
"learning_rate": 5.7037037037037045e-06, |
|
"loss": 0.0, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 71.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.725356340408325, |
|
"eval_runtime": 15.7698, |
|
"eval_samples_per_second": 4.693, |
|
"eval_steps_per_second": 1.205, |
|
"step": 5112 |
|
}, |
|
{ |
|
"epoch": 72.0007619047619, |
|
"grad_norm": 0.0005112860817462206, |
|
"learning_rate": 5.693121693121694e-06, |
|
"loss": 0.0, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 72.00171428571429, |
|
"grad_norm": 0.0010144360130652785, |
|
"learning_rate": 5.682539682539683e-06, |
|
"loss": 0.0, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 72.00266666666667, |
|
"grad_norm": 0.0012267096899449825, |
|
"learning_rate": 5.671957671957672e-06, |
|
"loss": 0.0, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 72.00361904761905, |
|
"grad_norm": 0.0029065243434160948, |
|
"learning_rate": 5.661375661375662e-06, |
|
"loss": 0.0, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 72.00457142857142, |
|
"grad_norm": 0.00046578419278375804, |
|
"learning_rate": 5.650793650793651e-06, |
|
"loss": 0.0, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 72.00552380952381, |
|
"grad_norm": 0.0018618660978972912, |
|
"learning_rate": 5.64021164021164e-06, |
|
"loss": 0.0, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 72.00647619047619, |
|
"grad_norm": 0.0006170138367451727, |
|
"learning_rate": 5.62962962962963e-06, |
|
"loss": 0.0002, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 72.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 3.0428946018218994, |
|
"eval_runtime": 18.5951, |
|
"eval_samples_per_second": 3.98, |
|
"eval_steps_per_second": 1.022, |
|
"step": 5183 |
|
}, |
|
{ |
|
"epoch": 73.00066666666666, |
|
"grad_norm": 0.10139445960521698, |
|
"learning_rate": 5.619047619047619e-06, |
|
"loss": 0.0, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 73.00161904761904, |
|
"grad_norm": 0.0012034185929223895, |
|
"learning_rate": 5.6084656084656084e-06, |
|
"loss": 0.3207, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 73.00257142857143, |
|
"grad_norm": 0.000532768142875284, |
|
"learning_rate": 5.597883597883598e-06, |
|
"loss": 0.2441, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 73.00352380952381, |
|
"grad_norm": 0.026747262105345726, |
|
"learning_rate": 5.5873015873015876e-06, |
|
"loss": 0.0, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 73.0044761904762, |
|
"grad_norm": 0.006560751236975193, |
|
"learning_rate": 5.576719576719577e-06, |
|
"loss": 0.0001, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 73.00542857142857, |
|
"grad_norm": 0.0026356203015893698, |
|
"learning_rate": 5.566137566137566e-06, |
|
"loss": 0.0, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 73.00638095238095, |
|
"grad_norm": 0.0004046234826091677, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.0, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 73.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.5715653896331787, |
|
"eval_runtime": 19.502, |
|
"eval_samples_per_second": 3.794, |
|
"eval_steps_per_second": 0.974, |
|
"step": 5254 |
|
}, |
|
{ |
|
"epoch": 74.00057142857143, |
|
"grad_norm": 0.000314861536026001, |
|
"learning_rate": 5.544973544973545e-06, |
|
"loss": 0.0, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 74.0015238095238, |
|
"grad_norm": 0.0012239968637004495, |
|
"learning_rate": 5.534391534391534e-06, |
|
"loss": 0.0, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 74.00247619047619, |
|
"grad_norm": 0.001302064280025661, |
|
"learning_rate": 5.523809523809525e-06, |
|
"loss": 0.0, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 74.00342857142857, |
|
"grad_norm": 0.000552400597371161, |
|
"learning_rate": 5.513227513227514e-06, |
|
"loss": 0.0, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 74.00438095238096, |
|
"grad_norm": 0.00045846428838558495, |
|
"learning_rate": 5.502645502645503e-06, |
|
"loss": 0.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 74.00533333333334, |
|
"grad_norm": 0.008412440307438374, |
|
"learning_rate": 5.492063492063493e-06, |
|
"loss": 0.0136, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 74.00628571428571, |
|
"grad_norm": 0.0009433178347535431, |
|
"learning_rate": 5.481481481481482e-06, |
|
"loss": 0.0671, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 74.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.5143699645996094, |
|
"eval_runtime": 16.2734, |
|
"eval_samples_per_second": 4.547, |
|
"eval_steps_per_second": 1.168, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 75.00047619047619, |
|
"grad_norm": 0.0010361479362472892, |
|
"learning_rate": 5.4708994708994715e-06, |
|
"loss": 0.0, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 75.00142857142858, |
|
"grad_norm": 0.0006854168605059385, |
|
"learning_rate": 5.460317460317461e-06, |
|
"loss": 0.0, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 75.00238095238095, |
|
"grad_norm": 0.00024168891832232475, |
|
"learning_rate": 5.449735449735451e-06, |
|
"loss": 0.0, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 75.00333333333333, |
|
"grad_norm": 0.0027299614157527685, |
|
"learning_rate": 5.43915343915344e-06, |
|
"loss": 0.0, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 75.00428571428571, |
|
"grad_norm": 0.019003285095095634, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.0, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 75.0052380952381, |
|
"grad_norm": 0.0002465677389409393, |
|
"learning_rate": 5.417989417989419e-06, |
|
"loss": 0.0, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 75.00619047619048, |
|
"grad_norm": 0.00031525909435003996, |
|
"learning_rate": 5.407407407407408e-06, |
|
"loss": 0.0, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 75.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.893791913986206, |
|
"eval_runtime": 16.5432, |
|
"eval_samples_per_second": 4.473, |
|
"eval_steps_per_second": 1.149, |
|
"step": 5396 |
|
}, |
|
{ |
|
"epoch": 76.00038095238095, |
|
"grad_norm": 0.000373604241758585, |
|
"learning_rate": 5.396825396825397e-06, |
|
"loss": 0.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 76.00133333333333, |
|
"grad_norm": 0.0354059673845768, |
|
"learning_rate": 5.386243386243387e-06, |
|
"loss": 0.0, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 76.00228571428572, |
|
"grad_norm": 0.0002337087207706645, |
|
"learning_rate": 5.375661375661376e-06, |
|
"loss": 0.0, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 76.00323809523809, |
|
"grad_norm": 0.000298203231068328, |
|
"learning_rate": 5.365079365079365e-06, |
|
"loss": 0.0, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 76.00419047619047, |
|
"grad_norm": 0.000580488471314311, |
|
"learning_rate": 5.3544973544973545e-06, |
|
"loss": 0.0, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 76.00514285714286, |
|
"grad_norm": 0.0010549610015004873, |
|
"learning_rate": 5.3439153439153445e-06, |
|
"loss": 0.0, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 76.00609523809524, |
|
"grad_norm": 0.0011401353403925896, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 76.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.850273847579956, |
|
"eval_runtime": 16.7983, |
|
"eval_samples_per_second": 4.405, |
|
"eval_steps_per_second": 1.131, |
|
"step": 5467 |
|
}, |
|
{ |
|
"epoch": 77.00028571428571, |
|
"grad_norm": 0.0005172080709598958, |
|
"learning_rate": 5.322751322751323e-06, |
|
"loss": 0.0, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 77.0012380952381, |
|
"grad_norm": 0.0014666365459561348, |
|
"learning_rate": 5.312169312169313e-06, |
|
"loss": 0.0, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 77.00219047619048, |
|
"grad_norm": 0.0014088664902374148, |
|
"learning_rate": 5.301587301587302e-06, |
|
"loss": 0.0, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 77.00314285714286, |
|
"grad_norm": 0.0003868502099066973, |
|
"learning_rate": 5.291005291005291e-06, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 77.00409523809523, |
|
"grad_norm": 0.001465518376789987, |
|
"learning_rate": 5.280423280423281e-06, |
|
"loss": 0.0, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 77.00504761904762, |
|
"grad_norm": 0.001903692027553916, |
|
"learning_rate": 5.26984126984127e-06, |
|
"loss": 0.0, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 77.006, |
|
"grad_norm": 0.0015759927919134498, |
|
"learning_rate": 5.259259259259259e-06, |
|
"loss": 0.0, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 77.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.88606858253479, |
|
"eval_runtime": 21.6506, |
|
"eval_samples_per_second": 3.418, |
|
"eval_steps_per_second": 0.878, |
|
"step": 5538 |
|
}, |
|
{ |
|
"epoch": 78.00019047619048, |
|
"grad_norm": 0.0005823720712214708, |
|
"learning_rate": 5.2486772486772485e-06, |
|
"loss": 0.0, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 78.00114285714285, |
|
"grad_norm": 0.00030079399584792554, |
|
"learning_rate": 5.2380952380952384e-06, |
|
"loss": 0.0, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 78.00209523809524, |
|
"grad_norm": 0.0007412757840938866, |
|
"learning_rate": 5.227513227513228e-06, |
|
"loss": 0.0, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 78.00304761904762, |
|
"grad_norm": 0.000413477944675833, |
|
"learning_rate": 5.216931216931217e-06, |
|
"loss": 0.0, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 78.004, |
|
"grad_norm": 0.000860493048094213, |
|
"learning_rate": 5.2063492063492076e-06, |
|
"loss": 0.0, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 78.00495238095237, |
|
"grad_norm": 0.0003023779718205333, |
|
"learning_rate": 5.195767195767196e-06, |
|
"loss": 0.0, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 78.00590476190476, |
|
"grad_norm": 0.00046973678399808705, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 78.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.8524179458618164, |
|
"eval_runtime": 23.4181, |
|
"eval_samples_per_second": 3.16, |
|
"eval_steps_per_second": 0.811, |
|
"step": 5609 |
|
}, |
|
{ |
|
"epoch": 79.00009523809524, |
|
"grad_norm": 0.001756474724970758, |
|
"learning_rate": 5.174603174603176e-06, |
|
"loss": 0.0, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 79.00104761904763, |
|
"grad_norm": 0.004148818086832762, |
|
"learning_rate": 5.164021164021165e-06, |
|
"loss": 0.0, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 79.002, |
|
"grad_norm": 0.0009733652113936841, |
|
"learning_rate": 5.153439153439154e-06, |
|
"loss": 0.0, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 79.00295238095238, |
|
"grad_norm": 0.0004708434862550348, |
|
"learning_rate": 5.142857142857142e-06, |
|
"loss": 0.0, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 79.00390476190476, |
|
"grad_norm": 0.0010074286255985498, |
|
"learning_rate": 5.132275132275133e-06, |
|
"loss": 0.0, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 79.00485714285715, |
|
"grad_norm": 0.0005253396811895072, |
|
"learning_rate": 5.121693121693122e-06, |
|
"loss": 0.0, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 79.00580952380952, |
|
"grad_norm": 0.0005340786301530898, |
|
"learning_rate": 5.1111111111111115e-06, |
|
"loss": 0.0, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 79.0067619047619, |
|
"grad_norm": 0.0013357801362872124, |
|
"learning_rate": 5.1005291005291015e-06, |
|
"loss": 0.0, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 79.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.7961583137512207, |
|
"eval_runtime": 24.1533, |
|
"eval_samples_per_second": 3.064, |
|
"eval_steps_per_second": 0.787, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 80.00095238095238, |
|
"grad_norm": 0.0003452278324402869, |
|
"learning_rate": 5.089947089947091e-06, |
|
"loss": 0.1595, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 80.00190476190477, |
|
"grad_norm": 0.0025528387632220984, |
|
"learning_rate": 5.07936507936508e-06, |
|
"loss": 0.0003, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 80.00285714285714, |
|
"grad_norm": 0.5342845320701599, |
|
"learning_rate": 5.06878306878307e-06, |
|
"loss": 0.0001, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 80.00380952380952, |
|
"grad_norm": 0.0007849848479963839, |
|
"learning_rate": 5.058201058201059e-06, |
|
"loss": 0.0, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 80.0047619047619, |
|
"grad_norm": 0.0004529608122538775, |
|
"learning_rate": 5.047619047619048e-06, |
|
"loss": 0.0, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 80.00571428571429, |
|
"grad_norm": 0.00045702006900683045, |
|
"learning_rate": 5.037037037037037e-06, |
|
"loss": 0.0, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 80.00666666666666, |
|
"grad_norm": 0.0009125975775532424, |
|
"learning_rate": 5.026455026455027e-06, |
|
"loss": 0.0, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 80.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 2.86403751373291, |
|
"eval_runtime": 27.5707, |
|
"eval_samples_per_second": 2.684, |
|
"eval_steps_per_second": 0.689, |
|
"step": 5751 |
|
}, |
|
{ |
|
"epoch": 81.00085714285714, |
|
"grad_norm": 0.0009199812775477767, |
|
"learning_rate": 5.015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 81.00180952380953, |
|
"grad_norm": 0.0012672754237428308, |
|
"learning_rate": 5.005291005291005e-06, |
|
"loss": 0.0, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 81.00276190476191, |
|
"grad_norm": 0.0008347496041096747, |
|
"learning_rate": 4.9947089947089946e-06, |
|
"loss": 0.0, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 81.00371428571428, |
|
"grad_norm": 0.0005167628987692297, |
|
"learning_rate": 4.9841269841269845e-06, |
|
"loss": 0.0, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 81.00466666666667, |
|
"grad_norm": 0.000547485426068306, |
|
"learning_rate": 4.973544973544974e-06, |
|
"loss": 0.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 81.00561904761905, |
|
"grad_norm": 0.0015901158330962062, |
|
"learning_rate": 4.962962962962964e-06, |
|
"loss": 0.0, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 81.00657142857143, |
|
"grad_norm": 0.006001343484967947, |
|
"learning_rate": 4.952380952380953e-06, |
|
"loss": 0.0, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 81.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.844578981399536, |
|
"eval_runtime": 27.4966, |
|
"eval_samples_per_second": 2.691, |
|
"eval_steps_per_second": 0.691, |
|
"step": 5822 |
|
}, |
|
{ |
|
"epoch": 82.0007619047619, |
|
"grad_norm": 0.0003850508655887097, |
|
"learning_rate": 4.941798941798942e-06, |
|
"loss": 0.0, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 82.00171428571429, |
|
"grad_norm": 0.0013694074004888535, |
|
"learning_rate": 4.931216931216932e-06, |
|
"loss": 0.0, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 82.00266666666667, |
|
"grad_norm": 0.0008942610002122819, |
|
"learning_rate": 4.920634920634921e-06, |
|
"loss": 0.0, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 82.00361904761905, |
|
"grad_norm": 0.0021071520168334246, |
|
"learning_rate": 4.91005291005291e-06, |
|
"loss": 0.0, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 82.00457142857142, |
|
"grad_norm": 0.0001672828511800617, |
|
"learning_rate": 4.8994708994709e-06, |
|
"loss": 0.0, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 82.00552380952381, |
|
"grad_norm": 0.051589980721473694, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.0, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 82.00647619047619, |
|
"grad_norm": 0.008057937026023865, |
|
"learning_rate": 4.8783068783068785e-06, |
|
"loss": 0.0, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 82.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.6401426792144775, |
|
"eval_runtime": 25.1047, |
|
"eval_samples_per_second": 2.948, |
|
"eval_steps_per_second": 0.757, |
|
"step": 5893 |
|
}, |
|
{ |
|
"epoch": 83.00066666666666, |
|
"grad_norm": 0.0004187853483017534, |
|
"learning_rate": 4.867724867724868e-06, |
|
"loss": 0.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 83.00161904761904, |
|
"grad_norm": 0.0005931004998274148, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.0, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 83.00257142857143, |
|
"grad_norm": 0.0002631930401548743, |
|
"learning_rate": 4.846560846560847e-06, |
|
"loss": 0.0, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 83.00352380952381, |
|
"grad_norm": 0.01038403995335102, |
|
"learning_rate": 4.835978835978836e-06, |
|
"loss": 0.0004, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 83.0044761904762, |
|
"grad_norm": 0.018158361315727234, |
|
"learning_rate": 4.825396825396826e-06, |
|
"loss": 0.0, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 83.00542857142857, |
|
"grad_norm": 0.0009759682579897344, |
|
"learning_rate": 4.814814814814815e-06, |
|
"loss": 0.0032, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 83.00638095238095, |
|
"grad_norm": 0.0006013894453644753, |
|
"learning_rate": 4.804232804232805e-06, |
|
"loss": 0.0007, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 83.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.398712635040283, |
|
"eval_runtime": 25.0995, |
|
"eval_samples_per_second": 2.948, |
|
"eval_steps_per_second": 0.757, |
|
"step": 5964 |
|
}, |
|
{ |
|
"epoch": 84.00057142857143, |
|
"grad_norm": 0.0019183410331606865, |
|
"learning_rate": 4.793650793650794e-06, |
|
"loss": 0.0, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 84.0015238095238, |
|
"grad_norm": 0.000676738447509706, |
|
"learning_rate": 4.783068783068783e-06, |
|
"loss": 0.0, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 84.00247619047619, |
|
"grad_norm": 0.0023505811113864183, |
|
"learning_rate": 4.772486772486773e-06, |
|
"loss": 0.0, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 84.00342857142857, |
|
"grad_norm": 0.0005468825693242252, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 84.00438095238096, |
|
"grad_norm": 0.00032948973239399493, |
|
"learning_rate": 4.7513227513227515e-06, |
|
"loss": 0.0, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 84.00533333333334, |
|
"grad_norm": 0.0016405474161729217, |
|
"learning_rate": 4.7407407407407415e-06, |
|
"loss": 0.0, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 84.00628571428571, |
|
"grad_norm": 0.00029347886447794735, |
|
"learning_rate": 4.730158730158731e-06, |
|
"loss": 0.0, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 84.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.364161729812622, |
|
"eval_runtime": 26.3423, |
|
"eval_samples_per_second": 2.809, |
|
"eval_steps_per_second": 0.721, |
|
"step": 6035 |
|
}, |
|
{ |
|
"epoch": 85.00047619047619, |
|
"grad_norm": 0.00021160613687243313, |
|
"learning_rate": 4.71957671957672e-06, |
|
"loss": 0.0, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 85.00142857142858, |
|
"grad_norm": 0.0007972092716954648, |
|
"learning_rate": 4.708994708994709e-06, |
|
"loss": 0.0, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 85.00238095238095, |
|
"grad_norm": 0.0047329687513411045, |
|
"learning_rate": 4.698412698412699e-06, |
|
"loss": 0.0002, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 85.00333333333333, |
|
"grad_norm": 574.7470092773438, |
|
"learning_rate": 4.687830687830688e-06, |
|
"loss": 0.1462, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 85.00428571428571, |
|
"grad_norm": 0.0005663606571033597, |
|
"learning_rate": 4.677248677248677e-06, |
|
"loss": 0.0, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 85.0052380952381, |
|
"grad_norm": 0.0019831659737974405, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.1749, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 85.00619047619048, |
|
"grad_norm": 0.0003156385209877044, |
|
"learning_rate": 4.656084656084656e-06, |
|
"loss": 0.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 85.0067619047619, |
|
"eval_accuracy": 0.6756756756756757, |
|
"eval_loss": 2.470996856689453, |
|
"eval_runtime": 26.7258, |
|
"eval_samples_per_second": 2.769, |
|
"eval_steps_per_second": 0.711, |
|
"step": 6106 |
|
}, |
|
{ |
|
"epoch": 86.00038095238095, |
|
"grad_norm": 0.001011149724945426, |
|
"learning_rate": 4.6455026455026454e-06, |
|
"loss": 0.1451, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 86.00133333333333, |
|
"grad_norm": 0.00027317553758621216, |
|
"learning_rate": 4.634920634920635e-06, |
|
"loss": 0.0001, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 86.00228571428572, |
|
"grad_norm": 0.0022837144788354635, |
|
"learning_rate": 4.6243386243386246e-06, |
|
"loss": 0.0, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 86.00323809523809, |
|
"grad_norm": 0.0005552778020501137, |
|
"learning_rate": 4.6137566137566145e-06, |
|
"loss": 0.0028, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 86.00419047619047, |
|
"grad_norm": 0.00046171335270628333, |
|
"learning_rate": 4.603174603174604e-06, |
|
"loss": 0.0, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 86.00514285714286, |
|
"grad_norm": 0.00023459379735868424, |
|
"learning_rate": 4.592592592592593e-06, |
|
"loss": 0.0, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 86.00609523809524, |
|
"grad_norm": 0.00025353021919727325, |
|
"learning_rate": 4.582010582010583e-06, |
|
"loss": 0.0004, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 86.0067619047619, |
|
"eval_accuracy": 0.6486486486486487, |
|
"eval_loss": 3.032348155975342, |
|
"eval_runtime": 22.8767, |
|
"eval_samples_per_second": 3.235, |
|
"eval_steps_per_second": 0.831, |
|
"step": 6177 |
|
}, |
|
{ |
|
"epoch": 87.00028571428571, |
|
"grad_norm": 0.0003504717315081507, |
|
"learning_rate": 4.571428571428572e-06, |
|
"loss": 0.0, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 87.0012380952381, |
|
"grad_norm": 0.0009244528482668102, |
|
"learning_rate": 4.560846560846561e-06, |
|
"loss": 0.0, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 87.00219047619048, |
|
"grad_norm": 0.0005004839040338993, |
|
"learning_rate": 4.55026455026455e-06, |
|
"loss": 0.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 87.00314285714286, |
|
"grad_norm": 0.0009528248338028789, |
|
"learning_rate": 4.53968253968254e-06, |
|
"loss": 0.0, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 87.00409523809523, |
|
"grad_norm": 0.0006345040746964514, |
|
"learning_rate": 4.529100529100529e-06, |
|
"loss": 0.0, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 87.00504761904762, |
|
"grad_norm": 0.00041144120041280985, |
|
"learning_rate": 4.5185185185185185e-06, |
|
"loss": 0.0, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 87.006, |
|
"grad_norm": 0.00040222075767815113, |
|
"learning_rate": 4.5079365079365085e-06, |
|
"loss": 0.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 87.0067619047619, |
|
"eval_accuracy": 0.6351351351351351, |
|
"eval_loss": 3.0862441062927246, |
|
"eval_runtime": 23.3262, |
|
"eval_samples_per_second": 3.172, |
|
"eval_steps_per_second": 0.815, |
|
"step": 6248 |
|
}, |
|
{ |
|
"epoch": 88.00019047619048, |
|
"grad_norm": 0.000379973032977432, |
|
"learning_rate": 4.497354497354498e-06, |
|
"loss": 0.0, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 88.00114285714285, |
|
"grad_norm": 0.0004291079530958086, |
|
"learning_rate": 4.486772486772487e-06, |
|
"loss": 0.0, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 88.00209523809524, |
|
"grad_norm": 0.0002368905406910926, |
|
"learning_rate": 4.476190476190477e-06, |
|
"loss": 0.0001, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 88.00304761904762, |
|
"grad_norm": 0.0003694745246320963, |
|
"learning_rate": 4.465608465608466e-06, |
|
"loss": 0.2122, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 88.004, |
|
"grad_norm": 0.00040288950549438596, |
|
"learning_rate": 4.455026455026456e-06, |
|
"loss": 0.0, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 88.00495238095237, |
|
"grad_norm": 0.0005140057182870805, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0333, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 88.00590476190476, |
|
"grad_norm": 0.0006567566306330264, |
|
"learning_rate": 4.433862433862434e-06, |
|
"loss": 0.2299, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 88.0067619047619, |
|
"eval_accuracy": 0.7702702702702703, |
|
"eval_loss": 2.028332233428955, |
|
"eval_runtime": 22.7378, |
|
"eval_samples_per_second": 3.254, |
|
"eval_steps_per_second": 0.836, |
|
"step": 6319 |
|
}, |
|
{ |
|
"epoch": 89.00009523809524, |
|
"grad_norm": 0.0009168223477900028, |
|
"learning_rate": 4.423280423280424e-06, |
|
"loss": 0.0002, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 89.00104761904763, |
|
"grad_norm": 0.002936070552095771, |
|
"learning_rate": 4.412698412698413e-06, |
|
"loss": 0.0403, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 89.002, |
|
"grad_norm": 0.000776287168264389, |
|
"learning_rate": 4.402116402116402e-06, |
|
"loss": 0.0, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 89.00295238095238, |
|
"grad_norm": 0.0005983594455756247, |
|
"learning_rate": 4.3915343915343915e-06, |
|
"loss": 0.0, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 89.00390476190476, |
|
"grad_norm": 0.0002185263583669439, |
|
"learning_rate": 4.3809523809523815e-06, |
|
"loss": 0.0, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 89.00485714285715, |
|
"grad_norm": 0.00029487276333384216, |
|
"learning_rate": 4.370370370370371e-06, |
|
"loss": 0.0, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 89.00580952380952, |
|
"grad_norm": 0.000874596182256937, |
|
"learning_rate": 4.35978835978836e-06, |
|
"loss": 0.0, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 89.0067619047619, |
|
"grad_norm": 0.0024972488172352314, |
|
"learning_rate": 4.34920634920635e-06, |
|
"loss": 0.0, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 89.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.3751778602600098, |
|
"eval_runtime": 23.3451, |
|
"eval_samples_per_second": 3.17, |
|
"eval_steps_per_second": 0.814, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 90.00095238095238, |
|
"grad_norm": 329.7825927734375, |
|
"learning_rate": 4.338624338624339e-06, |
|
"loss": 0.428, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 90.00190476190477, |
|
"grad_norm": 0.00045626627979800105, |
|
"learning_rate": 4.328042328042328e-06, |
|
"loss": 0.1539, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 90.00285714285714, |
|
"grad_norm": 9.426961898803711, |
|
"learning_rate": 4.317460317460318e-06, |
|
"loss": 0.0005, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 90.00380952380952, |
|
"grad_norm": 0.0024752768222242594, |
|
"learning_rate": 4.306878306878307e-06, |
|
"loss": 0.0, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 90.0047619047619, |
|
"grad_norm": 0.0005259969038888812, |
|
"learning_rate": 4.296296296296296e-06, |
|
"loss": 0.0, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 90.00571428571429, |
|
"grad_norm": 0.0007702509174123406, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 90.00666666666666, |
|
"grad_norm": 0.0006307198782451451, |
|
"learning_rate": 4.2751322751322754e-06, |
|
"loss": 0.1842, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 90.0067619047619, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 2.2107057571411133, |
|
"eval_runtime": 25.9576, |
|
"eval_samples_per_second": 2.851, |
|
"eval_steps_per_second": 0.732, |
|
"step": 6461 |
|
}, |
|
{ |
|
"epoch": 91.00085714285714, |
|
"grad_norm": 0.0012892925878986716, |
|
"learning_rate": 4.2645502645502654e-06, |
|
"loss": 0.1752, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 91.00180952380953, |
|
"grad_norm": 0.0017052206676453352, |
|
"learning_rate": 4.2539682539682546e-06, |
|
"loss": 0.0, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 91.00276190476191, |
|
"grad_norm": 0.0003308649465907365, |
|
"learning_rate": 4.243386243386244e-06, |
|
"loss": 0.0001, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 91.00371428571428, |
|
"grad_norm": 0.0016652209451422095, |
|
"learning_rate": 4.232804232804233e-06, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 91.00466666666667, |
|
"grad_norm": 0.012959081679582596, |
|
"learning_rate": 4.222222222222223e-06, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 91.00561904761905, |
|
"grad_norm": 0.001314148772507906, |
|
"learning_rate": 4.211640211640212e-06, |
|
"loss": 0.0, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 91.00657142857143, |
|
"grad_norm": 0.008198284544050694, |
|
"learning_rate": 4.201058201058201e-06, |
|
"loss": 0.0002, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 91.0067619047619, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 3.1361114978790283, |
|
"eval_runtime": 23.34, |
|
"eval_samples_per_second": 3.171, |
|
"eval_steps_per_second": 0.814, |
|
"step": 6532 |
|
}, |
|
{ |
|
"epoch": 92.0007619047619, |
|
"grad_norm": 0.005125410854816437, |
|
"learning_rate": 4.190476190476191e-06, |
|
"loss": 0.0004, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 92.00171428571429, |
|
"grad_norm": 0.0006265908596105874, |
|
"learning_rate": 4.17989417989418e-06, |
|
"loss": 0.0, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 92.00266666666667, |
|
"grad_norm": 0.0026438962668180466, |
|
"learning_rate": 4.169312169312169e-06, |
|
"loss": 0.0599, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 92.00361904761905, |
|
"grad_norm": 0.0007586319698020816, |
|
"learning_rate": 4.158730158730159e-06, |
|
"loss": 0.0, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 92.00457142857142, |
|
"grad_norm": 0.0017233211547136307, |
|
"learning_rate": 4.1481481481481485e-06, |
|
"loss": 0.0, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 92.00552380952381, |
|
"grad_norm": 0.0015403326833620667, |
|
"learning_rate": 4.137566137566138e-06, |
|
"loss": 0.0, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 92.00647619047619, |
|
"grad_norm": 0.013408493250608444, |
|
"learning_rate": 4.126984126984127e-06, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 92.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.7366185188293457, |
|
"eval_runtime": 22.9942, |
|
"eval_samples_per_second": 3.218, |
|
"eval_steps_per_second": 0.826, |
|
"step": 6603 |
|
}, |
|
{ |
|
"epoch": 93.00066666666666, |
|
"grad_norm": 0.0003232085146009922, |
|
"learning_rate": 4.116402116402117e-06, |
|
"loss": 0.0, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 93.00161904761904, |
|
"grad_norm": 0.043059222400188446, |
|
"learning_rate": 4.105820105820107e-06, |
|
"loss": 0.0, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 93.00257142857143, |
|
"grad_norm": 0.00047642309800721705, |
|
"learning_rate": 4.095238095238096e-06, |
|
"loss": 0.0, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 93.00352380952381, |
|
"grad_norm": 0.0005419534863904119, |
|
"learning_rate": 4.084656084656085e-06, |
|
"loss": 0.0016, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 93.0044761904762, |
|
"grad_norm": 0.000353945535607636, |
|
"learning_rate": 4.074074074074074e-06, |
|
"loss": 0.0, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 93.00542857142857, |
|
"grad_norm": 0.00030686677200719714, |
|
"learning_rate": 4.063492063492064e-06, |
|
"loss": 0.0003, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 93.00638095238095, |
|
"grad_norm": 0.00041551675531081855, |
|
"learning_rate": 4.052910052910053e-06, |
|
"loss": 0.0, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 93.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.6849799156188965, |
|
"eval_runtime": 23.033, |
|
"eval_samples_per_second": 3.213, |
|
"eval_steps_per_second": 0.825, |
|
"step": 6674 |
|
}, |
|
{ |
|
"epoch": 94.00057142857143, |
|
"grad_norm": 0.0002515302912797779, |
|
"learning_rate": 4.042328042328042e-06, |
|
"loss": 0.0, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 94.0015238095238, |
|
"grad_norm": 0.0007501939544454217, |
|
"learning_rate": 4.031746031746032e-06, |
|
"loss": 0.0, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 94.00247619047619, |
|
"grad_norm": 0.00030370696913450956, |
|
"learning_rate": 4.0211640211640215e-06, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 94.00342857142857, |
|
"grad_norm": 0.010980455204844475, |
|
"learning_rate": 4.010582010582011e-06, |
|
"loss": 0.0, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 94.00438095238096, |
|
"grad_norm": 0.0007709608762525022, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 94.00533333333334, |
|
"grad_norm": 0.0003361174603924155, |
|
"learning_rate": 3.98941798941799e-06, |
|
"loss": 0.0, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 94.00628571428571, |
|
"grad_norm": 0.00025106294197030365, |
|
"learning_rate": 3.978835978835979e-06, |
|
"loss": 0.0, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 94.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.696514129638672, |
|
"eval_runtime": 21.7843, |
|
"eval_samples_per_second": 3.397, |
|
"eval_steps_per_second": 0.872, |
|
"step": 6745 |
|
}, |
|
{ |
|
"epoch": 95.00047619047619, |
|
"grad_norm": 0.0017140108393505216, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.0, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 95.00142857142858, |
|
"grad_norm": 0.0003946751821786165, |
|
"learning_rate": 3.957671957671958e-06, |
|
"loss": 0.0, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 95.00238095238095, |
|
"grad_norm": 0.0016369846416637301, |
|
"learning_rate": 3.947089947089948e-06, |
|
"loss": 0.0582, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 95.00333333333333, |
|
"grad_norm": 0.0007478753686882555, |
|
"learning_rate": 3.936507936507936e-06, |
|
"loss": 0.0, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 95.00428571428571, |
|
"grad_norm": 0.00047938968054950237, |
|
"learning_rate": 3.925925925925926e-06, |
|
"loss": 0.0028, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 95.0052380952381, |
|
"grad_norm": 0.00035130296600982547, |
|
"learning_rate": 3.9153439153439155e-06, |
|
"loss": 0.1409, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 95.00619047619048, |
|
"grad_norm": 0.0006514904671348631, |
|
"learning_rate": 3.9047619047619055e-06, |
|
"loss": 0.1894, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 95.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.507014036178589, |
|
"eval_runtime": 19.8307, |
|
"eval_samples_per_second": 3.732, |
|
"eval_steps_per_second": 0.958, |
|
"step": 6816 |
|
}, |
|
{ |
|
"epoch": 96.00038095238095, |
|
"grad_norm": 0.001034559914842248, |
|
"learning_rate": 3.894179894179895e-06, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 96.00133333333333, |
|
"grad_norm": 0.003884747624397278, |
|
"learning_rate": 3.883597883597884e-06, |
|
"loss": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 96.00228571428572, |
|
"grad_norm": 0.00044443883234634995, |
|
"learning_rate": 3.873015873015874e-06, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 96.00323809523809, |
|
"grad_norm": 0.03588930517435074, |
|
"learning_rate": 3.862433862433863e-06, |
|
"loss": 0.0, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 96.00419047619047, |
|
"grad_norm": 0.000525305571500212, |
|
"learning_rate": 3.851851851851852e-06, |
|
"loss": 0.0, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 96.00514285714286, |
|
"grad_norm": 0.00042602582834661007, |
|
"learning_rate": 3.841269841269842e-06, |
|
"loss": 0.0, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 96.00609523809524, |
|
"grad_norm": 0.0003282624820712954, |
|
"learning_rate": 3.830687830687831e-06, |
|
"loss": 0.0, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 96.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.5326974391937256, |
|
"eval_runtime": 20.8641, |
|
"eval_samples_per_second": 3.547, |
|
"eval_steps_per_second": 0.911, |
|
"step": 6887 |
|
}, |
|
{ |
|
"epoch": 97.00028571428571, |
|
"grad_norm": 0.0016416395083069801, |
|
"learning_rate": 3.82010582010582e-06, |
|
"loss": 0.0001, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 97.0012380952381, |
|
"grad_norm": 0.0002830391167663038, |
|
"learning_rate": 3.80952380952381e-06, |
|
"loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 97.00219047619048, |
|
"grad_norm": 0.0003058542206417769, |
|
"learning_rate": 3.7989417989417994e-06, |
|
"loss": 0.1022, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 97.00314285714286, |
|
"grad_norm": 0.0010565044358372688, |
|
"learning_rate": 3.788359788359789e-06, |
|
"loss": 0.0, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 97.00409523809523, |
|
"grad_norm": 0.0004148608713876456, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 0.0, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 97.00504761904762, |
|
"grad_norm": 0.0002654260606504977, |
|
"learning_rate": 3.7671957671957676e-06, |
|
"loss": 0.0002, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 97.006, |
|
"grad_norm": 0.00963718444108963, |
|
"learning_rate": 3.7566137566137568e-06, |
|
"loss": 0.0, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 97.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.884532928466797, |
|
"eval_runtime": 20.1819, |
|
"eval_samples_per_second": 3.667, |
|
"eval_steps_per_second": 0.941, |
|
"step": 6958 |
|
}, |
|
{ |
|
"epoch": 98.00019047619048, |
|
"grad_norm": 0.0004373944248072803, |
|
"learning_rate": 3.7460317460317463e-06, |
|
"loss": 0.0, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 98.00114285714285, |
|
"grad_norm": 0.0008808135171420872, |
|
"learning_rate": 3.735449735449736e-06, |
|
"loss": 0.0, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 98.00209523809524, |
|
"grad_norm": 0.004815933760255575, |
|
"learning_rate": 3.724867724867725e-06, |
|
"loss": 0.0, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 98.00304761904762, |
|
"grad_norm": 0.00041855985182337463, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.0861, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 98.004, |
|
"grad_norm": 0.00028756665415130556, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 98.00495238095237, |
|
"grad_norm": 0.0002812529855873436, |
|
"learning_rate": 3.6931216931216933e-06, |
|
"loss": 0.0046, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 98.00590476190476, |
|
"grad_norm": 0.0004412989073898643, |
|
"learning_rate": 3.6825396825396833e-06, |
|
"loss": 0.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 98.0067619047619, |
|
"eval_accuracy": 0.7837837837837838, |
|
"eval_loss": 2.003041982650757, |
|
"eval_runtime": 19.5773, |
|
"eval_samples_per_second": 3.78, |
|
"eval_steps_per_second": 0.971, |
|
"step": 7029 |
|
}, |
|
{ |
|
"epoch": 99.00009523809524, |
|
"grad_norm": 0.0005032554036006331, |
|
"learning_rate": 3.671957671957672e-06, |
|
"loss": 0.0, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 99.00104761904763, |
|
"grad_norm": 368.5629577636719, |
|
"learning_rate": 3.661375661375662e-06, |
|
"loss": 0.0311, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 99.002, |
|
"grad_norm": 0.0009055473492480814, |
|
"learning_rate": 3.6507936507936507e-06, |
|
"loss": 0.0, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 99.00295238095238, |
|
"grad_norm": 0.005899870302528143, |
|
"learning_rate": 3.6402116402116407e-06, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 99.00390476190476, |
|
"grad_norm": 0.0005574446404352784, |
|
"learning_rate": 3.6296296296296302e-06, |
|
"loss": 0.0, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 99.00485714285715, |
|
"grad_norm": 0.0003847317711915821, |
|
"learning_rate": 3.6190476190476194e-06, |
|
"loss": 0.0006, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 99.00580952380952, |
|
"grad_norm": 0.0002375604526605457, |
|
"learning_rate": 3.608465608465609e-06, |
|
"loss": 0.0, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 99.0067619047619, |
|
"grad_norm": 0.001055917702615261, |
|
"learning_rate": 3.597883597883598e-06, |
|
"loss": 0.1439, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 99.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.7892189025878906, |
|
"eval_runtime": 20.1076, |
|
"eval_samples_per_second": 3.68, |
|
"eval_steps_per_second": 0.945, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 100.00095238095238, |
|
"grad_norm": 0.0020569399930536747, |
|
"learning_rate": 3.5873015873015877e-06, |
|
"loss": 0.0, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 100.00190476190477, |
|
"grad_norm": 0.0007545605767518282, |
|
"learning_rate": 3.5767195767195772e-06, |
|
"loss": 0.0297, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 100.00285714285714, |
|
"grad_norm": 0.0006217532209120691, |
|
"learning_rate": 3.5661375661375664e-06, |
|
"loss": 0.0, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 100.00380952380952, |
|
"grad_norm": 0.00022461486514657736, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.0, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 100.0047619047619, |
|
"grad_norm": 0.0004957873024977744, |
|
"learning_rate": 3.544973544973545e-06, |
|
"loss": 0.0, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 100.00571428571429, |
|
"grad_norm": 0.0002601814630907029, |
|
"learning_rate": 3.5343915343915346e-06, |
|
"loss": 0.0, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 100.00666666666666, |
|
"grad_norm": 0.0005272876587696373, |
|
"learning_rate": 3.523809523809524e-06, |
|
"loss": 0.0, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 100.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.4621739387512207, |
|
"eval_runtime": 20.3266, |
|
"eval_samples_per_second": 3.641, |
|
"eval_steps_per_second": 0.935, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 101.00085714285714, |
|
"grad_norm": 0.0008107370231300592, |
|
"learning_rate": 3.5132275132275133e-06, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 101.00180952380953, |
|
"grad_norm": 0.0010733662638813257, |
|
"learning_rate": 3.502645502645503e-06, |
|
"loss": 0.0, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 101.00276190476191, |
|
"grad_norm": 0.00023595021048095077, |
|
"learning_rate": 3.492063492063492e-06, |
|
"loss": 0.1378, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 101.00371428571428, |
|
"grad_norm": 0.00020432127348612994, |
|
"learning_rate": 3.481481481481482e-06, |
|
"loss": 0.0, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 101.00466666666667, |
|
"grad_norm": 0.0006551428232342005, |
|
"learning_rate": 3.4708994708994716e-06, |
|
"loss": 0.0, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 101.00561904761905, |
|
"grad_norm": 0.00026041388628073037, |
|
"learning_rate": 3.4603174603174607e-06, |
|
"loss": 0.2107, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 101.00657142857143, |
|
"grad_norm": 0.00023115877411328256, |
|
"learning_rate": 3.4497354497354503e-06, |
|
"loss": 0.0016, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 101.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.4539666175842285, |
|
"eval_runtime": 20.1069, |
|
"eval_samples_per_second": 3.68, |
|
"eval_steps_per_second": 0.945, |
|
"step": 7242 |
|
}, |
|
{ |
|
"epoch": 102.0007619047619, |
|
"grad_norm": 0.00019595421326812357, |
|
"learning_rate": 3.4391534391534394e-06, |
|
"loss": 0.0, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 102.00171428571429, |
|
"grad_norm": 0.00034865373163484037, |
|
"learning_rate": 3.428571428571429e-06, |
|
"loss": 0.2287, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 102.00266666666667, |
|
"grad_norm": 0.0010627711890265346, |
|
"learning_rate": 3.4179894179894185e-06, |
|
"loss": 0.0, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 102.00361904761905, |
|
"grad_norm": 0.0002950064663309604, |
|
"learning_rate": 3.4074074074074077e-06, |
|
"loss": 0.0, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 102.00457142857142, |
|
"grad_norm": 0.0019715323578566313, |
|
"learning_rate": 3.3968253968253972e-06, |
|
"loss": 0.0, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 102.00552380952381, |
|
"grad_norm": 0.0005743891815654933, |
|
"learning_rate": 3.3862433862433864e-06, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 102.00647619047619, |
|
"grad_norm": 0.0005560641875490546, |
|
"learning_rate": 3.375661375661376e-06, |
|
"loss": 0.0006, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 102.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.485344171524048, |
|
"eval_runtime": 18.7508, |
|
"eval_samples_per_second": 3.946, |
|
"eval_steps_per_second": 1.013, |
|
"step": 7313 |
|
}, |
|
{ |
|
"epoch": 103.00066666666666, |
|
"grad_norm": 0.00027370688621886075, |
|
"learning_rate": 3.3650793650793655e-06, |
|
"loss": 0.0, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 103.00161904761904, |
|
"grad_norm": 0.0003905274497810751, |
|
"learning_rate": 3.3544973544973546e-06, |
|
"loss": 0.0, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 103.00257142857143, |
|
"grad_norm": 0.0002848069998435676, |
|
"learning_rate": 3.343915343915344e-06, |
|
"loss": 0.0104, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 103.00352380952381, |
|
"grad_norm": 0.00020756880985572934, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 103.0044761904762, |
|
"grad_norm": 0.4025360643863678, |
|
"learning_rate": 3.322751322751323e-06, |
|
"loss": 0.0001, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 103.00542857142857, |
|
"grad_norm": 0.0003499074373394251, |
|
"learning_rate": 3.312169312169313e-06, |
|
"loss": 0.0, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 103.00638095238095, |
|
"grad_norm": 0.0005405242554843426, |
|
"learning_rate": 3.3015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 103.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.510071277618408, |
|
"eval_runtime": 18.4553, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 1.03, |
|
"step": 7384 |
|
}, |
|
{ |
|
"epoch": 104.00057142857143, |
|
"grad_norm": 0.0011253401171416044, |
|
"learning_rate": 3.2910052910052916e-06, |
|
"loss": 0.0, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 104.0015238095238, |
|
"grad_norm": 0.00038248911732807755, |
|
"learning_rate": 3.2804232804232807e-06, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 104.00247619047619, |
|
"grad_norm": 0.004601576831191778, |
|
"learning_rate": 3.2698412698412703e-06, |
|
"loss": 0.0, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 104.00342857142857, |
|
"grad_norm": 0.0004940856015309691, |
|
"learning_rate": 3.25925925925926e-06, |
|
"loss": 0.0, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 104.00438095238096, |
|
"grad_norm": 0.0004417779855430126, |
|
"learning_rate": 3.248677248677249e-06, |
|
"loss": 0.0, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 104.00533333333334, |
|
"grad_norm": 0.0007058187038637698, |
|
"learning_rate": 3.2380952380952385e-06, |
|
"loss": 0.0, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 104.00628571428571, |
|
"grad_norm": 0.0003210832073818892, |
|
"learning_rate": 3.2275132275132277e-06, |
|
"loss": 0.0, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 104.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.5136077404022217, |
|
"eval_runtime": 21.1895, |
|
"eval_samples_per_second": 3.492, |
|
"eval_steps_per_second": 0.897, |
|
"step": 7455 |
|
}, |
|
{ |
|
"epoch": 105.00047619047619, |
|
"grad_norm": 0.005918905604630709, |
|
"learning_rate": 3.2169312169312172e-06, |
|
"loss": 0.0, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 105.00142857142858, |
|
"grad_norm": 0.0002152614906663075, |
|
"learning_rate": 3.206349206349207e-06, |
|
"loss": 0.0, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 105.00238095238095, |
|
"grad_norm": 0.0005597418639808893, |
|
"learning_rate": 3.195767195767196e-06, |
|
"loss": 0.0, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 105.00333333333333, |
|
"grad_norm": 0.0002892552292905748, |
|
"learning_rate": 3.1851851851851855e-06, |
|
"loss": 0.0, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 105.00428571428571, |
|
"grad_norm": 0.00044008262921124697, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 105.0052380952381, |
|
"grad_norm": 0.0002466421283315867, |
|
"learning_rate": 3.164021164021164e-06, |
|
"loss": 0.0, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 105.00619047619048, |
|
"grad_norm": 0.0002469986502546817, |
|
"learning_rate": 3.1534391534391538e-06, |
|
"loss": 0.0, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 105.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.5027740001678467, |
|
"eval_runtime": 19.69, |
|
"eval_samples_per_second": 3.758, |
|
"eval_steps_per_second": 0.965, |
|
"step": 7526 |
|
}, |
|
{ |
|
"epoch": 106.00038095238095, |
|
"grad_norm": 0.0006333022029139102, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.0, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 106.00133333333333, |
|
"grad_norm": 0.0038955979980528355, |
|
"learning_rate": 3.132275132275133e-06, |
|
"loss": 0.0, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 106.00228571428572, |
|
"grad_norm": 0.0007543124374933541, |
|
"learning_rate": 3.1216931216931216e-06, |
|
"loss": 0.0, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 106.00323809523809, |
|
"grad_norm": 0.00026103874552063644, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.0, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 106.00419047619047, |
|
"grad_norm": 0.0006310672033578157, |
|
"learning_rate": 3.100529100529101e-06, |
|
"loss": 0.0, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 106.00514285714286, |
|
"grad_norm": 0.000212406026548706, |
|
"learning_rate": 3.0899470899470903e-06, |
|
"loss": 0.0, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 106.00609523809524, |
|
"grad_norm": 0.0003842598816845566, |
|
"learning_rate": 3.07936507936508e-06, |
|
"loss": 0.0039, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 106.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.6881797313690186, |
|
"eval_runtime": 18.8348, |
|
"eval_samples_per_second": 3.929, |
|
"eval_steps_per_second": 1.009, |
|
"step": 7597 |
|
}, |
|
{ |
|
"epoch": 107.00028571428571, |
|
"grad_norm": 0.0005781868239864707, |
|
"learning_rate": 3.068783068783069e-06, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 107.0012380952381, |
|
"grad_norm": 0.0003101880429312587, |
|
"learning_rate": 3.0582010582010585e-06, |
|
"loss": 0.0, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 107.00219047619048, |
|
"grad_norm": 0.0003724259731825441, |
|
"learning_rate": 3.047619047619048e-06, |
|
"loss": 0.0, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 107.00314285714286, |
|
"grad_norm": 0.0003032613603863865, |
|
"learning_rate": 3.0370370370370372e-06, |
|
"loss": 0.0007, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 107.00409523809523, |
|
"grad_norm": 0.0001887906837509945, |
|
"learning_rate": 3.026455026455027e-06, |
|
"loss": 0.0, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 107.00504761904762, |
|
"grad_norm": 0.00022369824000634253, |
|
"learning_rate": 3.015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 107.006, |
|
"grad_norm": 0.00019053922733291984, |
|
"learning_rate": 3.0052910052910055e-06, |
|
"loss": 0.0, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 107.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.8377068042755127, |
|
"eval_runtime": 18.9954, |
|
"eval_samples_per_second": 3.896, |
|
"eval_steps_per_second": 1.0, |
|
"step": 7668 |
|
}, |
|
{ |
|
"epoch": 108.00019047619048, |
|
"grad_norm": 0.0004099408397451043, |
|
"learning_rate": 2.9947089947089946e-06, |
|
"loss": 0.0, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 108.00114285714285, |
|
"grad_norm": 0.0004890338750556111, |
|
"learning_rate": 2.984126984126984e-06, |
|
"loss": 0.0, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 108.00209523809524, |
|
"grad_norm": 0.00018718685896601528, |
|
"learning_rate": 2.9735449735449738e-06, |
|
"loss": 0.0, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 108.00304761904762, |
|
"grad_norm": 0.0004911802243441343, |
|
"learning_rate": 2.962962962962963e-06, |
|
"loss": 0.0, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 108.004, |
|
"grad_norm": 0.0003113812126684934, |
|
"learning_rate": 2.9523809523809525e-06, |
|
"loss": 0.0, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 108.00495238095237, |
|
"grad_norm": 0.00017770612612366676, |
|
"learning_rate": 2.9417989417989416e-06, |
|
"loss": 0.0, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 108.00590476190476, |
|
"grad_norm": 0.00023184904421214014, |
|
"learning_rate": 2.9312169312169316e-06, |
|
"loss": 0.0, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 108.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.849548816680908, |
|
"eval_runtime": 19.7799, |
|
"eval_samples_per_second": 3.741, |
|
"eval_steps_per_second": 0.961, |
|
"step": 7739 |
|
}, |
|
{ |
|
"epoch": 109.00009523809524, |
|
"grad_norm": 0.00020311641856096685, |
|
"learning_rate": 2.920634920634921e-06, |
|
"loss": 0.0, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 109.00104761904763, |
|
"grad_norm": 0.0002174510882468894, |
|
"learning_rate": 2.9100529100529103e-06, |
|
"loss": 0.0, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 109.002, |
|
"grad_norm": 0.00025807766360230744, |
|
"learning_rate": 2.8994708994709e-06, |
|
"loss": 0.0, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 109.00295238095238, |
|
"grad_norm": 0.0005842253449372947, |
|
"learning_rate": 2.888888888888889e-06, |
|
"loss": 0.0, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 109.00390476190476, |
|
"grad_norm": 0.00022474676370620728, |
|
"learning_rate": 2.8783068783068786e-06, |
|
"loss": 0.0, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 109.00485714285715, |
|
"grad_norm": 0.00021344266133382916, |
|
"learning_rate": 2.867724867724868e-06, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 109.00580952380952, |
|
"grad_norm": 0.00037887351936660707, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 109.0067619047619, |
|
"grad_norm": 0.00037470136885531247, |
|
"learning_rate": 2.846560846560847e-06, |
|
"loss": 0.0073, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 109.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.6624884605407715, |
|
"eval_runtime": 22.2204, |
|
"eval_samples_per_second": 3.33, |
|
"eval_steps_per_second": 0.855, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 110.00095238095238, |
|
"grad_norm": 0.0041119703091681, |
|
"learning_rate": 2.835978835978836e-06, |
|
"loss": 0.0, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 110.00190476190477, |
|
"grad_norm": 0.0002662258630152792, |
|
"learning_rate": 2.8253968253968255e-06, |
|
"loss": 0.0008, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 110.00285714285714, |
|
"grad_norm": 0.0008752596913836896, |
|
"learning_rate": 2.814814814814815e-06, |
|
"loss": 0.0, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 110.00380952380952, |
|
"grad_norm": 0.00015781358524691314, |
|
"learning_rate": 2.8042328042328042e-06, |
|
"loss": 0.0, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 110.0047619047619, |
|
"grad_norm": 85.68650817871094, |
|
"learning_rate": 2.7936507936507938e-06, |
|
"loss": 0.0024, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 110.00571428571429, |
|
"grad_norm": 0.00019260364933870733, |
|
"learning_rate": 2.783068783068783e-06, |
|
"loss": 0.0, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 110.00666666666666, |
|
"grad_norm": 0.00015918267308734357, |
|
"learning_rate": 2.7724867724867725e-06, |
|
"loss": 0.0, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 110.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.706251621246338, |
|
"eval_runtime": 22.8587, |
|
"eval_samples_per_second": 3.237, |
|
"eval_steps_per_second": 0.831, |
|
"step": 7881 |
|
}, |
|
{ |
|
"epoch": 111.00085714285714, |
|
"grad_norm": 0.00036700881901197135, |
|
"learning_rate": 2.7619047619047625e-06, |
|
"loss": 0.0, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 111.00180952380953, |
|
"grad_norm": 0.0001547165447846055, |
|
"learning_rate": 2.7513227513227516e-06, |
|
"loss": 0.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 111.00276190476191, |
|
"grad_norm": 24.328468322753906, |
|
"learning_rate": 2.740740740740741e-06, |
|
"loss": 0.2405, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 111.00371428571428, |
|
"grad_norm": 0.0007024611113592982, |
|
"learning_rate": 2.7301587301587303e-06, |
|
"loss": 0.0, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 111.00466666666667, |
|
"grad_norm": 0.0004765796475112438, |
|
"learning_rate": 2.71957671957672e-06, |
|
"loss": 0.0, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 111.00561904761905, |
|
"grad_norm": 0.0002616800193209201, |
|
"learning_rate": 2.7089947089947094e-06, |
|
"loss": 0.0, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 111.00657142857143, |
|
"grad_norm": 0.0006567240925505757, |
|
"learning_rate": 2.6984126984126986e-06, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 111.0067619047619, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 2.3949179649353027, |
|
"eval_runtime": 13.9865, |
|
"eval_samples_per_second": 5.291, |
|
"eval_steps_per_second": 1.358, |
|
"step": 7952 |
|
}, |
|
{ |
|
"epoch": 112.0007619047619, |
|
"grad_norm": 0.00034727007732726634, |
|
"learning_rate": 2.687830687830688e-06, |
|
"loss": 0.0001, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 112.00171428571429, |
|
"grad_norm": 0.0005837412900291383, |
|
"learning_rate": 2.6772486772486773e-06, |
|
"loss": 0.0, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 112.00266666666667, |
|
"grad_norm": 0.00018690152501221746, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.0, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 112.00361904761905, |
|
"grad_norm": 0.000701416633091867, |
|
"learning_rate": 2.6560846560846564e-06, |
|
"loss": 0.0, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 112.00457142857142, |
|
"grad_norm": 0.00027452572248876095, |
|
"learning_rate": 2.6455026455026455e-06, |
|
"loss": 0.0001, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 112.00552380952381, |
|
"grad_norm": 0.00015516536950599402, |
|
"learning_rate": 2.634920634920635e-06, |
|
"loss": 0.0, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 112.00647619047619, |
|
"grad_norm": 0.00020343823416624218, |
|
"learning_rate": 2.6243386243386242e-06, |
|
"loss": 0.0, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 112.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.5955913066864014, |
|
"eval_runtime": 13.9752, |
|
"eval_samples_per_second": 5.295, |
|
"eval_steps_per_second": 1.36, |
|
"step": 8023 |
|
}, |
|
{ |
|
"epoch": 113.00066666666666, |
|
"grad_norm": 0.00027498166309669614, |
|
"learning_rate": 2.613756613756614e-06, |
|
"loss": 0.0, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 113.00161904761904, |
|
"grad_norm": 0.0004681613063439727, |
|
"learning_rate": 2.6031746031746038e-06, |
|
"loss": 0.0, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 113.00257142857143, |
|
"grad_norm": 0.0002042886335402727, |
|
"learning_rate": 2.5925925925925925e-06, |
|
"loss": 0.0, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 113.00352380952381, |
|
"grad_norm": 0.0007689885678701103, |
|
"learning_rate": 2.5820105820105825e-06, |
|
"loss": 0.0, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 113.0044761904762, |
|
"grad_norm": 0.00023886038979981095, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.0, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 113.00542857142857, |
|
"grad_norm": 0.00014404159446712583, |
|
"learning_rate": 2.560846560846561e-06, |
|
"loss": 0.1964, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 113.00638095238095, |
|
"grad_norm": 0.0002148894709534943, |
|
"learning_rate": 2.5502645502645507e-06, |
|
"loss": 0.0001, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 113.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.9212052822113037, |
|
"eval_runtime": 13.8012, |
|
"eval_samples_per_second": 5.362, |
|
"eval_steps_per_second": 1.377, |
|
"step": 8094 |
|
}, |
|
{ |
|
"epoch": 114.00057142857143, |
|
"grad_norm": 0.00036819567321799695, |
|
"learning_rate": 2.53968253968254e-06, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 114.0015238095238, |
|
"grad_norm": 0.0022056999150663614, |
|
"learning_rate": 2.5291005291005294e-06, |
|
"loss": 0.0, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 114.00247619047619, |
|
"grad_norm": 0.03204357624053955, |
|
"learning_rate": 2.5185185185185186e-06, |
|
"loss": 0.0, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 114.00342857142857, |
|
"grad_norm": 0.00015915023686829954, |
|
"learning_rate": 2.507936507936508e-06, |
|
"loss": 0.0, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 114.00438095238096, |
|
"grad_norm": 0.0010063733207061887, |
|
"learning_rate": 2.4973544973544973e-06, |
|
"loss": 0.0, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 114.00533333333334, |
|
"grad_norm": 0.0001527264976175502, |
|
"learning_rate": 2.486772486772487e-06, |
|
"loss": 0.0, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 114.00628571428571, |
|
"grad_norm": 0.00043166003888472915, |
|
"learning_rate": 2.4761904761904764e-06, |
|
"loss": 0.0, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 114.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.821566343307495, |
|
"eval_runtime": 13.9047, |
|
"eval_samples_per_second": 5.322, |
|
"eval_steps_per_second": 1.366, |
|
"step": 8165 |
|
}, |
|
{ |
|
"epoch": 115.00047619047619, |
|
"grad_norm": 0.0003170575946569443, |
|
"learning_rate": 2.465608465608466e-06, |
|
"loss": 0.0, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 115.00142857142858, |
|
"grad_norm": 0.00025970794376917183, |
|
"learning_rate": 2.455026455026455e-06, |
|
"loss": 0.0, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 115.00238095238095, |
|
"grad_norm": 0.0002181291056331247, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 0.0, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 115.00333333333333, |
|
"grad_norm": 0.00014427877613343298, |
|
"learning_rate": 2.433862433862434e-06, |
|
"loss": 0.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 115.00428571428571, |
|
"grad_norm": 0.00027777208015322685, |
|
"learning_rate": 2.4232804232804234e-06, |
|
"loss": 0.0, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 115.0052380952381, |
|
"grad_norm": 0.0002333878946956247, |
|
"learning_rate": 2.412698412698413e-06, |
|
"loss": 0.0, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 115.00619047619048, |
|
"grad_norm": 0.00015995455032680184, |
|
"learning_rate": 2.4021164021164025e-06, |
|
"loss": 0.0, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 115.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.840902328491211, |
|
"eval_runtime": 13.8639, |
|
"eval_samples_per_second": 5.338, |
|
"eval_steps_per_second": 1.37, |
|
"step": 8236 |
|
}, |
|
{ |
|
"epoch": 116.00038095238095, |
|
"grad_norm": 0.0001782690524123609, |
|
"learning_rate": 2.3915343915343916e-06, |
|
"loss": 0.0, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 116.00133333333333, |
|
"grad_norm": 0.00017185336037073284, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 0.0, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 116.00228571428572, |
|
"grad_norm": 0.002734607784077525, |
|
"learning_rate": 2.3703703703703707e-06, |
|
"loss": 0.0, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 116.00323809523809, |
|
"grad_norm": 0.00045948615297675133, |
|
"learning_rate": 2.35978835978836e-06, |
|
"loss": 0.0, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 116.00419047619047, |
|
"grad_norm": 0.00028406543424353004, |
|
"learning_rate": 2.3492063492063494e-06, |
|
"loss": 0.0, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 116.00514285714286, |
|
"grad_norm": 0.00016468593094032258, |
|
"learning_rate": 2.3386243386243386e-06, |
|
"loss": 0.0, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 116.00609523809524, |
|
"grad_norm": 0.00022645114222541451, |
|
"learning_rate": 2.328042328042328e-06, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 116.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.8546435832977295, |
|
"eval_runtime": 16.3842, |
|
"eval_samples_per_second": 4.517, |
|
"eval_steps_per_second": 1.16, |
|
"step": 8307 |
|
}, |
|
{ |
|
"epoch": 117.00028571428571, |
|
"grad_norm": 0.00027977171703241765, |
|
"learning_rate": 2.3174603174603177e-06, |
|
"loss": 0.0, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 117.0012380952381, |
|
"grad_norm": 0.0003135088481940329, |
|
"learning_rate": 2.3068783068783073e-06, |
|
"loss": 0.0, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 117.00219047619048, |
|
"grad_norm": 0.00021076586563140154, |
|
"learning_rate": 2.2962962962962964e-06, |
|
"loss": 0.0, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 117.00314285714286, |
|
"grad_norm": 0.00024846967426128685, |
|
"learning_rate": 2.285714285714286e-06, |
|
"loss": 0.0, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 117.00409523809523, |
|
"grad_norm": 0.0003208005800843239, |
|
"learning_rate": 2.275132275132275e-06, |
|
"loss": 0.0, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 117.00504761904762, |
|
"grad_norm": 0.00039256789023056626, |
|
"learning_rate": 2.2645502645502647e-06, |
|
"loss": 0.0, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 117.006, |
|
"grad_norm": 0.00013160724483896047, |
|
"learning_rate": 2.2539682539682542e-06, |
|
"loss": 0.0, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 117.0067619047619, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 2.8172342777252197, |
|
"eval_runtime": 19.032, |
|
"eval_samples_per_second": 3.888, |
|
"eval_steps_per_second": 0.998, |
|
"step": 8378 |
|
}, |
|
{ |
|
"epoch": 118.00019047619048, |
|
"grad_norm": 0.00013908334949519485, |
|
"learning_rate": 2.2433862433862434e-06, |
|
"loss": 0.0, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 118.00114285714285, |
|
"grad_norm": 0.0002135159884346649, |
|
"learning_rate": 2.232804232804233e-06, |
|
"loss": 0.0, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 118.00209523809524, |
|
"grad_norm": 0.00352920638397336, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 118.00304761904762, |
|
"grad_norm": 0.000681175384670496, |
|
"learning_rate": 2.211640211640212e-06, |
|
"loss": 0.0001, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 118.004, |
|
"grad_norm": 0.00010591221507638693, |
|
"learning_rate": 2.201058201058201e-06, |
|
"loss": 0.0, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 118.00495238095237, |
|
"grad_norm": 309.2093811035156, |
|
"learning_rate": 2.1904761904761908e-06, |
|
"loss": 0.2105, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 118.00590476190476, |
|
"grad_norm": 0.00028071904671378434, |
|
"learning_rate": 2.17989417989418e-06, |
|
"loss": 0.0, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 118.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.4545764923095703, |
|
"eval_runtime": 20.2563, |
|
"eval_samples_per_second": 3.653, |
|
"eval_steps_per_second": 0.938, |
|
"step": 8449 |
|
}, |
|
{ |
|
"epoch": 119.00009523809524, |
|
"grad_norm": 0.0003477052669040859, |
|
"learning_rate": 2.1693121693121695e-06, |
|
"loss": 0.0, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 119.00104761904763, |
|
"grad_norm": 0.0007623965502716601, |
|
"learning_rate": 2.158730158730159e-06, |
|
"loss": 0.0, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 119.002, |
|
"grad_norm": 0.00021526910131797194, |
|
"learning_rate": 2.148148148148148e-06, |
|
"loss": 0.0, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 119.00295238095238, |
|
"grad_norm": 0.0005041114636696875, |
|
"learning_rate": 2.1375661375661377e-06, |
|
"loss": 0.0, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 119.00390476190476, |
|
"grad_norm": 0.00018653657753020525, |
|
"learning_rate": 2.1269841269841273e-06, |
|
"loss": 0.0, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 119.00485714285715, |
|
"grad_norm": 0.00029102060943841934, |
|
"learning_rate": 2.1164021164021164e-06, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 119.00580952380952, |
|
"grad_norm": 0.00020990609482396394, |
|
"learning_rate": 2.105820105820106e-06, |
|
"loss": 0.0, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 119.0067619047619, |
|
"grad_norm": 0.00012788939056918025, |
|
"learning_rate": 2.0952380952380955e-06, |
|
"loss": 0.0, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 119.0067619047619, |
|
"eval_accuracy": 0.7567567567567568, |
|
"eval_loss": 2.381497383117676, |
|
"eval_runtime": 21.6548, |
|
"eval_samples_per_second": 3.417, |
|
"eval_steps_per_second": 0.877, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 120.00095238095238, |
|
"grad_norm": 0.0004286629264242947, |
|
"learning_rate": 2.0846560846560847e-06, |
|
"loss": 0.0, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 120.00190476190477, |
|
"grad_norm": 0.00028043834026902914, |
|
"learning_rate": 2.0740740740740742e-06, |
|
"loss": 0.0, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 120.00285714285714, |
|
"grad_norm": 0.00023993337526917458, |
|
"learning_rate": 2.0634920634920634e-06, |
|
"loss": 0.0, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 120.00380952380952, |
|
"grad_norm": 0.0008844914846122265, |
|
"learning_rate": 2.0529100529100534e-06, |
|
"loss": 0.0, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 120.0047619047619, |
|
"grad_norm": 0.0005346594844013453, |
|
"learning_rate": 2.0423280423280425e-06, |
|
"loss": 0.0, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 120.00571428571429, |
|
"grad_norm": 0.000264531176071614, |
|
"learning_rate": 2.031746031746032e-06, |
|
"loss": 0.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 120.00666666666666, |
|
"grad_norm": 0.00016072009748313576, |
|
"learning_rate": 2.021164021164021e-06, |
|
"loss": 0.0, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 120.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.4006309509277344, |
|
"eval_runtime": 20.5345, |
|
"eval_samples_per_second": 3.604, |
|
"eval_steps_per_second": 0.925, |
|
"step": 8591 |
|
}, |
|
{ |
|
"epoch": 121.00085714285714, |
|
"grad_norm": 0.00020770763512700796, |
|
"learning_rate": 2.0105820105820108e-06, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 121.00180952380953, |
|
"grad_norm": 0.0001392192643834278, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 121.00276190476191, |
|
"grad_norm": 0.0002610778028611094, |
|
"learning_rate": 1.9894179894179895e-06, |
|
"loss": 0.0, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 121.00371428571428, |
|
"grad_norm": 0.0005038412055000663, |
|
"learning_rate": 1.978835978835979e-06, |
|
"loss": 0.0, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 121.00466666666667, |
|
"grad_norm": 0.00024570609093643725, |
|
"learning_rate": 1.968253968253968e-06, |
|
"loss": 0.0, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 121.00561904761905, |
|
"grad_norm": 0.0002881147665902972, |
|
"learning_rate": 1.9576719576719577e-06, |
|
"loss": 0.0, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 121.00657142857143, |
|
"grad_norm": 0.000290913536446169, |
|
"learning_rate": 1.9470899470899473e-06, |
|
"loss": 0.0, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 121.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.4198105335235596, |
|
"eval_runtime": 20.8054, |
|
"eval_samples_per_second": 3.557, |
|
"eval_steps_per_second": 0.913, |
|
"step": 8662 |
|
}, |
|
{ |
|
"epoch": 122.0007619047619, |
|
"grad_norm": 0.0002238577581010759, |
|
"learning_rate": 1.936507936507937e-06, |
|
"loss": 0.0, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 122.00171428571429, |
|
"grad_norm": 0.0014817883493378758, |
|
"learning_rate": 1.925925925925926e-06, |
|
"loss": 0.0, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 122.00266666666667, |
|
"grad_norm": 0.00022828546934761107, |
|
"learning_rate": 1.9153439153439156e-06, |
|
"loss": 0.0, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 122.00361904761905, |
|
"grad_norm": 0.0003081039758399129, |
|
"learning_rate": 1.904761904761905e-06, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 122.00457142857142, |
|
"grad_norm": 0.000204382959054783, |
|
"learning_rate": 1.8941798941798945e-06, |
|
"loss": 0.0, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 122.00552380952381, |
|
"grad_norm": 0.0001458204205846414, |
|
"learning_rate": 1.8835978835978838e-06, |
|
"loss": 0.0, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 122.00647619047619, |
|
"grad_norm": 0.00013663896243087947, |
|
"learning_rate": 1.8730158730158732e-06, |
|
"loss": 0.0, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 122.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.4388883113861084, |
|
"eval_runtime": 19.5939, |
|
"eval_samples_per_second": 3.777, |
|
"eval_steps_per_second": 0.97, |
|
"step": 8733 |
|
}, |
|
{ |
|
"epoch": 123.00066666666666, |
|
"grad_norm": 0.00021870314958505332, |
|
"learning_rate": 1.8624338624338625e-06, |
|
"loss": 0.0, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 123.00161904761904, |
|
"grad_norm": 0.0002467467274982482, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 123.00257142857143, |
|
"grad_norm": 0.000263078574789688, |
|
"learning_rate": 1.8412698412698416e-06, |
|
"loss": 0.0, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 123.00352380952381, |
|
"grad_norm": 0.0003389718767721206, |
|
"learning_rate": 1.830687830687831e-06, |
|
"loss": 0.0, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 123.0044761904762, |
|
"grad_norm": 0.00014280724280979484, |
|
"learning_rate": 1.8201058201058203e-06, |
|
"loss": 0.0, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 123.00542857142857, |
|
"grad_norm": 0.00016550095460843295, |
|
"learning_rate": 1.8095238095238097e-06, |
|
"loss": 0.0, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 123.00638095238095, |
|
"grad_norm": 0.0003188049013260752, |
|
"learning_rate": 1.798941798941799e-06, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 123.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.4763236045837402, |
|
"eval_runtime": 19.7757, |
|
"eval_samples_per_second": 3.742, |
|
"eval_steps_per_second": 0.961, |
|
"step": 8804 |
|
}, |
|
{ |
|
"epoch": 124.00057142857143, |
|
"grad_norm": 0.00017588127229828387, |
|
"learning_rate": 1.7883597883597886e-06, |
|
"loss": 0.0, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 124.0015238095238, |
|
"grad_norm": 0.00025684619322419167, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 124.00247619047619, |
|
"grad_norm": 0.0003379395930096507, |
|
"learning_rate": 1.7671957671957673e-06, |
|
"loss": 0.0, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 124.00342857142857, |
|
"grad_norm": 0.00019355901167728007, |
|
"learning_rate": 1.7566137566137567e-06, |
|
"loss": 0.0, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 124.00438095238096, |
|
"grad_norm": 0.005061679054051638, |
|
"learning_rate": 1.746031746031746e-06, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 124.00533333333334, |
|
"grad_norm": 0.00023676594719290733, |
|
"learning_rate": 1.7354497354497358e-06, |
|
"loss": 0.0, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 124.00628571428571, |
|
"grad_norm": 0.0005030606989748776, |
|
"learning_rate": 1.7248677248677251e-06, |
|
"loss": 0.0, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 124.0067619047619, |
|
"eval_accuracy": 0.7432432432432432, |
|
"eval_loss": 2.494699716567993, |
|
"eval_runtime": 18.7406, |
|
"eval_samples_per_second": 3.949, |
|
"eval_steps_per_second": 1.014, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 125.00047619047619, |
|
"grad_norm": 0.00018102419562637806, |
|
"learning_rate": 1.7142857142857145e-06, |
|
"loss": 0.0, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 125.00142857142858, |
|
"grad_norm": 0.0009986262302845716, |
|
"learning_rate": 1.7037037037037038e-06, |
|
"loss": 0.0, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 125.00238095238095, |
|
"grad_norm": 0.00020767083333339542, |
|
"learning_rate": 1.6931216931216932e-06, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 125.00333333333333, |
|
"grad_norm": 0.0005012244218960404, |
|
"learning_rate": 1.6825396825396827e-06, |
|
"loss": 0.0, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 125.00428571428571, |
|
"grad_norm": 0.00021223600197117776, |
|
"learning_rate": 1.671957671957672e-06, |
|
"loss": 0.0, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 125.0052380952381, |
|
"grad_norm": 0.00014842044038232416, |
|
"learning_rate": 1.6613756613756614e-06, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 125.00619047619048, |
|
"grad_norm": 0.0009348007733933628, |
|
"learning_rate": 1.6507936507936508e-06, |
|
"loss": 0.0, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 125.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.512620687484741, |
|
"eval_runtime": 22.2948, |
|
"eval_samples_per_second": 3.319, |
|
"eval_steps_per_second": 0.852, |
|
"step": 8946 |
|
}, |
|
{ |
|
"epoch": 126.00038095238095, |
|
"grad_norm": 0.00016746499750297517, |
|
"learning_rate": 1.6402116402116404e-06, |
|
"loss": 0.0, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 126.00133333333333, |
|
"grad_norm": 0.00012043194874422625, |
|
"learning_rate": 1.62962962962963e-06, |
|
"loss": 0.0, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 126.00228571428572, |
|
"grad_norm": 0.0005607526400126517, |
|
"learning_rate": 1.6190476190476193e-06, |
|
"loss": 0.0, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 126.00323809523809, |
|
"grad_norm": 0.005011443514376879, |
|
"learning_rate": 1.6084656084656086e-06, |
|
"loss": 0.0, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 126.00419047619047, |
|
"grad_norm": 0.0001848703541327268, |
|
"learning_rate": 1.597883597883598e-06, |
|
"loss": 0.0, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 126.00514285714286, |
|
"grad_norm": 0.00013767703785561025, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 126.00609523809524, |
|
"grad_norm": 0.0002853712940122932, |
|
"learning_rate": 1.5767195767195769e-06, |
|
"loss": 0.0, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 126.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.531374931335449, |
|
"eval_runtime": 25.9752, |
|
"eval_samples_per_second": 2.849, |
|
"eval_steps_per_second": 0.731, |
|
"step": 9017 |
|
}, |
|
{ |
|
"epoch": 127.00028571428571, |
|
"grad_norm": 0.0004919490311294794, |
|
"learning_rate": 1.5661375661375664e-06, |
|
"loss": 0.0, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 127.0012380952381, |
|
"grad_norm": 0.000153199172927998, |
|
"learning_rate": 1.5555555555555558e-06, |
|
"loss": 0.0, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 127.00219047619048, |
|
"grad_norm": 0.00021435142843984067, |
|
"learning_rate": 1.5449735449735451e-06, |
|
"loss": 0.0, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 127.00314285714286, |
|
"grad_norm": 0.0001620359835214913, |
|
"learning_rate": 1.5343915343915345e-06, |
|
"loss": 0.0, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 127.00409523809523, |
|
"grad_norm": 0.00019414816051721573, |
|
"learning_rate": 1.523809523809524e-06, |
|
"loss": 0.0, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 127.00504761904762, |
|
"grad_norm": 0.00018438031838741153, |
|
"learning_rate": 1.5132275132275134e-06, |
|
"loss": 0.0, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 127.006, |
|
"grad_norm": 0.00021760053641628474, |
|
"learning_rate": 1.5026455026455028e-06, |
|
"loss": 0.0, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 127.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.542877197265625, |
|
"eval_runtime": 14.1474, |
|
"eval_samples_per_second": 5.231, |
|
"eval_steps_per_second": 1.343, |
|
"step": 9088 |
|
}, |
|
{ |
|
"epoch": 128.00019047619048, |
|
"grad_norm": 0.004726231098175049, |
|
"learning_rate": 1.492063492063492e-06, |
|
"loss": 0.0, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 128.00114285714287, |
|
"grad_norm": 0.0004024128429591656, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 128.00209523809525, |
|
"grad_norm": 0.00015126141079235822, |
|
"learning_rate": 1.4708994708994708e-06, |
|
"loss": 0.0, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 128.0030476190476, |
|
"grad_norm": 0.0005607677157968283, |
|
"learning_rate": 1.4603174603174606e-06, |
|
"loss": 0.0, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 128.004, |
|
"grad_norm": 0.0008643632754683495, |
|
"learning_rate": 1.44973544973545e-06, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 128.00495238095237, |
|
"grad_norm": 0.00021982158068567514, |
|
"learning_rate": 1.4391534391534393e-06, |
|
"loss": 0.0, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 128.00590476190476, |
|
"grad_norm": 0.00013301124272402376, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 128.0067619047619, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 2.56596040725708, |
|
"eval_runtime": 13.96, |
|
"eval_samples_per_second": 5.301, |
|
"eval_steps_per_second": 1.361, |
|
"step": 9159 |
|
}, |
|
{ |
|
"epoch": 129.00009523809524, |
|
"grad_norm": 0.0001881965872598812, |
|
"learning_rate": 1.417989417989418e-06, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 129.00104761904763, |
|
"grad_norm": 0.00011493435158627108, |
|
"learning_rate": 1.4074074074074075e-06, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 129.002, |
|
"grad_norm": 0.00022677952074445784, |
|
"learning_rate": 1.3968253968253969e-06, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 129.0029523809524, |
|
"grad_norm": 0.0008895723149180412, |
|
"learning_rate": 1.3862433862433862e-06, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 129.00390476190475, |
|
"grad_norm": 0.00014621164882555604, |
|
"learning_rate": 1.3756613756613758e-06, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 129.00485714285713, |
|
"grad_norm": 0.0009619208867661655, |
|
"learning_rate": 1.3650793650793652e-06, |
|
"loss": 0.0, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 129.00580952380952, |
|
"grad_norm": 0.0004876498715020716, |
|
"learning_rate": 1.3544973544973547e-06, |
|
"loss": 0.0, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 129.0067619047619, |
|
"grad_norm": 0.00033994432305917144, |
|
"learning_rate": 1.343915343915344e-06, |
|
"loss": 0.0, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 129.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.582768440246582, |
|
"eval_runtime": 14.486, |
|
"eval_samples_per_second": 5.108, |
|
"eval_steps_per_second": 1.312, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 130.00095238095238, |
|
"grad_norm": 0.00011261526378802955, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.0, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 130.00190476190477, |
|
"grad_norm": 0.00062142638489604, |
|
"learning_rate": 1.3227513227513228e-06, |
|
"loss": 0.0, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 130.00285714285715, |
|
"grad_norm": 0.00016485525702591985, |
|
"learning_rate": 1.3121693121693121e-06, |
|
"loss": 0.0, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 130.00380952380954, |
|
"grad_norm": 0.00027830738690681756, |
|
"learning_rate": 1.3015873015873019e-06, |
|
"loss": 0.0, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 130.0047619047619, |
|
"grad_norm": 0.00020398409105837345, |
|
"learning_rate": 1.2910052910052912e-06, |
|
"loss": 0.0, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 130.00571428571428, |
|
"grad_norm": 0.0006426956388168037, |
|
"learning_rate": 1.2804232804232806e-06, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 130.00666666666666, |
|
"grad_norm": 0.0002507289173081517, |
|
"learning_rate": 1.26984126984127e-06, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 130.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.599635124206543, |
|
"eval_runtime": 14.2587, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 1.333, |
|
"step": 9301 |
|
}, |
|
{ |
|
"epoch": 131.00085714285714, |
|
"grad_norm": 0.00013853039126843214, |
|
"learning_rate": 1.2592592592592593e-06, |
|
"loss": 0.0, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 131.00180952380953, |
|
"grad_norm": 0.0019195597851648927, |
|
"learning_rate": 1.2486772486772486e-06, |
|
"loss": 0.0, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 131.0027619047619, |
|
"grad_norm": 0.00017239370208699256, |
|
"learning_rate": 1.2380952380952382e-06, |
|
"loss": 0.0, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 131.0037142857143, |
|
"grad_norm": 0.0006013477686792612, |
|
"learning_rate": 1.2275132275132276e-06, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 131.00466666666668, |
|
"grad_norm": 0.00041599702672101557, |
|
"learning_rate": 1.216931216931217e-06, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 131.00561904761904, |
|
"grad_norm": 0.00018188441754318774, |
|
"learning_rate": 1.2063492063492065e-06, |
|
"loss": 0.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 131.00657142857142, |
|
"grad_norm": 0.0003828182816505432, |
|
"learning_rate": 1.1957671957671958e-06, |
|
"loss": 0.0, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 131.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.608135223388672, |
|
"eval_runtime": 13.977, |
|
"eval_samples_per_second": 5.294, |
|
"eval_steps_per_second": 1.359, |
|
"step": 9372 |
|
}, |
|
{ |
|
"epoch": 132.0007619047619, |
|
"grad_norm": 0.0003586815728340298, |
|
"learning_rate": 1.1851851851851854e-06, |
|
"loss": 0.0, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 132.00171428571429, |
|
"grad_norm": 9.796415542950854e-05, |
|
"learning_rate": 1.1746031746031747e-06, |
|
"loss": 0.0, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 132.00266666666667, |
|
"grad_norm": 0.00012283321120776236, |
|
"learning_rate": 1.164021164021164e-06, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 132.00361904761905, |
|
"grad_norm": 0.0002064243599306792, |
|
"learning_rate": 1.1534391534391536e-06, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 132.00457142857144, |
|
"grad_norm": 0.00012249739666003734, |
|
"learning_rate": 1.142857142857143e-06, |
|
"loss": 0.0, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 132.00552380952382, |
|
"grad_norm": 0.00016905261145439, |
|
"learning_rate": 1.1322751322751323e-06, |
|
"loss": 0.0, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 132.00647619047618, |
|
"grad_norm": 0.00016746780602261424, |
|
"learning_rate": 1.1216931216931217e-06, |
|
"loss": 0.0, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 132.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.6265084743499756, |
|
"eval_runtime": 14.002, |
|
"eval_samples_per_second": 5.285, |
|
"eval_steps_per_second": 1.357, |
|
"step": 9443 |
|
}, |
|
{ |
|
"epoch": 133.00066666666666, |
|
"grad_norm": 0.00016834806592669338, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 133.00161904761904, |
|
"grad_norm": 0.00013323896564543247, |
|
"learning_rate": 1.1005291005291006e-06, |
|
"loss": 0.0, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 133.00257142857143, |
|
"grad_norm": 0.0001283081219298765, |
|
"learning_rate": 1.08994708994709e-06, |
|
"loss": 0.0, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 133.0035238095238, |
|
"grad_norm": 0.00014765470405109227, |
|
"learning_rate": 1.0793650793650795e-06, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 133.0044761904762, |
|
"grad_norm": 0.0001391248806612566, |
|
"learning_rate": 1.0687830687830689e-06, |
|
"loss": 0.0, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 133.00542857142858, |
|
"grad_norm": 0.00020245308405719697, |
|
"learning_rate": 1.0582010582010582e-06, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 133.00638095238097, |
|
"grad_norm": 0.00014681214815936983, |
|
"learning_rate": 1.0476190476190478e-06, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 133.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.6523962020874023, |
|
"eval_runtime": 14.6803, |
|
"eval_samples_per_second": 5.041, |
|
"eval_steps_per_second": 1.294, |
|
"step": 9514 |
|
}, |
|
{ |
|
"epoch": 134.00057142857142, |
|
"grad_norm": 0.00017680463497526944, |
|
"learning_rate": 1.0370370370370371e-06, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 134.0015238095238, |
|
"grad_norm": 0.00021717413619626313, |
|
"learning_rate": 1.0264550264550267e-06, |
|
"loss": 0.0, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 134.0024761904762, |
|
"grad_norm": 0.00017716505681164563, |
|
"learning_rate": 1.015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 134.00342857142857, |
|
"grad_norm": 0.00026766807422973216, |
|
"learning_rate": 1.0052910052910054e-06, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 134.00438095238096, |
|
"grad_norm": 0.0002117603289661929, |
|
"learning_rate": 9.947089947089947e-07, |
|
"loss": 0.0, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 134.00533333333334, |
|
"grad_norm": 0.00016684371803421527, |
|
"learning_rate": 9.84126984126984e-07, |
|
"loss": 0.0, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 134.00628571428572, |
|
"grad_norm": 0.00020365270029287785, |
|
"learning_rate": 9.735449735449736e-07, |
|
"loss": 0.0, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 134.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.663364887237549, |
|
"eval_runtime": 13.7653, |
|
"eval_samples_per_second": 5.376, |
|
"eval_steps_per_second": 1.38, |
|
"step": 9585 |
|
}, |
|
{ |
|
"epoch": 135.00047619047618, |
|
"grad_norm": 0.0001860986085375771, |
|
"learning_rate": 9.62962962962963e-07, |
|
"loss": 0.0, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 135.00142857142856, |
|
"grad_norm": 0.00012733951734844595, |
|
"learning_rate": 9.523809523809525e-07, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 135.00238095238095, |
|
"grad_norm": 0.00011300836922600865, |
|
"learning_rate": 9.417989417989419e-07, |
|
"loss": 0.0, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 135.00333333333333, |
|
"grad_norm": 0.0004088705172762275, |
|
"learning_rate": 9.312169312169313e-07, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 135.00428571428571, |
|
"grad_norm": 0.0001340518647339195, |
|
"learning_rate": 9.206349206349208e-07, |
|
"loss": 0.0, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 135.0052380952381, |
|
"grad_norm": 0.00015032911323942244, |
|
"learning_rate": 9.100529100529102e-07, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 135.00619047619048, |
|
"grad_norm": 0.00011336953321006149, |
|
"learning_rate": 8.994708994708995e-07, |
|
"loss": 0.0, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 135.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.6924996376037598, |
|
"eval_runtime": 13.8914, |
|
"eval_samples_per_second": 5.327, |
|
"eval_steps_per_second": 1.368, |
|
"step": 9656 |
|
}, |
|
{ |
|
"epoch": 136.00038095238097, |
|
"grad_norm": 0.0001399925968144089, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 136.00133333333332, |
|
"grad_norm": 0.00018644209194462746, |
|
"learning_rate": 8.783068783068783e-07, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 136.0022857142857, |
|
"grad_norm": 0.00031613794271834195, |
|
"learning_rate": 8.677248677248679e-07, |
|
"loss": 0.0, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 136.0032380952381, |
|
"grad_norm": 0.0002503306313883513, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.0, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 136.00419047619047, |
|
"grad_norm": 0.00015492939564865083, |
|
"learning_rate": 8.465608465608466e-07, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 136.00514285714286, |
|
"grad_norm": 0.00011654701665975153, |
|
"learning_rate": 8.35978835978836e-07, |
|
"loss": 0.0, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 136.00609523809524, |
|
"grad_norm": 0.0013862367486581206, |
|
"learning_rate": 8.253968253968254e-07, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 136.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.7700693607330322, |
|
"eval_runtime": 15.7687, |
|
"eval_samples_per_second": 4.693, |
|
"eval_steps_per_second": 1.205, |
|
"step": 9727 |
|
}, |
|
{ |
|
"epoch": 137.00028571428572, |
|
"grad_norm": 0.00044550379971042275, |
|
"learning_rate": 8.14814814814815e-07, |
|
"loss": 0.0, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 137.0012380952381, |
|
"grad_norm": 0.00019741806318052113, |
|
"learning_rate": 8.042328042328043e-07, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 137.00219047619046, |
|
"grad_norm": 9.011803922476247e-05, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 0.0, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 137.00314285714285, |
|
"grad_norm": 0.00011569274647627026, |
|
"learning_rate": 7.830687830687832e-07, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 137.00409523809523, |
|
"grad_norm": 0.00016636776854284108, |
|
"learning_rate": 7.724867724867726e-07, |
|
"loss": 0.0, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 137.00504761904762, |
|
"grad_norm": 0.0002474163193255663, |
|
"learning_rate": 7.61904761904762e-07, |
|
"loss": 0.0, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 137.006, |
|
"grad_norm": 0.00030764579423703253, |
|
"learning_rate": 7.513227513227514e-07, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 137.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.777440309524536, |
|
"eval_runtime": 75.651, |
|
"eval_samples_per_second": 0.978, |
|
"eval_steps_per_second": 0.251, |
|
"step": 9798 |
|
}, |
|
{ |
|
"epoch": 138.00019047619048, |
|
"grad_norm": 0.0002310011041117832, |
|
"learning_rate": 7.407407407407407e-07, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 138.00114285714287, |
|
"grad_norm": 0.00014927572919987142, |
|
"learning_rate": 7.301587301587303e-07, |
|
"loss": 0.0, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 138.00209523809525, |
|
"grad_norm": 0.0002394427574472502, |
|
"learning_rate": 7.195767195767196e-07, |
|
"loss": 0.0, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 138.0030476190476, |
|
"grad_norm": 0.0006395941600203514, |
|
"learning_rate": 7.08994708994709e-07, |
|
"loss": 0.0, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 138.004, |
|
"grad_norm": 0.00022553169401362538, |
|
"learning_rate": 6.984126984126984e-07, |
|
"loss": 0.0, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 138.00495238095237, |
|
"grad_norm": 0.0001349856611341238, |
|
"learning_rate": 6.878306878306879e-07, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 138.00590476190476, |
|
"grad_norm": 0.00011537998216226697, |
|
"learning_rate": 6.772486772486774e-07, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 138.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.7755815982818604, |
|
"eval_runtime": 18.245, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 1.041, |
|
"step": 9869 |
|
}, |
|
{ |
|
"epoch": 139.00009523809524, |
|
"grad_norm": 0.00015412215725518763, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 139.00104761904763, |
|
"grad_norm": 0.000286016525933519, |
|
"learning_rate": 6.560846560846561e-07, |
|
"loss": 0.0, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 139.002, |
|
"grad_norm": 0.0003684030089061707, |
|
"learning_rate": 6.455026455026456e-07, |
|
"loss": 0.0, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 139.0029523809524, |
|
"grad_norm": 9.316956857219338e-05, |
|
"learning_rate": 6.34920634920635e-07, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 139.00390476190475, |
|
"grad_norm": 0.00022484293731395155, |
|
"learning_rate": 6.243386243386243e-07, |
|
"loss": 0.0, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 139.00485714285713, |
|
"grad_norm": 0.00016070107812993228, |
|
"learning_rate": 6.137566137566138e-07, |
|
"loss": 0.0, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 139.00580952380952, |
|
"grad_norm": 0.00013297729310579598, |
|
"learning_rate": 6.031746031746032e-07, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 139.0067619047619, |
|
"grad_norm": 0.0006777092348784208, |
|
"learning_rate": 5.925925925925927e-07, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 139.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.7789177894592285, |
|
"eval_runtime": 16.5578, |
|
"eval_samples_per_second": 4.469, |
|
"eval_steps_per_second": 1.147, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 140.00095238095238, |
|
"grad_norm": 0.00020421307999640703, |
|
"learning_rate": 5.82010582010582e-07, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 140.00190476190477, |
|
"grad_norm": 0.00014328365796245635, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.0, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 140.00285714285715, |
|
"grad_norm": 0.00010416995792184025, |
|
"learning_rate": 5.608465608465608e-07, |
|
"loss": 0.0, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 140.00380952380954, |
|
"grad_norm": 0.00011329493281664327, |
|
"learning_rate": 5.502645502645503e-07, |
|
"loss": 0.0, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 140.0047619047619, |
|
"grad_norm": 0.00016567722195759416, |
|
"learning_rate": 5.396825396825398e-07, |
|
"loss": 0.0, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 140.00571428571428, |
|
"grad_norm": 0.00014367059338837862, |
|
"learning_rate": 5.291005291005291e-07, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 140.00666666666666, |
|
"grad_norm": 0.00011329939297866076, |
|
"learning_rate": 5.185185185185186e-07, |
|
"loss": 0.0, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 140.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.7818410396575928, |
|
"eval_runtime": 17.1649, |
|
"eval_samples_per_second": 4.311, |
|
"eval_steps_per_second": 1.107, |
|
"step": 10011 |
|
}, |
|
{ |
|
"epoch": 141.00085714285714, |
|
"grad_norm": 0.00012502900790423155, |
|
"learning_rate": 5.07936507936508e-07, |
|
"loss": 0.0, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 141.00180952380953, |
|
"grad_norm": 0.0004600577231030911, |
|
"learning_rate": 4.973544973544974e-07, |
|
"loss": 0.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 141.0027619047619, |
|
"grad_norm": 0.00015750101010780782, |
|
"learning_rate": 4.867724867724868e-07, |
|
"loss": 0.0, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 141.0037142857143, |
|
"grad_norm": 0.00011517933307914063, |
|
"learning_rate": 4.7619047619047623e-07, |
|
"loss": 0.0, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 141.00466666666668, |
|
"grad_norm": 0.00012730502930935472, |
|
"learning_rate": 4.6560846560846563e-07, |
|
"loss": 0.0, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 141.00561904761904, |
|
"grad_norm": 0.00028043414931744337, |
|
"learning_rate": 4.550264550264551e-07, |
|
"loss": 0.0, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 141.00657142857142, |
|
"grad_norm": 0.00022784181055612862, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.0, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 141.0067619047619, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 2.716393232345581, |
|
"eval_runtime": 19.2663, |
|
"eval_samples_per_second": 3.841, |
|
"eval_steps_per_second": 0.986, |
|
"step": 10082 |
|
}, |
|
{ |
|
"epoch": 142.0007619047619, |
|
"grad_norm": 0.00018083921167999506, |
|
"learning_rate": 4.3386243386243395e-07, |
|
"loss": 0.0, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 142.00171428571429, |
|
"grad_norm": 0.00011656145215965807, |
|
"learning_rate": 4.232804232804233e-07, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 142.00266666666667, |
|
"grad_norm": 0.0002118592383340001, |
|
"learning_rate": 4.126984126984127e-07, |
|
"loss": 0.0, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 142.00361904761905, |
|
"grad_norm": 0.00029809624538756907, |
|
"learning_rate": 4.0211640211640215e-07, |
|
"loss": 0.0, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 142.00457142857144, |
|
"grad_norm": 0.0001748933136695996, |
|
"learning_rate": 3.915343915343916e-07, |
|
"loss": 0.0, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 142.00552380952382, |
|
"grad_norm": 0.0005882106488570571, |
|
"learning_rate": 3.80952380952381e-07, |
|
"loss": 0.0, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 142.00647619047618, |
|
"grad_norm": 0.00013971776934340596, |
|
"learning_rate": 3.7037037037037036e-07, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 142.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.9570682048797607, |
|
"eval_runtime": 18.9008, |
|
"eval_samples_per_second": 3.915, |
|
"eval_steps_per_second": 1.005, |
|
"step": 10153 |
|
}, |
|
{ |
|
"epoch": 143.00066666666666, |
|
"grad_norm": 0.0005308242398314178, |
|
"learning_rate": 3.597883597883598e-07, |
|
"loss": 0.0, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 143.00161904761904, |
|
"grad_norm": 0.00011192076635779813, |
|
"learning_rate": 3.492063492063492e-07, |
|
"loss": 0.0, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 143.00257142857143, |
|
"grad_norm": 9.586880332790315e-05, |
|
"learning_rate": 3.386243386243387e-07, |
|
"loss": 0.0, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 143.0035238095238, |
|
"grad_norm": 0.0002831014571711421, |
|
"learning_rate": 3.2804232804232803e-07, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 143.0044761904762, |
|
"grad_norm": 0.0001229299232363701, |
|
"learning_rate": 3.174603174603175e-07, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 143.00542857142858, |
|
"grad_norm": 9.351440530736e-05, |
|
"learning_rate": 3.068783068783069e-07, |
|
"loss": 0.0, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 143.00638095238097, |
|
"grad_norm": 0.00017162870790343732, |
|
"learning_rate": 2.9629629629629634e-07, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 143.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.9561750888824463, |
|
"eval_runtime": 16.3241, |
|
"eval_samples_per_second": 4.533, |
|
"eval_steps_per_second": 1.164, |
|
"step": 10224 |
|
}, |
|
{ |
|
"epoch": 144.00057142857142, |
|
"grad_norm": 0.00039424237911589444, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.0, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 144.0015238095238, |
|
"grad_norm": 0.0001173912751255557, |
|
"learning_rate": 2.7513227513227515e-07, |
|
"loss": 0.0, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 144.0024761904762, |
|
"grad_norm": 0.0003806989989243448, |
|
"learning_rate": 2.6455026455026455e-07, |
|
"loss": 0.0, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 144.00342857142857, |
|
"grad_norm": 0.00013932188448961824, |
|
"learning_rate": 2.53968253968254e-07, |
|
"loss": 0.0, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 144.00438095238096, |
|
"grad_norm": 0.00012908896314911544, |
|
"learning_rate": 2.433862433862434e-07, |
|
"loss": 0.0, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 144.00533333333334, |
|
"grad_norm": 0.00014965585432946682, |
|
"learning_rate": 2.3280423280423281e-07, |
|
"loss": 0.0, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 144.00628571428572, |
|
"grad_norm": 0.03986010327935219, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 144.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.95378041267395, |
|
"eval_runtime": 16.0055, |
|
"eval_samples_per_second": 4.623, |
|
"eval_steps_per_second": 1.187, |
|
"step": 10295 |
|
}, |
|
{ |
|
"epoch": 145.00047619047618, |
|
"grad_norm": 0.00011041080870199949, |
|
"learning_rate": 2.1164021164021165e-07, |
|
"loss": 0.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 145.00142857142856, |
|
"grad_norm": 0.0001778493751771748, |
|
"learning_rate": 2.0105820105820108e-07, |
|
"loss": 0.0, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 145.00238095238095, |
|
"grad_norm": 0.00011482149420771748, |
|
"learning_rate": 1.904761904761905e-07, |
|
"loss": 0.0, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 145.00333333333333, |
|
"grad_norm": 9.02183455764316e-05, |
|
"learning_rate": 1.798941798941799e-07, |
|
"loss": 0.0, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 145.00428571428571, |
|
"grad_norm": 0.0003151354903820902, |
|
"learning_rate": 1.6931216931216934e-07, |
|
"loss": 0.0, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 145.0052380952381, |
|
"grad_norm": 0.00024107014178298414, |
|
"learning_rate": 1.5873015873015874e-07, |
|
"loss": 0.0, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 145.00619047619048, |
|
"grad_norm": 8.7294916738756e-05, |
|
"learning_rate": 1.4814814814814817e-07, |
|
"loss": 0.0, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 145.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.9513986110687256, |
|
"eval_runtime": 16.1391, |
|
"eval_samples_per_second": 4.585, |
|
"eval_steps_per_second": 1.177, |
|
"step": 10366 |
|
}, |
|
{ |
|
"epoch": 146.00038095238097, |
|
"grad_norm": 0.0003729898016899824, |
|
"learning_rate": 1.3756613756613757e-07, |
|
"loss": 0.0, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 146.00133333333332, |
|
"grad_norm": 0.0001791256363503635, |
|
"learning_rate": 1.26984126984127e-07, |
|
"loss": 0.0, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 146.0022857142857, |
|
"grad_norm": 0.0001602688425919041, |
|
"learning_rate": 1.1640211640211641e-07, |
|
"loss": 0.0, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 146.0032380952381, |
|
"grad_norm": 9.487938223173842e-05, |
|
"learning_rate": 1.0582010582010582e-07, |
|
"loss": 0.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 146.00419047619047, |
|
"grad_norm": 0.00012544992205221206, |
|
"learning_rate": 9.523809523809525e-08, |
|
"loss": 0.0, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 146.00514285714286, |
|
"grad_norm": 0.00013564463006332517, |
|
"learning_rate": 8.465608465608467e-08, |
|
"loss": 0.0, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 146.00609523809524, |
|
"grad_norm": 0.00021179339091759175, |
|
"learning_rate": 7.407407407407409e-08, |
|
"loss": 0.0, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 146.0067619047619, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.9517064094543457, |
|
"eval_runtime": 14.0509, |
|
"eval_samples_per_second": 5.267, |
|
"eval_steps_per_second": 1.352, |
|
"step": 10437 |
|
}, |
|
{ |
|
"epoch": 147.00028571428572, |
|
"grad_norm": 0.00010382410255260766, |
|
"learning_rate": 6.34920634920635e-08, |
|
"loss": 0.0, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 147.0012380952381, |
|
"grad_norm": 0.0006173243164084852, |
|
"learning_rate": 5.291005291005291e-08, |
|
"loss": 0.0, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 147.00219047619046, |
|
"grad_norm": 0.0001852709538070485, |
|
"learning_rate": 4.2328042328042335e-08, |
|
"loss": 0.0, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 147.00314285714285, |
|
"grad_norm": 0.00010010774713009596, |
|
"learning_rate": 3.174603174603175e-08, |
|
"loss": 0.0, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 147.00409523809523, |
|
"grad_norm": 0.00010709642083384097, |
|
"learning_rate": 2.1164021164021167e-08, |
|
"loss": 0.0, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 147.00504761904762, |
|
"grad_norm": 0.00024684463278390467, |
|
"learning_rate": 1.0582010582010584e-08, |
|
"loss": 0.0, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"grad_norm": 0.00019418797455728054, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 2.9517529010772705, |
|
"eval_runtime": 15.1931, |
|
"eval_samples_per_second": 4.871, |
|
"eval_steps_per_second": 1.251, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"step": 10500, |
|
"total_flos": 1.8248666263741838e+20, |
|
"train_loss": 0.08116123437592995, |
|
"train_runtime": 21876.7284, |
|
"train_samples_per_second": 1.92, |
|
"train_steps_per_second": 0.48 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"eval_accuracy": 0.7837837837837838, |
|
"eval_loss": 0.49420419335365295, |
|
"eval_runtime": 15.3425, |
|
"eval_samples_per_second": 4.823, |
|
"eval_steps_per_second": 1.238, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"eval_accuracy": 0.7837837837837838, |
|
"eval_loss": 0.49420419335365295, |
|
"eval_runtime": 13.6975, |
|
"eval_samples_per_second": 5.402, |
|
"eval_steps_per_second": 1.387, |
|
"step": 10500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8248666263741838e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|