|
{ |
|
"best_metric": 3.9567816257476807, |
|
"best_model_checkpoint": "chinese-roberta-wwm-ext-finetuned-MC-hyper/checkpoint-1375", |
|
"epoch": 5.0, |
|
"global_step": 6875, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.970909090909091e-05, |
|
"loss": 1.3244, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.941818181818182e-05, |
|
"loss": 0.9671, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.912727272727273e-05, |
|
"loss": 0.8194, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.883636363636364e-05, |
|
"loss": 0.7545, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.854545454545455e-05, |
|
"loss": 0.7631, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.825454545454546e-05, |
|
"loss": 0.6651, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.796363636363637e-05, |
|
"loss": 0.5622, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.767272727272728e-05, |
|
"loss": 0.6096, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.738181818181819e-05, |
|
"loss": 0.4859, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.70909090909091e-05, |
|
"loss": 0.4879, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.680000000000001e-05, |
|
"loss": 0.4388, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.650909090909092e-05, |
|
"loss": 0.478, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.621818181818181e-05, |
|
"loss": 0.3661, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.592727272727274e-05, |
|
"loss": 0.4131, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.563636363636365e-05, |
|
"loss": 0.3415, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.534545454545456e-05, |
|
"loss": 0.3695, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.505454545454546e-05, |
|
"loss": 0.3059, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.476363636363636e-05, |
|
"loss": 0.268, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.447272727272728e-05, |
|
"loss": 0.3019, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.418181818181818e-05, |
|
"loss": 0.2147, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.38909090909091e-05, |
|
"loss": 0.3197, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.360000000000001e-05, |
|
"loss": 0.2764, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.330909090909091e-05, |
|
"loss": 0.2022, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.301818181818183e-05, |
|
"loss": 0.2326, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.272727272727273e-05, |
|
"loss": 0.3168, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.243636363636365e-05, |
|
"loss": 0.2157, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.214545454545455e-05, |
|
"loss": 0.2783, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.185454545454545e-05, |
|
"loss": 0.1759, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.156363636363636e-05, |
|
"loss": 0.169, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.127272727272727e-05, |
|
"loss": 0.1731, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.09818181818182e-05, |
|
"loss": 0.2469, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.069090909090909e-05, |
|
"loss": 0.1617, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.04e-05, |
|
"loss": 0.1841, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.010909090909091e-05, |
|
"loss": 0.1549, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.981818181818182e-05, |
|
"loss": 0.1393, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.952727272727273e-05, |
|
"loss": 0.1853, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.923636363636364e-05, |
|
"loss": 0.1609, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.894545454545455e-05, |
|
"loss": 0.1493, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.865454545454546e-05, |
|
"loss": 0.1436, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.836363636363637e-05, |
|
"loss": 0.1626, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.807272727272728e-05, |
|
"loss": 0.2139, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.778181818181819e-05, |
|
"loss": 0.1661, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 8.74909090909091e-05, |
|
"loss": 0.1579, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.72e-05, |
|
"loss": 0.1424, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.690909090909091e-05, |
|
"loss": 0.073, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 8.661818181818182e-05, |
|
"loss": 0.1764, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 8.632727272727273e-05, |
|
"loss": 0.1296, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.603636363636364e-05, |
|
"loss": 0.144, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.574545454545455e-05, |
|
"loss": 0.0955, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.545454545454545e-05, |
|
"loss": 0.1262, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.516363636363637e-05, |
|
"loss": 0.1599, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.487272727272728e-05, |
|
"loss": 0.0835, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.458181818181819e-05, |
|
"loss": 0.1509, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.42909090909091e-05, |
|
"loss": 0.0916, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.4e-05, |
|
"loss": 0.0863, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.370909090909092e-05, |
|
"loss": 0.1302, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.341818181818181e-05, |
|
"loss": 0.1324, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.312727272727274e-05, |
|
"loss": 0.0838, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.283636363636365e-05, |
|
"loss": 0.1127, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.254545454545454e-05, |
|
"loss": 0.1193, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.225454545454547e-05, |
|
"loss": 0.1404, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.196363636363636e-05, |
|
"loss": 0.125, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.167272727272728e-05, |
|
"loss": 0.0519, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.138181818181818e-05, |
|
"loss": 0.1256, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.109090909090909e-05, |
|
"loss": 0.1016, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 0.0944, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.050909090909091e-05, |
|
"loss": 0.0666, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.021818181818183e-05, |
|
"loss": 0.0903, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.1940000057220459, |
|
"eval_loss": 3.9567816257476807, |
|
"eval_runtime": 3.3624, |
|
"eval_samples_per_second": 148.705, |
|
"eval_steps_per_second": 4.759, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.992727272727273e-05, |
|
"loss": 0.1101, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 7.963636363636364e-05, |
|
"loss": 0.0603, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 7.934545454545455e-05, |
|
"loss": 0.0622, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.905454545454546e-05, |
|
"loss": 0.0463, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.876363636363638e-05, |
|
"loss": 0.0404, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 7.847272727272727e-05, |
|
"loss": 0.0476, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 7.818181818181818e-05, |
|
"loss": 0.0763, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 7.789090909090909e-05, |
|
"loss": 0.0363, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 7.76e-05, |
|
"loss": 0.0697, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 7.730909090909091e-05, |
|
"loss": 0.0838, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 7.701818181818182e-05, |
|
"loss": 0.0587, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.672727272727273e-05, |
|
"loss": 0.0246, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.643636363636364e-05, |
|
"loss": 0.0461, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 7.614545454545455e-05, |
|
"loss": 0.0695, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.585454545454546e-05, |
|
"loss": 0.0736, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.556363636363637e-05, |
|
"loss": 0.0261, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.527272727272728e-05, |
|
"loss": 0.0438, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.498181818181819e-05, |
|
"loss": 0.0858, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.469090909090908e-05, |
|
"loss": 0.0248, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.44e-05, |
|
"loss": 0.0615, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.410909090909092e-05, |
|
"loss": 0.0123, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 7.381818181818182e-05, |
|
"loss": 0.0451, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 7.352727272727273e-05, |
|
"loss": 0.05, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 7.323636363636363e-05, |
|
"loss": 0.0415, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 7.294545454545455e-05, |
|
"loss": 0.0734, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 7.265454545454545e-05, |
|
"loss": 0.037, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 7.236363636363637e-05, |
|
"loss": 0.045, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.207272727272728e-05, |
|
"loss": 0.0914, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 7.178181818181818e-05, |
|
"loss": 0.0288, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 7.14909090909091e-05, |
|
"loss": 0.0289, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.12e-05, |
|
"loss": 0.0355, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 7.090909090909092e-05, |
|
"loss": 0.0376, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 7.061818181818181e-05, |
|
"loss": 0.0375, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.032727272727272e-05, |
|
"loss": 0.0695, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.003636363636365e-05, |
|
"loss": 0.0414, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 6.974545454545454e-05, |
|
"loss": 0.0308, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 6.945454545454547e-05, |
|
"loss": 0.0209, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.916363636363636e-05, |
|
"loss": 0.0731, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 6.887272727272727e-05, |
|
"loss": 0.0409, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.858181818181818e-05, |
|
"loss": 0.0544, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.829090909090909e-05, |
|
"loss": 0.0677, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 6.800000000000001e-05, |
|
"loss": 0.0345, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 6.770909090909091e-05, |
|
"loss": 0.0262, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 6.741818181818182e-05, |
|
"loss": 0.066, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 6.712727272727273e-05, |
|
"loss": 0.0578, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 6.683636363636364e-05, |
|
"loss": 0.066, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 6.654545454545455e-05, |
|
"loss": 0.0565, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 6.625454545454546e-05, |
|
"loss": 0.0428, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.596363636363637e-05, |
|
"loss": 0.0387, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 6.567272727272727e-05, |
|
"loss": 0.0294, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.538181818181818e-05, |
|
"loss": 0.0332, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.50909090909091e-05, |
|
"loss": 0.0401, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.48e-05, |
|
"loss": 0.0324, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.450909090909091e-05, |
|
"loss": 0.0162, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.421818181818182e-05, |
|
"loss": 0.0501, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 6.392727272727273e-05, |
|
"loss": 0.0335, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 6.363636363636364e-05, |
|
"loss": 0.025, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 6.334545454545455e-05, |
|
"loss": 0.0402, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.305454545454546e-05, |
|
"loss": 0.0624, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.276363636363637e-05, |
|
"loss": 0.0337, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.247272727272728e-05, |
|
"loss": 0.0264, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.218181818181819e-05, |
|
"loss": 0.057, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.18909090909091e-05, |
|
"loss": 0.0381, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.16e-05, |
|
"loss": 0.0316, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.130909090909092e-05, |
|
"loss": 0.0492, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.101818181818182e-05, |
|
"loss": 0.0405, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.0727272727272735e-05, |
|
"loss": 0.0594, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.043636363636364e-05, |
|
"loss": 0.0263, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.014545454545455e-05, |
|
"loss": 0.0432, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.1720000058412552, |
|
"eval_loss": 4.625512599945068, |
|
"eval_runtime": 3.3701, |
|
"eval_samples_per_second": 148.362, |
|
"eval_steps_per_second": 4.748, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.985454545454545e-05, |
|
"loss": 0.0235, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.9563636363636366e-05, |
|
"loss": 0.0293, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.927272727272728e-05, |
|
"loss": 0.0326, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 5.8981818181818184e-05, |
|
"loss": 0.0124, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 5.8690909090909094e-05, |
|
"loss": 0.0157, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5.8399999999999997e-05, |
|
"loss": 0.0138, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 5.810909090909091e-05, |
|
"loss": 0.0291, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.7818181818181815e-05, |
|
"loss": 0.0047, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.752727272727273e-05, |
|
"loss": 0.0188, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.723636363636364e-05, |
|
"loss": 0.0277, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.6945454545454544e-05, |
|
"loss": 0.0118, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.665454545454546e-05, |
|
"loss": 0.0277, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.636363636363636e-05, |
|
"loss": 0.006, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.607272727272728e-05, |
|
"loss": 0.0214, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.578181818181818e-05, |
|
"loss": 0.0086, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.549090909090909e-05, |
|
"loss": 0.0091, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.520000000000001e-05, |
|
"loss": 0.0055, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.490909090909091e-05, |
|
"loss": 0.0326, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 5.4618181818181826e-05, |
|
"loss": 0.0145, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 5.432727272727273e-05, |
|
"loss": 0.0087, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 5.403636363636364e-05, |
|
"loss": 0.0084, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 5.374545454545454e-05, |
|
"loss": 0.0116, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5.3454545454545457e-05, |
|
"loss": 0.0254, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.316363636363637e-05, |
|
"loss": 0.0033, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.2872727272727275e-05, |
|
"loss": 0.0246, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.2581818181818185e-05, |
|
"loss": 0.0222, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 5.229090909090909e-05, |
|
"loss": 0.0093, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 0.0112, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.1709090909090906e-05, |
|
"loss": 0.0335, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.141818181818182e-05, |
|
"loss": 0.0382, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.112727272727273e-05, |
|
"loss": 0.0264, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.0836363636363634e-05, |
|
"loss": 0.0131, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.054545454545455e-05, |
|
"loss": 0.0198, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.025454545454545e-05, |
|
"loss": 0.0027, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.996363636363637e-05, |
|
"loss": 0.0116, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.967272727272728e-05, |
|
"loss": 0.0066, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.938181818181818e-05, |
|
"loss": 0.04, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.909090909090909e-05, |
|
"loss": 0.0097, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.88e-05, |
|
"loss": 0.0119, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.850909090909091e-05, |
|
"loss": 0.0111, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.821818181818182e-05, |
|
"loss": 0.0311, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.792727272727273e-05, |
|
"loss": 0.0234, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.763636363636364e-05, |
|
"loss": 0.0124, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 4.734545454545455e-05, |
|
"loss": 0.0068, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 4.705454545454546e-05, |
|
"loss": 0.0349, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 4.6763636363636366e-05, |
|
"loss": 0.0112, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 4.6472727272727276e-05, |
|
"loss": 0.0122, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.618181818181818e-05, |
|
"loss": 0.0071, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.5890909090909094e-05, |
|
"loss": 0.0504, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.5600000000000004e-05, |
|
"loss": 0.0307, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.530909090909091e-05, |
|
"loss": 0.0048, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.501818181818182e-05, |
|
"loss": 0.0136, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.472727272727273e-05, |
|
"loss": 0.038, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.4436363636363635e-05, |
|
"loss": 0.0089, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.4145454545454544e-05, |
|
"loss": 0.0069, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.385454545454546e-05, |
|
"loss": 0.0161, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.356363636363637e-05, |
|
"loss": 0.013, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.327272727272728e-05, |
|
"loss": 0.0049, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.298181818181818e-05, |
|
"loss": 0.0304, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.269090909090909e-05, |
|
"loss": 0.03, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.24e-05, |
|
"loss": 0.0313, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 4.210909090909091e-05, |
|
"loss": 0.0079, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.181818181818182e-05, |
|
"loss": 0.0189, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 4.152727272727273e-05, |
|
"loss": 0.0019, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.123636363636364e-05, |
|
"loss": 0.0325, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.094545454545455e-05, |
|
"loss": 0.0276, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.065454545454546e-05, |
|
"loss": 0.0137, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.0363636363636367e-05, |
|
"loss": 0.0169, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.0072727272727276e-05, |
|
"loss": 0.0087, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.17599999904632568, |
|
"eval_loss": 6.114068508148193, |
|
"eval_runtime": 3.4252, |
|
"eval_samples_per_second": 145.975, |
|
"eval_steps_per_second": 4.671, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.978181818181818e-05, |
|
"loss": 0.0005, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.9490909090909095e-05, |
|
"loss": 0.0109, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.0128, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.8909090909090914e-05, |
|
"loss": 0.0016, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.861818181818182e-05, |
|
"loss": 0.0009, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.8327272727272726e-05, |
|
"loss": 0.0036, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.8036363636363635e-05, |
|
"loss": 0.0124, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.7745454545454544e-05, |
|
"loss": 0.0128, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.745454545454546e-05, |
|
"loss": 0.0029, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.716363636363637e-05, |
|
"loss": 0.0088, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.687272727272727e-05, |
|
"loss": 0.0129, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.658181818181818e-05, |
|
"loss": 0.0012, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.629090909090909e-05, |
|
"loss": 0.0001, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.0116, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.570909090909091e-05, |
|
"loss": 0.0005, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.541818181818182e-05, |
|
"loss": 0.0013, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.512727272727273e-05, |
|
"loss": 0.0349, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.483636363636364e-05, |
|
"loss": 0.0013, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.454545454545455e-05, |
|
"loss": 0.0025, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.425454545454546e-05, |
|
"loss": 0.0106, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.396363636363637e-05, |
|
"loss": 0.0107, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.367272727272727e-05, |
|
"loss": 0.0055, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.338181818181818e-05, |
|
"loss": 0.0021, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.3090909090909095e-05, |
|
"loss": 0.0187, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.2800000000000004e-05, |
|
"loss": 0.0005, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.2509090909090914e-05, |
|
"loss": 0.0126, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.2218181818181816e-05, |
|
"loss": 0.0001, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.1927272727272726e-05, |
|
"loss": 0.0022, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.1636363636363635e-05, |
|
"loss": 0.0018, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.1345454545454545e-05, |
|
"loss": 0.001, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.105454545454546e-05, |
|
"loss": 0.0048, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.0763636363636364e-05, |
|
"loss": 0.0025, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.0472727272727276e-05, |
|
"loss": 0.0032, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.0181818181818182e-05, |
|
"loss": 0.0003, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.9890909090909092e-05, |
|
"loss": 0.0125, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.96e-05, |
|
"loss": 0.0009, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.9309090909090907e-05, |
|
"loss": 0.0007, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.9018181818181823e-05, |
|
"loss": 0.0206, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.872727272727273e-05, |
|
"loss": 0.007, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.843636363636364e-05, |
|
"loss": 0.0048, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.8145454545454548e-05, |
|
"loss": 0.0023, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.7854545454545454e-05, |
|
"loss": 0.0015, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.7563636363636364e-05, |
|
"loss": 0.0081, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.0094, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.6981818181818186e-05, |
|
"loss": 0.0001, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.6690909090909095e-05, |
|
"loss": 0.01, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.64e-05, |
|
"loss": 0.0074, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 2.610909090909091e-05, |
|
"loss": 0.0017, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.581818181818182e-05, |
|
"loss": 0.0112, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.5527272727272726e-05, |
|
"loss": 0.0022, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.5236363636363636e-05, |
|
"loss": 0.0004, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.494545454545455e-05, |
|
"loss": 0.0, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.4654545454545454e-05, |
|
"loss": 0.0134, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.4363636363636364e-05, |
|
"loss": 0.0014, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.4072727272727273e-05, |
|
"loss": 0.0001, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.3781818181818183e-05, |
|
"loss": 0.0075, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.3490909090909092e-05, |
|
"loss": 0.0021, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.32e-05, |
|
"loss": 0.0159, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.290909090909091e-05, |
|
"loss": 0.0001, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.261818181818182e-05, |
|
"loss": 0.0011, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.2327272727272726e-05, |
|
"loss": 0.0052, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.203636363636364e-05, |
|
"loss": 0.0162, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.1745454545454545e-05, |
|
"loss": 0.0066, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.1454545454545455e-05, |
|
"loss": 0.0022, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.1163636363636367e-05, |
|
"loss": 0.0049, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.0872727272727273e-05, |
|
"loss": 0.0147, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.0581818181818183e-05, |
|
"loss": 0.0015, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.0290909090909092e-05, |
|
"loss": 0.0129, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.001, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.17399999499320984, |
|
"eval_loss": 4.792283058166504, |
|
"eval_runtime": 3.3921, |
|
"eval_samples_per_second": 147.401, |
|
"eval_steps_per_second": 4.717, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.970909090909091e-05, |
|
"loss": 0.0, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.9418181818181817e-05, |
|
"loss": 0.0005, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.9127272727272726e-05, |
|
"loss": 0.0, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 1.883636363636364e-05, |
|
"loss": 0.0, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.8545454545454545e-05, |
|
"loss": 0.0011, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.8254545454545455e-05, |
|
"loss": 0.0021, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.7963636363636364e-05, |
|
"loss": 0.001, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.7672727272727274e-05, |
|
"loss": 0.0001, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.7381818181818183e-05, |
|
"loss": 0.0044, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7090909090909092e-05, |
|
"loss": 0.0018, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 0.0012, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.650909090909091e-05, |
|
"loss": 0.0001, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.6218181818181817e-05, |
|
"loss": 0.0013, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.5927272727272727e-05, |
|
"loss": 0.0, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.563636363636364e-05, |
|
"loss": 0.0003, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.5345454545454545e-05, |
|
"loss": 0.0001, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.5054545454545455e-05, |
|
"loss": 0.0003, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 1.4763636363636366e-05, |
|
"loss": 0.019, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.4472727272727274e-05, |
|
"loss": 0.001, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.4181818181818181e-05, |
|
"loss": 0.0001, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.389090909090909e-05, |
|
"loss": 0.0001, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 0.0, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.330909090909091e-05, |
|
"loss": 0.0, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.3018181818181819e-05, |
|
"loss": 0.0064, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.2727272727272727e-05, |
|
"loss": 0.0033, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.2436363636363636e-05, |
|
"loss": 0.0002, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.2145454545454546e-05, |
|
"loss": 0.0001, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.1854545454545455e-05, |
|
"loss": 0.0014, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.1563636363636364e-05, |
|
"loss": 0.002, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.1272727272727274e-05, |
|
"loss": 0.0003, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.0981818181818182e-05, |
|
"loss": 0.0127, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.0690909090909091e-05, |
|
"loss": 0.0007, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.0086, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.010909090909091e-05, |
|
"loss": 0.0, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 9.818181818181818e-06, |
|
"loss": 0.0072, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 9.527272727272727e-06, |
|
"loss": 0.0002, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.236363636363638e-06, |
|
"loss": 0.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 8.945454545454546e-06, |
|
"loss": 0.0, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.654545454545455e-06, |
|
"loss": 0.001, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.363636363636365e-06, |
|
"loss": 0.0001, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.072727272727274e-06, |
|
"loss": 0.0, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.781818181818182e-06, |
|
"loss": 0.001, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.490909090909091e-06, |
|
"loss": 0.0017, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 7.2e-06, |
|
"loss": 0.0, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6.909090909090909e-06, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.618181818181818e-06, |
|
"loss": 0.0143, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 6.327272727272728e-06, |
|
"loss": 0.0, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.0363636363636365e-06, |
|
"loss": 0.0, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.745454545454546e-06, |
|
"loss": 0.0029, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 0.0018, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 5.163636363636364e-06, |
|
"loss": 0.0, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.872727272727273e-06, |
|
"loss": 0.0001, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 4.581818181818182e-06, |
|
"loss": 0.0001, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.290909090909091e-06, |
|
"loss": 0.0, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0019, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.7090909090909092e-06, |
|
"loss": 0.0001, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.4181818181818182e-06, |
|
"loss": 0.0003, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.127272727272727e-06, |
|
"loss": 0.0004, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.8363636363636366e-06, |
|
"loss": 0.0037, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.5454545454545456e-06, |
|
"loss": 0.0015, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.2545454545454546e-06, |
|
"loss": 0.0002, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.9636363636363636e-06, |
|
"loss": 0.0144, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.6727272727272728e-06, |
|
"loss": 0.0052, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.3818181818181818e-06, |
|
"loss": 0.0005, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 1.090909090909091e-06, |
|
"loss": 0.0003, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.0082, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 5.090909090909092e-07, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.181818181818182e-07, |
|
"loss": 0.0008, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.15800000727176666, |
|
"eval_loss": 4.613570213317871, |
|
"eval_runtime": 3.3639, |
|
"eval_samples_per_second": 148.638, |
|
"eval_steps_per_second": 4.756, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 6875, |
|
"total_flos": 2.323364869172544e+16, |
|
"train_loss": 0.06812760998129988, |
|
"train_runtime": 4390.0209, |
|
"train_samples_per_second": 50.114, |
|
"train_steps_per_second": 1.566 |
|
} |
|
], |
|
"max_steps": 6875, |
|
"num_train_epochs": 5, |
|
"total_flos": 2.323364869172544e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|