|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.999956610404825, |
|
"global_step": 17285, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 8.2501, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.936416184971099e-07, |
|
"loss": 7.9965, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.0057803468208094e-06, |
|
"loss": 8.0268, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.317919075144509e-06, |
|
"loss": 7.9878, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.398843930635839e-06, |
|
"loss": 7.6933, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.710982658959537e-06, |
|
"loss": 7.4624, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2023121387283238e-05, |
|
"loss": 7.3907, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.4335260115606938e-05, |
|
"loss": 7.0312, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.6647398843930635e-05, |
|
"loss": 6.6125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.8959537572254336e-05, |
|
"loss": 6.3013, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.1271676300578036e-05, |
|
"loss": 6.068, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3583815028901734e-05, |
|
"loss": 5.8308, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.5895953757225434e-05, |
|
"loss": 5.7656, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.8208092485549138e-05, |
|
"loss": 5.5955, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.0520231213872835e-05, |
|
"loss": 5.3842, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.283236994219653e-05, |
|
"loss": 5.2866, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.514450867052023e-05, |
|
"loss": 5.0532, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7456647398843934e-05, |
|
"loss": 4.9624, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.976878612716764e-05, |
|
"loss": 4.8342, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.2080924855491335e-05, |
|
"loss": 4.6055, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.439306358381503e-05, |
|
"loss": 4.5276, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.670520231213873e-05, |
|
"loss": 4.3676, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9017341040462426e-05, |
|
"loss": 4.2029, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.1329479768786124e-05, |
|
"loss": 4.0336, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.364161849710983e-05, |
|
"loss": 3.8993, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.595375722543353e-05, |
|
"loss": 3.834, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.8265895953757235e-05, |
|
"loss": 3.7466, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.057803468208093e-05, |
|
"loss": 3.6144, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.289017341040462e-05, |
|
"loss": 3.4977, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.520231213872833e-05, |
|
"loss": 3.4428, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.751445086705203e-05, |
|
"loss": 3.2823, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.982658959537573e-05, |
|
"loss": 3.2296, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.213872832369943e-05, |
|
"loss": 3.1029, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.445086705202312e-05, |
|
"loss": 3.1717, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.676300578034682e-05, |
|
"loss": 3.0968, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.907514450867053e-05, |
|
"loss": 2.9926, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.138728323699423e-05, |
|
"loss": 3.0021, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.369942196531792e-05, |
|
"loss": 2.891, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.601156069364162e-05, |
|
"loss": 2.8498, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.832369942196532e-05, |
|
"loss": 2.8172, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.063583815028902e-05, |
|
"loss": 2.8302, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.294797687861271e-05, |
|
"loss": 2.7333, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.526011560693642e-05, |
|
"loss": 2.7135, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.757225433526012e-05, |
|
"loss": 2.6811, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.988439306358382e-05, |
|
"loss": 2.6537, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00010219653179190752, |
|
"loss": 2.6031, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00010450867052023121, |
|
"loss": 2.6037, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00010682080924855491, |
|
"loss": 2.5387, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00010913294797687861, |
|
"loss": 2.5393, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00011144508670520233, |
|
"loss": 2.5387, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00011375722543352603, |
|
"loss": 2.4848, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00011606936416184973, |
|
"loss": 2.4773, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00011838150289017342, |
|
"loss": 2.4453, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00012069364161849712, |
|
"loss": 2.3941, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00012300578034682083, |
|
"loss": 2.431, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00012531791907514453, |
|
"loss": 2.4208, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00012763005780346823, |
|
"loss": 2.4313, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00012971098265895952, |
|
"loss": 2.3427, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00013202312138728322, |
|
"loss": 2.3415, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00013433526011560694, |
|
"loss": 2.2621, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00013664739884393064, |
|
"loss": 2.3606, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00013895953757225434, |
|
"loss": 2.3175, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00014127167630057804, |
|
"loss": 2.2297, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00014358381502890176, |
|
"loss": 2.1856, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00014589595375722546, |
|
"loss": 2.2633, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00014820809248554915, |
|
"loss": 2.2474, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00015052023121387285, |
|
"loss": 2.2024, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00015260115606936415, |
|
"loss": 2.1947, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00015491329479768785, |
|
"loss": 2.234, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00015722543352601157, |
|
"loss": 2.2061, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001593063583815029, |
|
"loss": 2.1553, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001616184971098266, |
|
"loss": 2.2286, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001639306358381503, |
|
"loss": 2.161, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000166242774566474, |
|
"loss": 2.1628, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00016855491329479768, |
|
"loss": 2.1371, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017063583815028904, |
|
"loss": 2.2181, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017271676300578033, |
|
"loss": 2.154, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017502890173410406, |
|
"loss": 2.1695, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017734104046242776, |
|
"loss": 2.1685, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017965317919075145, |
|
"loss": 2.192, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018196531791907515, |
|
"loss": 2.1567, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018427745664739887, |
|
"loss": 2.0987, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018658959537572257, |
|
"loss": 2.1687, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018890173410404627, |
|
"loss": 2.0736, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019121387283236997, |
|
"loss": 2.0683, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019352601156069366, |
|
"loss": 2.0872, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019583815028901736, |
|
"loss": 2.1029, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019815028901734106, |
|
"loss": 2.0301, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019999999267878048, |
|
"loss": 2.0957, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001999997364362091, |
|
"loss": 2.0484, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019999911413373273, |
|
"loss": 2.0489, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019999812577362934, |
|
"loss": 2.0073, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001999967713595169, |
|
"loss": 2.0241, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019999505089635347, |
|
"loss": 2.0097, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001999929643904369, |
|
"loss": 2.0251, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019999051184940516, |
|
"loss": 1.9893, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019998769328223598, |
|
"loss": 1.9893, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019998450869924703, |
|
"loss": 1.9321, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019998095811209587, |
|
"loss": 2.0008, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019997704153377978, |
|
"loss": 2.0254, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001999727589786358, |
|
"loss": 1.873, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.0050371885299683, |
|
"eval_runtime": 62.5449, |
|
"eval_samples_per_second": 8.378, |
|
"eval_steps_per_second": 0.528, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019996811046234077, |
|
"loss": 1.9664, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019996309600191098, |
|
"loss": 1.9404, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019995771561570248, |
|
"loss": 1.969, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019995196932341073, |
|
"loss": 1.9545, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019994585714607066, |
|
"loss": 1.9141, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019993937910605658, |
|
"loss": 1.9299, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019993253522708205, |
|
"loss": 1.9305, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001999253255341998, |
|
"loss": 1.902, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019991775005380173, |
|
"loss": 1.9416, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019991061939600934, |
|
"loss": 1.9164, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019990234899683635, |
|
"loss": 1.947, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019989371289425568, |
|
"loss": 1.9242, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019988471111988062, |
|
"loss": 1.9037, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019987534370666328, |
|
"loss": 1.915, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000199865610688894, |
|
"loss": 1.9268, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019985551210220158, |
|
"loss": 1.9268, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019984611084327463, |
|
"loss": 1.9629, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019983531777857815, |
|
"loss": 1.854, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019982415925584902, |
|
"loss": 1.9051, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019981263531593422, |
|
"loss": 1.8801, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019980074600101837, |
|
"loss": 1.8322, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019978849135462366, |
|
"loss": 1.8857, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019977587142160945, |
|
"loss": 1.8805, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019976288624817248, |
|
"loss": 1.8511, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019974953588184632, |
|
"loss": 1.8872, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019973582037150148, |
|
"loss": 1.8636, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019972173976734507, |
|
"loss": 1.8701, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019970729412092063, |
|
"loss": 1.8454, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019969248348510808, |
|
"loss": 1.8941, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019967730791412328, |
|
"loss": 1.8561, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019966176746351818, |
|
"loss": 1.8992, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019964586219018018, |
|
"loss": 1.8372, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001996295921523323, |
|
"loss": 1.8278, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019961295740953278, |
|
"loss": 1.8311, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019959595802267492, |
|
"loss": 1.8281, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001995785940539868, |
|
"loss": 1.8188, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019956086556703113, |
|
"loss": 1.8156, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019954277262670495, |
|
"loss": 1.7751, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019952431529923949, |
|
"loss": 1.832, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019950549365219968, |
|
"loss": 1.8475, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019948630775448433, |
|
"loss": 1.8329, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019946675767632544, |
|
"loss": 1.8352, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019944684348928822, |
|
"loss": 1.8325, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019942860946808643, |
|
"loss": 1.8484, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019940800367611585, |
|
"loss": 1.837, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019938703399034234, |
|
"loss": 1.8295, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019936570048752775, |
|
"loss": 1.8153, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019934400324576564, |
|
"loss": 1.7925, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001993219423444811, |
|
"loss": 1.8383, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001992995178644305, |
|
"loss": 1.8135, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019927672988770105, |
|
"loss": 1.8036, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019925357849771066, |
|
"loss": 1.8035, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019923243159839809, |
|
"loss": 1.8135, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019920858995779232, |
|
"loss": 1.7839, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019918438515335927, |
|
"loss": 1.7759, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019915981727370316, |
|
"loss": 1.7933, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019913488640875744, |
|
"loss": 1.7977, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019910959264978422, |
|
"loss": 1.7797, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019908393608937406, |
|
"loss": 1.7656, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019905791682144557, |
|
"loss": 1.798, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019903153494124518, |
|
"loss": 1.7618, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019900479054534652, |
|
"loss": 1.7879, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019897768373165046, |
|
"loss": 1.7972, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019895297781409127, |
|
"loss": 1.7738, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019892518268104788, |
|
"loss": 1.7901, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019889702542162026, |
|
"loss": 1.7489, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019887137435523912, |
|
"loss": 1.8051, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019884252934074216, |
|
"loss": 1.7428, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001988133225024225, |
|
"loss": 1.7961, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019878375394719502, |
|
"loss": 1.7779, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019875382378329857, |
|
"loss": 1.8037, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001987235321202958, |
|
"loss": 1.7767, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019869287906907265, |
|
"loss": 1.8044, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001986618647418379, |
|
"loss": 1.7517, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001986304892521229, |
|
"loss": 1.8253, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019859875271478102, |
|
"loss": 1.7588, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019856665524598733, |
|
"loss": 1.7948, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019853419696323806, |
|
"loss": 1.8023, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019850467611100676, |
|
"loss": 1.7663, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019847153261017426, |
|
"loss": 1.7566, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019843802864359298, |
|
"loss": 1.7882, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019840416433390782, |
|
"loss": 1.782, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019836993980508268, |
|
"loss": 1.7849, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019833535518240031, |
|
"loss": 1.7793, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001983004105924614, |
|
"loss": 1.7761, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019826865279520944, |
|
"loss": 1.7676, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001982330246209872, |
|
"loss": 1.7274, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019819703685410058, |
|
"loss": 1.7513, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001981606896262867, |
|
"loss": 1.7478, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019812398307059856, |
|
"loss": 1.781, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019808691732140448, |
|
"loss": 1.7504, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019804949251438767, |
|
"loss": 1.7552, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001980117087865457, |
|
"loss": 1.8154, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019797356627619, |
|
"loss": 1.7762, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019793506512294542, |
|
"loss": 1.7263, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019789620546774956, |
|
"loss": 1.7446, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019786485971773587, |
|
"loss": 1.694, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001978253551183793, |
|
"loss": 1.7198, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019778549241867687, |
|
"loss": 1.7423, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001977452717645503, |
|
"loss": 1.7434, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.90117347240448, |
|
"eval_runtime": 61.8399, |
|
"eval_samples_per_second": 8.473, |
|
"eval_steps_per_second": 0.534, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019770469330323174, |
|
"loss": 1.7791, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019766375718326297, |
|
"loss": 1.7459, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019762246355449516, |
|
"loss": 1.7342, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019758081256808816, |
|
"loss": 1.7564, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019753880437650985, |
|
"loss": 1.7394, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019749643913353582, |
|
"loss": 1.7663, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019745371699424864, |
|
"loss": 1.7222, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019741063811503734, |
|
"loss": 1.7046, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001973759182648501, |
|
"loss": 1.6904, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019733219765204383, |
|
"loss": 1.6956, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019728812074414819, |
|
"loss": 1.7511, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019724368770251155, |
|
"loss": 1.7262, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001971988986897858, |
|
"loss": 1.7114, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019715375386992608, |
|
"loss": 1.7182, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019710825340818987, |
|
"loss": 1.7034, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019706239747113656, |
|
"loss": 1.7282, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019701618622662678, |
|
"loss": 1.74, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019696961984382182, |
|
"loss": 1.6854, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019692269849318303, |
|
"loss": 1.756, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019687542234647106, |
|
"loss": 1.7159, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019682779157674537, |
|
"loss": 1.7095, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019677980635836363, |
|
"loss": 1.7071, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019673146686698093, |
|
"loss": 1.7077, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019668277327954917, |
|
"loss": 1.7144, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019663372577431663, |
|
"loss": 1.6873, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001965892805682537, |
|
"loss": 1.7081, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019653956111491275, |
|
"loss": 1.6979, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019648948826801467, |
|
"loss": 1.698, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019644412070578336, |
|
"loss": 1.692, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019639337691717884, |
|
"loss": 1.6938, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019634228027014033, |
|
"loss": 1.7152, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019629083095171264, |
|
"loss": 1.7155, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001962390291502316, |
|
"loss": 1.6839, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019618687505532334, |
|
"loss": 1.6888, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001961343688579036, |
|
"loss": 1.6962, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000196081510750177, |
|
"loss": 1.6784, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019602830092563643, |
|
"loss": 1.672, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019597473957906224, |
|
"loss": 1.6769, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019592082690652148, |
|
"loss": 1.6975, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019586656310536743, |
|
"loss": 1.7687, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019581194837423857, |
|
"loss": 1.685, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019575698291305813, |
|
"loss": 1.6858, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001957016669230331, |
|
"loss": 1.6883, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019565158299718013, |
|
"loss": 1.6471, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001955956015612708, |
|
"loss": 1.6831, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001955392701872709, |
|
"loss": 1.6686, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019548258908138753, |
|
"loss": 1.7006, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019542555845110805, |
|
"loss": 1.7317, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019536817850519927, |
|
"loss": 1.6572, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001953104494537067, |
|
"loss": 1.6916, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001952523715079538, |
|
"loss": 1.6533, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019519394488054127, |
|
"loss": 1.6463, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019513516978534608, |
|
"loss": 1.6984, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019508197443751353, |
|
"loss": 1.6643, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001950284971627635, |
|
"loss": 1.6853, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019496874750645754, |
|
"loss": 1.6864, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019490865020672837, |
|
"loss": 1.6562, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019484820548356873, |
|
"loss": 1.6825, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019478741355824313, |
|
"loss": 1.7296, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019472627465328692, |
|
"loss": 1.7077, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001946647889925058, |
|
"loss": 1.7098, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019460915560757066, |
|
"loss": 1.6647, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000194547011731852, |
|
"loss": 1.6919, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019448452175651983, |
|
"loss": 1.6805, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001944216859103255, |
|
"loss": 1.7276, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019435850442328637, |
|
"loss": 1.6987, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019429497752668516, |
|
"loss": 1.6923, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019423110545306908, |
|
"loss": 1.6908, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019416688843624873, |
|
"loss": 1.6799, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019410232671129745, |
|
"loss": 1.7065, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001940374205145505, |
|
"loss": 1.682, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019397217008360404, |
|
"loss": 1.654, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001939065756573144, |
|
"loss": 1.6809, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019384063747579706, |
|
"loss": 1.6426, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019377435578042592, |
|
"loss": 1.6453, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019370773081383235, |
|
"loss": 1.6419, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019364076281990427, |
|
"loss": 1.7025, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001935734520437853, |
|
"loss": 1.6897, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019350579873187384, |
|
"loss": 1.652, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001934378031318222, |
|
"loss": 1.6461, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019336946549253567, |
|
"loss": 1.6482, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019330078606417164, |
|
"loss": 1.684, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019323176509813855, |
|
"loss": 1.7247, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001931624028470952, |
|
"loss": 1.6417, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019309269956494963, |
|
"loss": 1.688, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019302967524028727, |
|
"loss": 1.6498, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019295932470303454, |
|
"loss": 1.6872, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019289571826614754, |
|
"loss": 1.6668, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019283183638479643, |
|
"loss": 1.652, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019276053369488895, |
|
"loss": 1.6777, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.000192688891444965, |
|
"loss": 1.6466, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019261690989727875, |
|
"loss": 1.6432, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019254458931532655, |
|
"loss": 1.6499, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019247192996384572, |
|
"loss": 1.6599, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019239893210881373, |
|
"loss": 1.6458, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019232559601744712, |
|
"loss": 1.69, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019225192195820067, |
|
"loss": 1.6294, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019217791020076627, |
|
"loss": 1.7088, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.000192103561016072, |
|
"loss": 1.6531, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019202887467628115, |
|
"loss": 1.6708, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.862983226776123, |
|
"eval_runtime": 61.7517, |
|
"eval_samples_per_second": 8.486, |
|
"eval_steps_per_second": 0.534, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019195385145479116, |
|
"loss": 1.6732, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001918784916262327, |
|
"loss": 1.6961, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001918027954664686, |
|
"loss": 1.6361, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019172676325259288, |
|
"loss": 1.708, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019165039526292975, |
|
"loss": 1.6377, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001915736917770325, |
|
"loss": 1.667, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019149665307568263, |
|
"loss": 1.6649, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019141927944088863, |
|
"loss": 1.6981, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001913415711558851, |
|
"loss": 1.6095, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019127916377084718, |
|
"loss": 1.6629, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019120085383312737, |
|
"loss": 1.6908, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019112221004476872, |
|
"loss": 1.6602, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019104323269365537, |
|
"loss": 1.6378, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019096392206889248, |
|
"loss": 1.642, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019088427846080527, |
|
"loss": 1.6605, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019080430216093778, |
|
"loss": 1.6055, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019072399346205197, |
|
"loss": 1.6423, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019064335265812652, |
|
"loss": 1.6856, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019056238004435593, |
|
"loss": 1.6115, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001904810759171492, |
|
"loss": 1.6145, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019042396593693816, |
|
"loss": 1.6301, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019034209892058318, |
|
"loss": 1.6615, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019025990119715506, |
|
"loss": 1.6515, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019017737306754754, |
|
"loss": 1.7024, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019009451483386375, |
|
"loss": 1.6598, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001900113267994153, |
|
"loss": 1.6995, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018992780926872102, |
|
"loss": 1.684, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018984396254750593, |
|
"loss": 1.6553, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018975978694270003, |
|
"loss": 1.6515, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018967528276243734, |
|
"loss": 1.6754, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018959045031605453, |
|
"loss": 1.6483, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018950528991409, |
|
"loss": 1.6569, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018941980186828263, |
|
"loss": 1.6626, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001893339864915708, |
|
"loss": 1.6508, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018924784409809093, |
|
"loss": 1.6662, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001891613750031767, |
|
"loss": 1.6426, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018907457952335754, |
|
"loss": 1.6468, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001889874579763578, |
|
"loss": 1.6326, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018890001068109534, |
|
"loss": 1.6034, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018881223795768053, |
|
"loss": 1.6951, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018872414012741494, |
|
"loss": 1.5691, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001886445743803333, |
|
"loss": 1.6343, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018857362860914253, |
|
"loss": 1.6143, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018848465460459042, |
|
"loss": 1.663, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018839535669234195, |
|
"loss": 1.617, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018830573519928195, |
|
"loss": 1.6374, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001882157904534795, |
|
"loss": 1.6472, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018812552278418726, |
|
"loss": 1.6326, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018803493252183976, |
|
"loss": 1.6444, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018794401999805248, |
|
"loss": 1.6167, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018785278554562065, |
|
"loss": 1.6498, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018776122949851792, |
|
"loss": 1.6605, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018766935219189507, |
|
"loss": 1.6455, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018757715396207903, |
|
"loss": 1.671, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018748463514657146, |
|
"loss": 1.6176, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018739179608404747, |
|
"loss": 1.6459, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018729863711435457, |
|
"loss": 1.6481, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018720515857851132, |
|
"loss": 1.6823, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018711136081870605, |
|
"loss": 1.6239, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018701724417829565, |
|
"loss": 1.6209, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001869228090018043, |
|
"loss": 1.6381, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018682805563492225, |
|
"loss": 1.6498, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018673298442450448, |
|
"loss": 1.6377, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018663759571856952, |
|
"loss": 1.6513, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001865514747131347, |
|
"loss": 1.6385, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018646509707450926, |
|
"loss": 1.6137, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018636882124247248, |
|
"loss": 1.6402, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001862819026646694, |
|
"loss": 1.6949, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001861850264262445, |
|
"loss": 1.6283, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018608783469816221, |
|
"loss": 1.633, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018599032783620342, |
|
"loss": 1.6442, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018589250619730253, |
|
"loss": 1.629, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018580419788394125, |
|
"loss": 1.599, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018570577915633075, |
|
"loss": 1.6526, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018560704669339962, |
|
"loss": 1.6345, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018550800085656875, |
|
"loss": 1.6609, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018540864200840615, |
|
"loss": 1.6241, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001853089705126257, |
|
"loss": 1.6081, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018520898673408576, |
|
"loss": 1.6591, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018510869103878781, |
|
"loss": 1.6196, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018500808379387515, |
|
"loss": 1.6015, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018490716536763153, |
|
"loss": 1.6196, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018480593612947978, |
|
"loss": 1.6504, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018470439644998062, |
|
"loss": 1.6474, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018460254670083103, |
|
"loss": 1.6038, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018450038725486306, |
|
"loss": 1.712, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018439791848604253, |
|
"loss": 1.6311, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018429514076946746, |
|
"loss": 1.626, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018419205448136686, |
|
"loss": 1.6255, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018408865999909932, |
|
"loss": 1.6269, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018398495770115153, |
|
"loss": 1.5649, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001838809479671371, |
|
"loss": 1.6243, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001837766311777949, |
|
"loss": 1.6366, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018367200771498787, |
|
"loss": 1.6387, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00018356707796170161, |
|
"loss": 1.6256, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00018346184230204292, |
|
"loss": 1.6158, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001833563011212383, |
|
"loss": 1.6103, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00018325045480563273, |
|
"loss": 1.6038, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00018314430374268817, |
|
"loss": 1.5909, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001830378483209821, |
|
"loss": 1.612, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.8420035243034363, |
|
"eval_runtime": 62.0042, |
|
"eval_samples_per_second": 8.451, |
|
"eval_steps_per_second": 0.532, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001829310889302062, |
|
"loss": 1.6545, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00018284546282243836, |
|
"loss": 1.6391, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00018273815727291054, |
|
"loss": 1.6237, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00018263054885136454, |
|
"loss": 1.6281, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00018252263795171263, |
|
"loss": 1.6102, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00018241442496897444, |
|
"loss": 1.6246, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00018230591029927537, |
|
"loss": 1.5991, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00018219709433984512, |
|
"loss": 1.6252, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00018208797748901637, |
|
"loss": 1.6047, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001819785601462232, |
|
"loss": 1.6173, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00018186884271199967, |
|
"loss": 1.5678, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001817588255879784, |
|
"loss": 1.6143, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.000181648509176889, |
|
"loss": 1.6248, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00018153789388255677, |
|
"loss": 1.6552, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001814269801099009, |
|
"loss": 1.626, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00018131576826493337, |
|
"loss": 1.6096, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00018120425875475723, |
|
"loss": 1.6182, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018109245198756518, |
|
"loss": 1.6014, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018099157208059183, |
|
"loss": 1.5923, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001808792016535363, |
|
"loss": 1.5841, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018076653515937166, |
|
"loss": 1.5837, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018065357301052593, |
|
"loss": 1.6354, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018054031562050928, |
|
"loss": 1.6433, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001804267634039127, |
|
"loss": 1.6109, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001803243146757791, |
|
"loss": 1.6059, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00018021020343474294, |
|
"loss": 1.6127, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001800957985755384, |
|
"loss": 1.6006, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017998110051695688, |
|
"loss": 1.5927, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001798661096788631, |
|
"loss": 1.6142, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017975082648219356, |
|
"loss": 1.6272, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001796352513489549, |
|
"loss": 1.6459, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017951938470222247, |
|
"loss": 1.6373, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001794032269661387, |
|
"loss": 1.6331, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017928677856591163, |
|
"loss": 1.6007, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001791700399278133, |
|
"loss": 1.6359, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017905301147917816, |
|
"loss": 1.5939, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017893569364840154, |
|
"loss": 1.5889, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001788180868649382, |
|
"loss": 1.6206, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017870019155930047, |
|
"loss": 1.5902, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017858200816305697, |
|
"loss": 1.6394, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017846353710883087, |
|
"loss": 1.6193, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017835666757086383, |
|
"loss": 1.6162, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017823765116211767, |
|
"loss": 1.6329, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001781183483559451, |
|
"loss": 1.6248, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017799875958906703, |
|
"loss": 1.6109, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001778788852992512, |
|
"loss": 1.5499, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001777587259253104, |
|
"loss": 1.6107, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017763828190710113, |
|
"loss": 1.5865, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017751755368552178, |
|
"loss": 1.6013, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017739654170251116, |
|
"loss": 1.5829, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017727524640104674, |
|
"loss": 1.6356, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017715366822514318, |
|
"loss": 1.6237, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017703180761985063, |
|
"loss": 1.5802, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017690966503125307, |
|
"loss": 1.5659, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001767872409064667, |
|
"loss": 1.61, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017666453569363836, |
|
"loss": 1.6184, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017654154984194382, |
|
"loss": 1.5797, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017641828380158612, |
|
"loss": 1.6256, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017629473802379403, |
|
"loss": 1.5783, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017617091296082032, |
|
"loss": 1.5988, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001760468090659401, |
|
"loss": 1.5904, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001759224267934491, |
|
"loss": 1.611, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017579776659866218, |
|
"loss": 1.6066, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017569783864540068, |
|
"loss": 1.5947, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017557267934112085, |
|
"loss": 1.576, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017544724339483368, |
|
"loss": 1.6143, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017532153126571107, |
|
"loss": 1.5985, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017519554341393593, |
|
"loss": 1.5992, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017506928030070054, |
|
"loss": 1.5891, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017494274238820468, |
|
"loss": 1.5622, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017482862369640954, |
|
"loss": 1.5646, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017470156494228082, |
|
"loss": 1.6121, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001745742327349537, |
|
"loss": 1.5766, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017444662754054156, |
|
"loss": 1.557, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017431874982615708, |
|
"loss": 1.5716, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00017419060005991054, |
|
"loss": 1.5992, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0001740621787109081, |
|
"loss": 1.6036, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00017393348624925004, |
|
"loss": 1.6121, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00017380452314602916, |
|
"loss": 1.6076, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00017367528987332885, |
|
"loss": 1.5798, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00017354578690422157, |
|
"loss": 1.5597, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00017341601471276708, |
|
"loss": 1.5834, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001732859737740105, |
|
"loss": 1.6169, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00017315566456398086, |
|
"loss": 1.5933, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00017303815729724509, |
|
"loss": 1.6006, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00017292043346556449, |
|
"loss": 1.6013, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001727893756367969, |
|
"loss": 1.6042, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00017265805135460778, |
|
"loss": 1.5738, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00017252646109972383, |
|
"loss": 1.6376, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00017240780286177955, |
|
"loss": 1.5904, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001722757085866635, |
|
"loss": 1.5651, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00017214334973845988, |
|
"loss": 1.5923, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001720107268016827, |
|
"loss": 1.6032, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00017187784026181265, |
|
"loss": 1.5859, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00017174469060529527, |
|
"loss": 1.5376, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00017161127831953946, |
|
"loss": 1.5445, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001714776038929153, |
|
"loss": 1.5652, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00017134366781475262, |
|
"loss": 1.5267, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00017120947057533897, |
|
"loss": 1.5805, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00017108847017299018, |
|
"loss": 1.5876, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.826651930809021, |
|
"eval_runtime": 61.8972, |
|
"eval_samples_per_second": 8.466, |
|
"eval_steps_per_second": 0.533, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00017095377808136445, |
|
"loss": 1.6163, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00017083233311224484, |
|
"loss": 1.629, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001706971479483343, |
|
"loss": 1.5993, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00017056170398982906, |
|
"loss": 1.6056, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00017042600173253645, |
|
"loss": 1.5728, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00017029004167320926, |
|
"loss": 1.6298, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00017015382430954413, |
|
"loss": 1.5792, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00017001735014017955, |
|
"loss": 1.5615, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.000169880619664694, |
|
"loss": 1.6449, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016974363338360425, |
|
"loss": 1.5903, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001696063917983635, |
|
"loss": 1.5682, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016946889541135946, |
|
"loss": 1.5754, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016933114472591262, |
|
"loss": 1.6168, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016919314024627436, |
|
"loss": 1.5977, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016906871963807865, |
|
"loss": 1.6037, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016894409423469082, |
|
"loss": 1.582, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016880538182183466, |
|
"loss": 1.5863, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016866641753939926, |
|
"loss": 1.5792, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016852720189607857, |
|
"loss": 1.5481, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016838773540148655, |
|
"loss": 1.594, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016824801856615547, |
|
"loss": 1.5484, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016810805190153397, |
|
"loss": 1.551, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001679678359199853, |
|
"loss": 1.6115, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001678273711347852, |
|
"loss": 1.5713, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016770074052593968, |
|
"loss": 1.532, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016755980443113736, |
|
"loss": 1.6103, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016741862102632728, |
|
"loss": 1.5881, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00016727719082832666, |
|
"loss": 1.5909, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00016713551435485608, |
|
"loss": 1.5632, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001669935921245377, |
|
"loss": 1.5794, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00016685142465689326, |
|
"loss": 1.5779, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00016670901247234224, |
|
"loss": 1.615, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0001665663560921999, |
|
"loss": 1.6188, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016642345603867545, |
|
"loss": 1.5351, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016628031283487006, |
|
"loss": 1.6056, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016613692700477494, |
|
"loss": 1.6137, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0001659932990732696, |
|
"loss": 1.5894, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016584942956611963, |
|
"loss": 1.6144, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016570531900997497, |
|
"loss": 1.5606, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016556096793236805, |
|
"loss": 1.5671, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016541637686171167, |
|
"loss": 1.5839, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016527154632729713, |
|
"loss": 1.5991, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016512647685929235, |
|
"loss": 1.5912, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001649811689887399, |
|
"loss": 1.578, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00016483562324755502, |
|
"loss": 1.5861, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00016468984016852374, |
|
"loss": 1.5587, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001645438202853008, |
|
"loss": 1.5391, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00016439756413240793, |
|
"loss": 1.5762, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00016425107224523168, |
|
"loss": 1.6125, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001641190284371531, |
|
"loss": 1.5776, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016397209013291726, |
|
"loss": 1.5469, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016382491765189186, |
|
"loss": 1.5636, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016367751153281774, |
|
"loss": 1.5732, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016352987231529103, |
|
"loss": 1.547, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016338200053976108, |
|
"loss": 1.6132, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016323389674752868, |
|
"loss": 1.5797, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00016308556148074378, |
|
"loss": 1.5448, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00016293699528240386, |
|
"loss": 1.5573, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001627881986963515, |
|
"loss": 1.5472, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00016263917226727286, |
|
"loss": 1.5518, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001624899165406954, |
|
"loss": 1.5813, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00016234043206298586, |
|
"loss": 1.5408, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00016219071938134845, |
|
"loss": 1.5346, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001620557833064539, |
|
"loss": 1.5452, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00016192066323037722, |
|
"loss": 1.5825, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00016177031449597098, |
|
"loss": 1.5871, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001616197396446142, |
|
"loss": 1.5518, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001614689392275025, |
|
"loss": 1.5645, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00016131791379665717, |
|
"loss": 1.549, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00016116666390492325, |
|
"loss": 1.5868, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00016101519010596743, |
|
"loss": 1.5491, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00016086349295427595, |
|
"loss": 1.5768, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00016072677501010647, |
|
"loss": 1.5492, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00016057465501875367, |
|
"loss": 1.5695, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00016042231328729185, |
|
"loss": 1.5693, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001602697503733844, |
|
"loss": 1.5273, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00016011696683550456, |
|
"loss": 1.4587, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015996396323293295, |
|
"loss": 1.4827, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015981074012575593, |
|
"loss": 1.4734, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0001596572980748634, |
|
"loss": 1.4913, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00015950363764194662, |
|
"loss": 1.4804, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0001593497593894963, |
|
"loss": 1.4434, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00015919566388080048, |
|
"loss": 1.4501, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00015904135167994264, |
|
"loss": 1.4815, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00015888682335179924, |
|
"loss": 1.4588, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00015873207946203802, |
|
"loss": 1.4268, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00015857712057711592, |
|
"loss": 1.4502, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00015843747422863421, |
|
"loss": 1.4486, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00015828210841631188, |
|
"loss": 1.4331, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001581265292559965, |
|
"loss": 1.4565, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00015797073731720253, |
|
"loss": 1.453, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00015781473317022333, |
|
"loss": 1.45, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00015765851738612895, |
|
"loss": 1.4356, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00015750209053676432, |
|
"loss": 1.4915, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00015734545319474693, |
|
"loss": 1.4443, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00015718860593346473, |
|
"loss": 1.4245, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001570315493270742, |
|
"loss": 1.4927, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00015687428395049814, |
|
"loss": 1.4698, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00015671681037942355, |
|
"loss": 1.4568, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00015655912919029953, |
|
"loss": 1.4498, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 0.8124380707740784, |
|
"eval_runtime": 62.0134, |
|
"eval_samples_per_second": 8.45, |
|
"eval_steps_per_second": 0.532, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00015640124096033526, |
|
"loss": 1.4347, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001562431462674977, |
|
"loss": 1.4754, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00015608484569050975, |
|
"loss": 1.4716, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00015592633980884778, |
|
"loss": 1.4523, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00015578350945939874, |
|
"loss": 1.4578, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00015562461509800382, |
|
"loss": 1.4311, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001554814360610988, |
|
"loss": 1.4419, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00015532215531972608, |
|
"loss": 1.449, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001551786294874456, |
|
"loss": 1.4604, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001550189644709561, |
|
"loss": 1.4673, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00015485909805156665, |
|
"loss": 1.4787, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001546990308144857, |
|
"loss": 1.4571, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001545387633456568, |
|
"loss": 1.4773, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00015437829623175637, |
|
"loss": 1.4816, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00015421763006019177, |
|
"loss": 1.5024, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00015405676541909897, |
|
"loss": 1.4956, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00015389570289734046, |
|
"loss": 1.4515, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00015373444308450313, |
|
"loss": 1.4745, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00015357298657089606, |
|
"loss": 1.4185, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00015341133394754838, |
|
"loss": 1.4462, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00015324948580620703, |
|
"loss": 1.4664, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00015308744273933477, |
|
"loss": 1.4747, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00015292520534010784, |
|
"loss": 1.4091, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00015277902601747382, |
|
"loss": 1.4634, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001526164210235197, |
|
"loss": 1.4462, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001524536234214371, |
|
"loss": 1.4771, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001522906338071643, |
|
"loss": 1.4639, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00015212745277734259, |
|
"loss": 1.4302, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00015196408092931383, |
|
"loss": 1.4412, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001518005188611184, |
|
"loss": 1.4518, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00015163676717149308, |
|
"loss": 1.4316, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00015147282645986866, |
|
"loss": 1.4317, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00015130869732636804, |
|
"loss": 1.4911, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00015114438037180364, |
|
"loss": 1.4359, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00015097987619767556, |
|
"loss": 1.4517, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001508316628659255, |
|
"loss": 1.4638, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.000150666804634212, |
|
"loss": 1.4206, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0001505017609311527, |
|
"loss": 1.4665, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00015033653236090806, |
|
"loss": 1.4544, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00015018766908612838, |
|
"loss": 1.4356, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00015002209093511546, |
|
"loss": 1.4497, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00014985632967280134, |
|
"loss": 1.4469, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00014969038590597315, |
|
"loss": 1.4334, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0001495242602420861, |
|
"loss": 1.4714, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00014935795328926125, |
|
"loss": 1.4577, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00014919146565628327, |
|
"loss": 1.4247, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00014902479795259822, |
|
"loss": 1.4811, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00014885795078831132, |
|
"loss": 1.4309, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00014869092477418482, |
|
"loss": 1.4853, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00014852372052163553, |
|
"loss": 1.4507, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00014835633864273287, |
|
"loss": 1.4455, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001482055435875876, |
|
"loss": 1.4421, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001480378259071914, |
|
"loss": 1.4758, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00014786993237911187, |
|
"loss": 1.4564, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00014771867836201847, |
|
"loss": 1.4497, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00014755045241707308, |
|
"loss": 1.4975, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00014738205240852806, |
|
"loss": 1.4897, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00014721347895282978, |
|
"loss": 1.4567, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001470447326670598, |
|
"loss": 1.4813, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014687581416893218, |
|
"loss": 1.4861, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001467067240767915, |
|
"loss": 1.4759, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014653746300961038, |
|
"loss": 1.4753, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014636803158698738, |
|
"loss": 1.464, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014619843042914466, |
|
"loss": 1.4849, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014602866015692563, |
|
"loss": 1.4503, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00014587572283276284, |
|
"loss": 1.4429, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00014572264948280539, |
|
"loss": 1.481, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00014555240899848083, |
|
"loss": 1.4358, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00014538200176461162, |
|
"loss": 1.4236, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00014521142840499203, |
|
"loss": 1.4508, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001450406895440244, |
|
"loss": 1.4858, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001448697858067168, |
|
"loss": 1.4396, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00014469871781868098, |
|
"loss": 1.4119, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00014452748620612992, |
|
"loss": 1.4862, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00014435609159587555, |
|
"loss": 1.4585, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001442016976021512, |
|
"loss": 1.4725, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00014402999502526254, |
|
"loss": 1.4497, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014385813127179106, |
|
"loss": 1.4455, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014368610697086277, |
|
"loss": 1.4252, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014351392275219134, |
|
"loss": 1.4629, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014334157924607578, |
|
"loss": 1.4628, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014316907708339822, |
|
"loss": 1.4321, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00014299641689562156, |
|
"loss": 1.4558, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0001428235993147873, |
|
"loss": 1.4762, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00014265062497351285, |
|
"loss": 1.4601, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00014247749450498962, |
|
"loss": 1.4782, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00014230420854298054, |
|
"loss": 1.4407, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00014213076772181767, |
|
"loss": 1.4164, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014195717267640004, |
|
"loss": 1.4223, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014178342404219118, |
|
"loss": 1.3949, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014160952245521682, |
|
"loss": 1.4718, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014145288077845185, |
|
"loss": 1.4722, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014127869033547745, |
|
"loss": 1.4446, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014112178972372757, |
|
"loss": 1.4475, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00014094731272664267, |
|
"loss": 1.4202, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00014077268583746858, |
|
"loss": 1.4854, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001405979096954461, |
|
"loss": 1.4541, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00014042298494036228, |
|
"loss": 1.4756, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00014024791221254815, |
|
"loss": 1.435, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001400726921528765, |
|
"loss": 1.3868, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 0.8074263334274292, |
|
"eval_runtime": 61.8502, |
|
"eval_samples_per_second": 8.472, |
|
"eval_steps_per_second": 0.534, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001398973254027594, |
|
"loss": 1.4526, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00013972181260414585, |
|
"loss": 1.4434, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001395461543995196, |
|
"loss": 1.4733, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00013937035143189657, |
|
"loss": 1.4456, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00013919440434482266, |
|
"loss": 1.4451, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00013901831378237124, |
|
"loss": 1.4572, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.000138842080389141, |
|
"loss": 1.4455, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00013866570481025346, |
|
"loss": 1.438, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00013848918769135055, |
|
"loss": 1.4261, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00013831252967859238, |
|
"loss": 1.436, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00013813573141865484, |
|
"loss": 1.4295, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013797649360826399, |
|
"loss": 1.4416, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013779943066211437, |
|
"loss": 1.4494, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0001376222293470401, |
|
"loss": 1.4558, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013744489031170578, |
|
"loss": 1.4371, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0001372851679656103, |
|
"loss": 1.4876, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013710756905065686, |
|
"loss": 1.4465, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00013692983429941337, |
|
"loss": 1.5011, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00013675196436249725, |
|
"loss": 1.4399, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00013657395989102067, |
|
"loss": 1.4586, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00013639582153658842, |
|
"loss": 1.431, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00013621754995129522, |
|
"loss": 1.4681, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001360391457877237, |
|
"loss": 1.47, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001358606096989416, |
|
"loss": 1.4658, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001356819423384997, |
|
"loss": 1.4789, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00013550314436042932, |
|
"loss": 1.4218, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001353242164192399, |
|
"loss": 1.4351, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00013514515916991657, |
|
"loss": 1.4711, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00013496597326791786, |
|
"loss": 1.4263, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00013478665936917332, |
|
"loss": 1.4692, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00013460721813008086, |
|
"loss": 1.457, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001344276502075047, |
|
"loss": 1.4114, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00013424795625877276, |
|
"loss": 1.395, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001340681369416742, |
|
"loss": 1.4456, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00013388819291445723, |
|
"loss": 1.4459, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001337081248358265, |
|
"loss": 1.4642, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001335279333649408, |
|
"loss": 1.4538, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00013334761916141064, |
|
"loss": 1.4443, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00013316718288529567, |
|
"loss": 1.4769, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00013300468641063172, |
|
"loss": 1.4329, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00013282402001666874, |
|
"loss": 1.4563, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00013264323346681258, |
|
"loss": 1.487, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00013246232742285206, |
|
"loss": 1.4135, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00013228130254701342, |
|
"loss": 1.485, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001321182791051834, |
|
"loss": 1.4554, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00013193703027476557, |
|
"loss": 1.4543, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00013175566453537692, |
|
"loss": 1.4419, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001315741825509265, |
|
"loss": 1.4353, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00013139258498574873, |
|
"loss": 1.4382, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00013121087250460132, |
|
"loss": 1.4579, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00013102904577266255, |
|
"loss": 1.4475, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00013084710545552893, |
|
"loss": 1.442, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00013066505221921273, |
|
"loss": 1.4578, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00013048288673013966, |
|
"loss": 1.4778, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00013030060965514632, |
|
"loss": 1.4279, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00013011822166147767, |
|
"loss": 1.4175, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00012993572341678483, |
|
"loss": 1.4537, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00012975311558912248, |
|
"loss": 1.473, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00012957039884694638, |
|
"loss": 1.4041, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00012938757385911104, |
|
"loss": 1.4453, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00012920464129486723, |
|
"loss": 1.4795, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00012903991056267166, |
|
"loss": 1.4592, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001288567754484459, |
|
"loss": 1.4767, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00012867353470085696, |
|
"loss": 1.4769, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00012849018899067748, |
|
"loss": 1.4212, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00012830673898906435, |
|
"loss": 1.4932, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00012812318536755622, |
|
"loss": 1.4644, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00012795789906852118, |
|
"loss": 1.4359, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001277741504206582, |
|
"loss": 1.4672, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00012759030010249867, |
|
"loss": 1.4161, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00012740634878704655, |
|
"loss": 1.4479, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00012722229714767566, |
|
"loss": 1.5016, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00012703814585812706, |
|
"loss": 1.4459, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00012685389559250655, |
|
"loss": 1.4491, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00012666954702528224, |
|
"loss": 1.4229, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00012648510083128212, |
|
"loss": 1.4286, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00012631901634382203, |
|
"loss": 1.4043, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00012613438651937683, |
|
"loss": 1.44, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00012594966102716905, |
|
"loss": 1.4456, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00012576484054340636, |
|
"loss": 1.4206, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00012557992574464428, |
|
"loss": 1.4273, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00012539491730778355, |
|
"loss": 1.4658, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001252098159100676, |
|
"loss": 1.423, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00012502462222908025, |
|
"loss": 1.4591, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001248393369427431, |
|
"loss": 1.3987, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00012465396072931307, |
|
"loss": 1.4278, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00012446849426737996, |
|
"loss": 1.4273, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00012428293823586387, |
|
"loss": 1.4464, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00012409729331401288, |
|
"loss": 1.4407, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001239115601814004, |
|
"loss": 1.4192, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00012372573951792271, |
|
"loss": 1.4327, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00012355842664409558, |
|
"loss": 1.4286, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00012337244154623397, |
|
"loss": 1.4381, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001231863708910095, |
|
"loss": 1.4084, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00012300021535955412, |
|
"loss": 1.4431, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001228139756333103, |
|
"loss": 1.4226, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00012262765239402884, |
|
"loss": 1.3949, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001224412463237662, |
|
"loss": 1.4388, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00012225475810488206, |
|
"loss": 1.4102, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00012208684903502762, |
|
"loss": 1.4059, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 0.8009569048881531, |
|
"eval_runtime": 62.0166, |
|
"eval_samples_per_second": 8.449, |
|
"eval_steps_per_second": 0.532, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00012190020661473858, |
|
"loss": 1.4513, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00012171348402636268, |
|
"loss": 1.4368, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00012152668195341832, |
|
"loss": 1.4503, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00012133980107971474, |
|
"loss": 1.4039, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00012115284208934969, |
|
"loss": 1.4467, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00012096580566670692, |
|
"loss": 1.4028, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00012077869249645357, |
|
"loss": 1.4299, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00012059150326353772, |
|
"loss": 1.4264, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00012040423865318591, |
|
"loss": 1.453, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001202168993509006, |
|
"loss": 1.4452, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00012002948604245768, |
|
"loss": 1.4251, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00011984199941390392, |
|
"loss": 1.4419, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00011965444015155452, |
|
"loss": 1.4453, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00011946680894199054, |
|
"loss": 1.4178, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00011927910647205644, |
|
"loss": 1.4655, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00011909133342885747, |
|
"loss": 1.4289, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00011890349049975729, |
|
"loss": 1.4673, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00011871557837237537, |
|
"loss": 1.4313, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00011852759773458446, |
|
"loss": 1.4631, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00011835835715290196, |
|
"loss": 1.4695, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00011817024824131962, |
|
"loss": 1.4487, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00011798207281556853, |
|
"loss": 1.4025, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00011779383156448527, |
|
"loss": 1.4665, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00011760552517714743, |
|
"loss": 1.4005, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00011741715434287097, |
|
"loss": 1.4488, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00011722871975120782, |
|
"loss": 1.4511, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00011705907467624817, |
|
"loss": 1.4192, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00011687052084609971, |
|
"loss": 1.4309, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0001166819052595759, |
|
"loss": 1.4495, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00011649322860712455, |
|
"loss": 1.4375, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00011630449157941714, |
|
"loss": 1.4502, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00011611569486734603, |
|
"loss": 1.4179, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00011592683916202211, |
|
"loss": 1.4581, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00011573792515477222, |
|
"loss": 1.4211, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00011554895353713662, |
|
"loss": 1.4118, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00011535992500086643, |
|
"loss": 1.4308, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001151708402379212, |
|
"loss": 1.429, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00011498169994046621, |
|
"loss": 1.4262, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00011479250480087011, |
|
"loss": 1.4375, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00011462218285760746, |
|
"loss": 1.4508, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00011443288542613578, |
|
"loss": 1.4318, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00011424353516151814, |
|
"loss": 1.4416, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00011405413275689179, |
|
"loss": 1.4296, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001138646789055848, |
|
"loss": 1.4192, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00011367517430111365, |
|
"loss": 1.4411, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001134856196371805, |
|
"loss": 1.407, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00011329601560767078, |
|
"loss": 1.447, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001131063629066507, |
|
"loss": 1.4344, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00011291666222836454, |
|
"loss": 1.4774, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00011274589117127904, |
|
"loss": 1.4209, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00011255610124945745, |
|
"loss": 1.3907, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00011236626536466241, |
|
"loss": 1.4373, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00011217638421180883, |
|
"loss": 1.4305, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00011198645848597729, |
|
"loss": 1.4338, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00011179648888241155, |
|
"loss": 1.4363, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00011160647609651597, |
|
"loss": 1.4053, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00011141642082385304, |
|
"loss": 1.4481, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00011122632376014078, |
|
"loss": 1.3928, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00011103618560125007, |
|
"loss": 1.3817, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00011084600704320238, |
|
"loss": 1.4496, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00011065578878216696, |
|
"loss": 1.4582, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00011046553151445844, |
|
"loss": 1.451, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001102752359365342, |
|
"loss": 1.469, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00011008490274499193, |
|
"loss": 1.4299, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00010989453263656697, |
|
"loss": 1.4298, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00010972316855101048, |
|
"loss": 1.4192, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00010953273022049615, |
|
"loss": 1.4405, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00010934225699438665, |
|
"loss": 1.4636, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00010917080183142705, |
|
"loss": 1.4231, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00010898026422470837, |
|
"loss": 1.4357, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00010878969374475633, |
|
"loss": 1.4173, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00010859909108917496, |
|
"loss": 1.4286, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00010840845695568593, |
|
"loss": 1.4365, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00010821779204212623, |
|
"loss": 1.4292, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00010804616787981517, |
|
"loss": 1.4429, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00010785544640706349, |
|
"loss": 1.43, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001076646961785964, |
|
"loss": 1.4428, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001074739178926758, |
|
"loss": 1.4511, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00010728311224766634, |
|
"loss": 1.4283, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00010709227994203286, |
|
"loss": 1.4041, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00010690142167433773, |
|
"loss": 1.4574, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00010671053814323834, |
|
"loss": 1.4421, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00010651963004748471, |
|
"loss": 1.453, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00010632869808591662, |
|
"loss": 1.4239, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00010613774295746124, |
|
"loss": 1.4069, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001059467653611306, |
|
"loss": 1.4335, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00010575576599601895, |
|
"loss": 1.4341, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00010556474556130025, |
|
"loss": 1.3864, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00010537370475622554, |
|
"loss": 1.4389, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00010518264428012043, |
|
"loss": 1.4895, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00010499156483238262, |
|
"loss": 1.4252, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00010480046711247918, |
|
"loss": 1.4495, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00010460935181994404, |
|
"loss": 1.4292, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00010441821965437556, |
|
"loss": 1.405, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00010422707131543377, |
|
"loss": 1.4666, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00010405502456046876, |
|
"loss": 1.4412, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001038638474198912, |
|
"loss": 1.3975, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00010367265613528012, |
|
"loss": 1.4423, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00010348145140651204, |
|
"loss": 1.4614, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00010329023393351272, |
|
"loss": 1.4521, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 0.7902594208717346, |
|
"eval_runtime": 61.869, |
|
"eval_samples_per_second": 8.47, |
|
"eval_steps_per_second": 0.533, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00010309900441625435, |
|
"loss": 1.4428, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001029077635547535, |
|
"loss": 1.4417, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00010271651204906811, |
|
"loss": 1.4228, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001025252505992951, |
|
"loss": 1.4302, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00010233397990556775, |
|
"loss": 1.4544, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001021618289563197, |
|
"loss": 1.4378, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00010198967159704729, |
|
"loss": 1.4494, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00010179837874523537, |
|
"loss": 1.4467, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00010160707931026259, |
|
"loss": 1.4443, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00010143490476895921, |
|
"loss": 1.4272, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.000101243594755249, |
|
"loss": 1.4492, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00010105228018922502, |
|
"loss": 1.4289, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00010086096177121504, |
|
"loss": 1.3822, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00010066964020156091, |
|
"loss": 1.413, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.000100478316180616, |
|
"loss": 1.4419, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00010028699040874277, |
|
"loss": 1.3911, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00010009566358630991, |
|
"loss": 1.4321, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.990433641369012e-05, |
|
"loss": 1.4358, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.971300959125727e-05, |
|
"loss": 1.4307, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.952168381938401e-05, |
|
"loss": 1.4235, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.933035979843912e-05, |
|
"loss": 1.4123, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.913903822878499e-05, |
|
"loss": 1.4267, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.8947719810775e-05, |
|
"loss": 1.4151, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.875640524475103e-05, |
|
"loss": 1.4215, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.856509523104083e-05, |
|
"loss": 1.3905, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.83737904699555e-05, |
|
"loss": 1.4147, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.820162125476466e-05, |
|
"loss": 1.4225, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.802945737193441e-05, |
|
"loss": 1.4273, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.783817104368033e-05, |
|
"loss": 1.4644, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.764689262903611e-05, |
|
"loss": 1.4026, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.74556228281972e-05, |
|
"loss": 1.4154, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.726436234132755e-05, |
|
"loss": 1.438, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.707311186855684e-05, |
|
"loss": 1.4191, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.68818721099783e-05, |
|
"loss": 1.4027, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.669064376564584e-05, |
|
"loss": 1.4297, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.64994275355716e-05, |
|
"loss": 1.4164, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.630822411972336e-05, |
|
"loss": 1.3995, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.611703421802204e-05, |
|
"loss": 1.4077, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.592585853033905e-05, |
|
"loss": 1.4506, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.573469775649374e-05, |
|
"loss": 1.4236, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.554355259625092e-05, |
|
"loss": 1.3765, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.535242374931823e-05, |
|
"loss": 1.4404, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.516131191534359e-05, |
|
"loss": 1.4332, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.497021779391265e-05, |
|
"loss": 1.4082, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.477914208454618e-05, |
|
"loss": 1.4542, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.458808548669762e-05, |
|
"loss": 1.4153, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.439704869975043e-05, |
|
"loss": 1.4286, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.426333511085766e-05, |
|
"loss": 1.3663, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.407233360732119e-05, |
|
"loss": 1.389, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.388135380265187e-05, |
|
"loss": 1.4096, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.369039639595209e-05, |
|
"loss": 1.4111, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.349946208624212e-05, |
|
"loss": 1.3776, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.330855157245775e-05, |
|
"loss": 1.4375, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.31176655534477e-05, |
|
"loss": 1.4131, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.292680472797101e-05, |
|
"loss": 1.3917, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.273596979469446e-05, |
|
"loss": 1.4346, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.254516145219005e-05, |
|
"loss": 1.4056, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.235438039893248e-05, |
|
"loss": 1.4045, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.216362733329655e-05, |
|
"loss": 1.4248, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.197290295355454e-05, |
|
"loss": 1.4291, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.17822079578738e-05, |
|
"loss": 1.4162, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.159154304431409e-05, |
|
"loss": 1.4189, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.140090891082506e-05, |
|
"loss": 1.4231, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.121030625524365e-05, |
|
"loss": 1.4008, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.103879135550087e-05, |
|
"loss": 1.4321, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.084825043007008e-05, |
|
"loss": 1.4719, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.065774300561337e-05, |
|
"loss": 1.4252, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.04672697795039e-05, |
|
"loss": 1.4116, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.029587369187029e-05, |
|
"loss": 1.421, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.010546736343308e-05, |
|
"loss": 1.4574, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.991509725500809e-05, |
|
"loss": 1.3972, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.972476406346583e-05, |
|
"loss": 1.3886, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.953446848554158e-05, |
|
"loss": 1.4333, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.934421121783305e-05, |
|
"loss": 1.3839, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.915399295679763e-05, |
|
"loss": 1.4411, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.896381439874992e-05, |
|
"loss": 1.4303, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.877367623985927e-05, |
|
"loss": 1.453, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.858357917614699e-05, |
|
"loss": 1.4066, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.839352390348404e-05, |
|
"loss": 1.3924, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.820351111758849e-05, |
|
"loss": 1.3878, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.801354151402274e-05, |
|
"loss": 1.3849, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.782361578819118e-05, |
|
"loss": 1.4135, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.763373463533758e-05, |
|
"loss": 1.4187, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.748186227269857e-05, |
|
"loss": 1.4153, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.729206310269713e-05, |
|
"loss": 1.4083, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.710231045148006e-05, |
|
"loss": 1.4055, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.691260501365754e-05, |
|
"loss": 1.42, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.672294748366692e-05, |
|
"loss": 1.438, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.653333855577024e-05, |
|
"loss": 1.4077, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.634377892405157e-05, |
|
"loss": 1.4448, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.615426928241457e-05, |
|
"loss": 1.3921, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.596481032457986e-05, |
|
"loss": 1.4282, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.577540274408256e-05, |
|
"loss": 1.4084, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.558604723426972e-05, |
|
"loss": 1.4007, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.539674448829775e-05, |
|
"loss": 1.4371, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.520749519912991e-05, |
|
"loss": 1.4304, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.501830005953381e-05, |
|
"loss": 1.3866, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.482915976207883e-05, |
|
"loss": 1.4156, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.464007499913359e-05, |
|
"loss": 1.4735, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.445104646286339e-05, |
|
"loss": 1.3907, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 0.7795238494873047, |
|
"eval_runtime": 61.8635, |
|
"eval_samples_per_second": 8.47, |
|
"eval_steps_per_second": 0.533, |
|
"step": 10000 |
|
}, |
|
{ "epoch": 1.74, "learning_rate": 8.428096942593624e-05, "loss": 1.3785, "step": 10010 },
{ "epoch": 1.74, "learning_rate": 8.409204962652496e-05, "loss": 1.4134, "step": 10020 },
{ "epoch": 1.74, "learning_rate": 8.392207157637791e-05, "loss": 1.3956, "step": 10030 },
{ "epoch": 1.74, "learning_rate": 8.373326301036039e-05, "loss": 1.3801, "step": 10040 },
{ "epoch": 1.74, "learning_rate": 8.354451399050185e-05, "loss": 1.3976, "step": 10050 },
{ "epoch": 1.75, "learning_rate": 8.335582520773848e-05, "loss": 1.4065, "step": 10060 },
{ "epoch": 1.75, "learning_rate": 8.316719735278616e-05, "loss": 1.4058, "step": 10070 },
{ "epoch": 1.75, "learning_rate": 8.29786311161376e-05, "loss": 1.3986, "step": 10080 },
{ "epoch": 1.75, "learning_rate": 8.279012718806004e-05, "loss": 1.3801, "step": 10090 },
{ "epoch": 1.75, "learning_rate": 8.260168625859259e-05, "loss": 1.4051, "step": 10100 },
{ "epoch": 1.75, "learning_rate": 8.241330901754376e-05, "loss": 1.4178, "step": 10110 },
{ "epoch": 1.76, "learning_rate": 8.222499615448894e-05, "loss": 1.3994, "step": 10120 },
{ "epoch": 1.76, "learning_rate": 8.203674835876778e-05, "loss": 1.3948, "step": 10130 },
{ "epoch": 1.76, "learning_rate": 8.184856631948184e-05, "loss": 1.412, "step": 10140 },
{ "epoch": 1.76, "learning_rate": 8.16604507254919e-05, "loss": 1.4018, "step": 10150 },
{ "epoch": 1.76, "learning_rate": 8.147240226541555e-05, "loss": 1.395, "step": 10160 },
{ "epoch": 1.77, "learning_rate": 8.128442162762465e-05, "loss": 1.4177, "step": 10170 },
{ "epoch": 1.77, "learning_rate": 8.109650950024272e-05, "loss": 1.4684, "step": 10180 },
{ "epoch": 1.77, "learning_rate": 8.090866657114254e-05, "loss": 1.3745, "step": 10190 },
{ "epoch": 1.77, "learning_rate": 8.072089352794361e-05, "loss": 1.4243, "step": 10200 },
{ "epoch": 1.77, "learning_rate": 8.055195810962145e-05, "loss": 1.3984, "step": 10210 },
{ "epoch": 1.77, "learning_rate": 8.036431974310813e-05, "loss": 1.4003, "step": 10220 },
{ "epoch": 1.78, "learning_rate": 8.017675325513676e-05, "loss": 1.4197, "step": 10230 },
{ "epoch": 1.78, "learning_rate": 8.000800543960246e-05, "loss": 1.389, "step": 10240 },
{ "epoch": 1.78, "learning_rate": 7.9820577412245e-05, "loss": 1.3928, "step": 10250 },
{ "epoch": 1.78, "learning_rate": 7.96332232538574e-05, "loss": 1.4402, "step": 10260 },
{ "epoch": 1.78, "learning_rate": 7.94459436502699e-05, "loss": 1.4059, "step": 10270 },
{ "epoch": 1.78, "learning_rate": 7.925873928703986e-05, "loss": 1.4186, "step": 10280 },
{ "epoch": 1.79, "learning_rate": 7.90716108494492e-05, "loss": 1.4009, "step": 10290 },
{ "epoch": 1.79, "learning_rate": 7.888455902250194e-05, "loss": 1.4537, "step": 10300 },
{ "epoch": 1.79, "learning_rate": 7.869758449092155e-05, "loss": 1.4237, "step": 10310 },
{ "epoch": 1.79, "learning_rate": 7.851068793914867e-05, "loss": 1.4158, "step": 10320 },
{ "epoch": 1.79, "learning_rate": 7.832387005133845e-05, "loss": 1.3977, "step": 10330 },
{ "epoch": 1.79, "learning_rate": 7.813713151135805e-05, "loss": 1.4253, "step": 10340 },
{ "epoch": 1.8, "learning_rate": 7.795047300278422e-05, "loss": 1.3966, "step": 10350 },
{ "epoch": 1.8, "learning_rate": 7.776389520890071e-05, "loss": 1.4211, "step": 10360 },
{ "epoch": 1.8, "learning_rate": 7.757739881269582e-05, "loss": 1.3777, "step": 10370 },
{ "epoch": 1.8, "learning_rate": 7.739098449685987e-05, "loss": 1.3922, "step": 10380 },
{ "epoch": 1.8, "learning_rate": 7.720465294378272e-05, "loss": 1.3884, "step": 10390 },
{ "epoch": 1.81, "learning_rate": 7.70184048355513e-05, "loss": 1.4122, "step": 10400 },
{ "epoch": 1.81, "learning_rate": 7.683224085394702e-05, "loss": 1.403, "step": 10410 },
{ "epoch": 1.81, "learning_rate": 7.664616168044339e-05, "loss": 1.433, "step": 10420 },
{ "epoch": 1.81, "learning_rate": 7.646016799620345e-05, "loss": 1.3929, "step": 10430 },
{ "epoch": 1.81, "learning_rate": 7.631143505862324e-05, "loss": 1.4136, "step": 10440 },
{ "epoch": 1.81, "learning_rate": 7.614417685784577e-05, "loss": 1.3835, "step": 10450 },
{ "epoch": 1.82, "learning_rate": 7.595841739456996e-05, "loss": 1.3911, "step": 10460 },
{ "epoch": 1.82, "learning_rate": 7.577274593812058e-05, "loss": 1.3981, "step": 10470 },
{ "epoch": 1.82, "learning_rate": 7.558716316816814e-05, "loss": 1.4022, "step": 10480 },
{ "epoch": 1.82, "learning_rate": 7.54016697640586e-05, "loss": 1.4368, "step": 10490 },
{ "epoch": 1.82, "learning_rate": 7.521626640481061e-05, "loss": 1.4221, "step": 10500 },
{ "epoch": 1.82, "learning_rate": 7.503095376911342e-05, "loss": 1.4189, "step": 10510 },
{ "epoch": 1.83, "learning_rate": 7.484573253532406e-05, "loss": 1.4389, "step": 10520 },
{ "epoch": 1.83, "learning_rate": 7.466060338146498e-05, "loss": 1.3616, "step": 10530 },
{ "epoch": 1.83, "learning_rate": 7.447556698522156e-05, "loss": 1.4295, "step": 10540 },
{ "epoch": 1.83, "learning_rate": 7.429062402393965e-05, "loss": 1.3832, "step": 10550 },
{ "epoch": 1.83, "learning_rate": 7.410577517462307e-05, "loss": 1.3762, "step": 10560 },
{ "epoch": 1.83, "learning_rate": 7.392102111393116e-05, "loss": 1.4565, "step": 10570 },
{ "epoch": 1.84, "learning_rate": 7.373636251817615e-05, "loss": 1.4553, "step": 10580 },
{ "epoch": 1.84, "learning_rate": 7.355180006332097e-05, "loss": 1.4171, "step": 10590 },
{ "epoch": 1.84, "learning_rate": 7.336733442497654e-05, "loss": 1.3761, "step": 10600 },
{ "epoch": 1.84, "learning_rate": 7.318296627839935e-05, "loss": 1.3928, "step": 10610 },
{ "epoch": 1.84, "learning_rate": 7.299869629848908e-05, "loss": 1.3833, "step": 10620 },
{ "epoch": 1.84, "learning_rate": 7.281452515978599e-05, "loss": 1.3991, "step": 10630 },
{ "epoch": 1.85, "learning_rate": 7.263045353646861e-05, "loss": 1.4247, "step": 10640 },
{ "epoch": 1.85, "learning_rate": 7.248326834170777e-05, "loss": 1.473, "step": 10650 },
{ "epoch": 1.85, "learning_rate": 7.229937754384992e-05, "loss": 1.3753, "step": 10660 },
{ "epoch": 1.85, "learning_rate": 7.211558814713165e-05, "loss": 1.41, "step": 10670 },
{ "epoch": 1.85, "learning_rate": 7.195026494412065e-05, "loss": 1.4259, "step": 10680 },
{ "epoch": 1.86, "learning_rate": 7.176667006277049e-05, "loss": 1.4327, "step": 10690 },
{ "epoch": 1.86, "learning_rate": 7.158317853259342e-05, "loss": 1.397, "step": 10700 },
{ "epoch": 1.86, "learning_rate": 7.13997910252802e-05, "loss": 1.3827, "step": 10710 },
{ "epoch": 1.86, "learning_rate": 7.121650821214074e-05, "loss": 1.417, "step": 10720 },
{ "epoch": 1.86, "learning_rate": 7.103333076410166e-05, "loss": 1.4142, "step": 10730 },
{ "epoch": 1.86, "learning_rate": 7.085025935170397e-05, "loss": 1.4047, "step": 10740 },
{ "epoch": 1.87, "learning_rate": 7.066729464510045e-05, "loss": 1.4153, "step": 10750 },
{ "epoch": 1.87, "learning_rate": 7.04844373140533e-05, "loss": 1.3896, "step": 10760 },
{ "epoch": 1.87, "learning_rate": 7.030168802793164e-05, "loss": 1.4255, "step": 10770 },
{ "epoch": 1.87, "learning_rate": 7.011904745570912e-05, "loss": 1.4286, "step": 10780 },
{ "epoch": 1.87, "learning_rate": 6.993651626596138e-05, "loss": 1.4337, "step": 10790 },
{ "epoch": 1.87, "learning_rate": 6.97540951268637e-05, "loss": 1.3943, "step": 10800 },
{ "epoch": 1.88, "learning_rate": 6.95717847061885e-05, "loss": 1.4138, "step": 10810 },
{ "epoch": 1.88, "learning_rate": 6.938958567130285e-05, "loss": 1.3604, "step": 10820 },
{ "epoch": 1.88, "learning_rate": 6.920749868916618e-05, "loss": 1.4096, "step": 10830 },
{ "epoch": 1.88, "learning_rate": 6.902552442632765e-05, "loss": 1.3915, "step": 10840 },
{ "epoch": 1.88, "learning_rate": 6.88436635489238e-05, "loss": 1.3946, "step": 10850 },
{ "epoch": 1.88, "learning_rate": 6.868008625403449e-05, "loss": 1.3984, "step": 10860 },
{ "epoch": 1.89, "learning_rate": 6.851660182560898e-05, "loss": 1.4222, "step": 10870 },
{ "epoch": 1.89, "learning_rate": 6.833506196772657e-05, "loss": 1.43, "step": 10880 },
{ "epoch": 1.89, "learning_rate": 6.815363802279173e-05, "loss": 1.3911, "step": 10890 },
{ "epoch": 1.89, "learning_rate": 6.797233065492654e-05, "loss": 1.4225, "step": 10900 },
{ "epoch": 1.89, "learning_rate": 6.779114052782636e-05, "loss": 1.4097, "step": 10910 },
{ "epoch": 1.9, "learning_rate": 6.761006830475733e-05, "loss": 1.3827, "step": 10920 },
{ "epoch": 1.9, "learning_rate": 6.742911464855399e-05, "loss": 1.4585, "step": 10930 },
{ "epoch": 1.9, "learning_rate": 6.724828022161692e-05, "loss": 1.4062, "step": 10940 },
{ "epoch": 1.9, "learning_rate": 6.706756568591013e-05, "loss": 1.4324, "step": 10950 },
{ "epoch": 1.9, "learning_rate": 6.68869717029588e-05, "loss": 1.3791, "step": 10960 },
{ "epoch": 1.9, "learning_rate": 6.670649893384692e-05, "loss": 1.4147, "step": 10970 },
{ "epoch": 1.91, "learning_rate": 6.652614803921461e-05, "loss": 1.4273, "step": 10980 },
{ "epoch": 1.91, "learning_rate": 6.634591967925598e-05, "loss": 1.3976, "step": 10990 },
{ "epoch": 1.91, "learning_rate": 6.616581451371651e-05, "loss": 1.3826, "step": 11000 },
{ "epoch": 1.91, "eval_loss": 0.7706289887428284, "eval_runtime": 63.1592, "eval_samples_per_second": 8.296, "eval_steps_per_second": 0.522, "step": 11000 },
{ "epoch": 1.91, "learning_rate": 6.598583320189075e-05, "loss": 1.4273, "step": 11010 },
{ "epoch": 1.91, "learning_rate": 6.580597640261978e-05, "loss": 1.3904, "step": 11020 },
{ "epoch": 1.91, "learning_rate": 6.562624477428905e-05, "loss": 1.3824, "step": 11030 },
{ "epoch": 1.92, "learning_rate": 6.544663897482568e-05, "loss": 1.3709, "step": 11040 },
{ "epoch": 1.92, "learning_rate": 6.528510188239592e-05, "loss": 1.4232, "step": 11050 },
{ "epoch": 1.92, "learning_rate": 6.510573696871829e-05, "loss": 1.3842, "step": 11060 },
{ "epoch": 1.92, "learning_rate": 6.492649978928341e-05, "loss": 1.3655, "step": 11070 },
{ "epoch": 1.92, "learning_rate": 6.47473910002085e-05, "loss": 1.3651, "step": 11080 },
{ "epoch": 1.92, "learning_rate": 6.456841125714071e-05, "loss": 1.3925, "step": 11090 },
{ "epoch": 1.93, "learning_rate": 6.440744036422758e-05, "loss": 1.3946, "step": 11100 },
{ "epoch": 1.93, "learning_rate": 6.422870761318759e-05, "loss": 1.4216, "step": 11110 },
{ "epoch": 1.93, "learning_rate": 6.405010580685171e-05, "loss": 1.3654, "step": 11120 },
{ "epoch": 1.93, "learning_rate": 6.387163559901117e-05, "loss": 1.3438, "step": 11130 },
{ "epoch": 1.93, "learning_rate": 6.36932976429756e-05, "loss": 1.4255, "step": 11140 },
{ "epoch": 1.94, "learning_rate": 6.35150925915705e-05, "loss": 1.4268, "step": 11150 },
{ "epoch": 1.94, "learning_rate": 6.333702109713477e-05, "loss": 1.3947, "step": 11160 },
{ "epoch": 1.94, "learning_rate": 6.315908381151857e-05, "loss": 1.4452, "step": 11170 },
{ "epoch": 1.94, "learning_rate": 6.298128138608059e-05, "loss": 1.4187, "step": 11180 },
{ "epoch": 1.94, "learning_rate": 6.280361447168603e-05, "loss": 1.3878, "step": 11190 },
{ "epoch": 1.94, "learning_rate": 6.264383064821323e-05, "loss": 1.3753, "step": 11200 },
{ "epoch": 1.95, "learning_rate": 6.246642299615586e-05, "loss": 1.372, "step": 11210 },
{ "epoch": 1.95, "learning_rate": 6.230687356416249e-05, "loss": 1.361, "step": 11220 },
{ "epoch": 1.95, "learning_rate": 6.212972751884663e-05, "loss": 1.4421, "step": 11230 },
{ "epoch": 1.95, "learning_rate": 6.195272010177959e-05, "loss": 1.4402, "step": 11240 },
{ "epoch": 1.95, "learning_rate": 6.177585196091631e-05, "loss": 1.3958, "step": 11250 },
{ "epoch": 1.95, "learning_rate": 6.159912374370183e-05, "loss": 1.4365, "step": 11260 },
{ "epoch": 1.96, "learning_rate": 6.142253609706898e-05, "loss": 1.4041, "step": 11270 },
{ "epoch": 1.96, "learning_rate": 6.124608966743606e-05, "loss": 1.4321, "step": 11280 },
{ "epoch": 1.96, "learning_rate": 6.106978510070443e-05, "loss": 1.4154, "step": 11290 },
{ "epoch": 1.96, "learning_rate": 6.089362304225603e-05, "loss": 1.4208, "step": 11300 },
{ "epoch": 1.96, "learning_rate": 6.071760413695131e-05, "loss": 1.3757, "step": 11310 },
{ "epoch": 1.96, "learning_rate": 6.054172902912656e-05, "loss": 1.4266, "step": 11320 },
{ "epoch": 1.97, "learning_rate": 6.0365998362591744e-05, "loss": 1.4081, "step": 11330 },
{ "epoch": 1.97, "learning_rate": 6.019041278062807e-05, "loss": 1.4108, "step": 11340 },
{ "epoch": 1.97, "learning_rate": 6.0032510335413086e-05, "loss": 1.3667, "step": 11350 },
{ "epoch": 1.97, "learning_rate": 5.985720218447026e-05, "loss": 1.3987, "step": 11360 },
{ "epoch": 1.97, "learning_rate": 5.9682040980602316e-05, "loss": 1.3553, "step": 11370 },
{ "epoch": 1.98, "learning_rate": 5.9524522066830346e-05, "loss": 1.3722, "step": 11380 },
{ "epoch": 1.98, "learning_rate": 5.934964182845485e-05, "loss": 1.4445, "step": 11390 },
{ "epoch": 1.98, "learning_rate": 5.917491039513411e-05, "loss": 1.3968, "step": 11400 },
{ "epoch": 1.98, "learning_rate": 5.9000328406491425e-05, "loss": 1.3855, "step": 11410 },
{ "epoch": 1.98, "learning_rate": 5.882589650160322e-05, "loss": 1.3988, "step": 11420 },
{ "epoch": 1.98, "learning_rate": 5.865161531899642e-05, "loss": 1.3642, "step": 11430 },
{ "epoch": 1.99, "learning_rate": 5.8477485496646245e-05, "loss": 1.4189, "step": 11440 },
{ "epoch": 1.99, "learning_rate": 5.8303507671973864e-05, "loss": 1.4004, "step": 11450 },
{ "epoch": 1.99, "learning_rate": 5.812968248184392e-05, "loss": 1.3928, "step": 11460 },
{ "epoch": 1.99, "learning_rate": 5.795601056256257e-05, "loss": 1.4273, "step": 11470 },
{ "epoch": 1.99, "learning_rate": 5.778249254987461e-05, "loss": 1.3824, "step": 11480 },
{ "epoch": 1.99, "learning_rate": 5.7609129078961655e-05, "loss": 1.4004, "step": 11490 },
{ "epoch": 2.0, "learning_rate": 5.7435920784439514e-05, "loss": 1.3801, "step": 11500 },
{ "epoch": 2.0, "learning_rate": 5.7262868300355975e-05, "loss": 1.373, "step": 11510 },
{ "epoch": 2.0, "learning_rate": 5.7089972260188485e-05, "loss": 1.414, "step": 11520 },
{ "epoch": 2.0, "learning_rate": 5.6917233296841776e-05, "loss": 1.3001, "step": 11530 },
{ "epoch": 2.0, "learning_rate": 5.6744652042645616e-05, "loss": 1.3162, "step": 11540 },
{ "epoch": 2.0, "learning_rate": 5.6572229129352474e-05, "loss": 1.2765, "step": 11550 },
{ "epoch": 2.01, "learning_rate": 5.6399965188135084e-05, "loss": 1.2453, "step": 11560 },
{ "epoch": 2.01, "learning_rate": 5.622786084958437e-05, "loss": 1.2787, "step": 11570 },
{ "epoch": 2.01, "learning_rate": 5.6090292716047934e-05, "loss": 1.2726, "step": 11580 },
{ "epoch": 2.01, "learning_rate": 5.591847724951989e-05, "loss": 1.2786, "step": 11590 },
{ "epoch": 2.01, "learning_rate": 5.574682314819745e-05, "loss": 1.2788, "step": 11600 },
{ "epoch": 2.02, "learning_rate": 5.557533104043913e-05, "loss": 1.2654, "step": 11610 },
{ "epoch": 2.02, "learning_rate": 5.54040015540104e-05, "loss": 1.2507, "step": 11620 },
{ "epoch": 2.02, "learning_rate": 5.5249944575829906e-05, "loss": 1.3315, "step": 11630 },
{ "epoch": 2.02, "learning_rate": 5.507892579728751e-05, "loss": 1.2968, "step": 11640 },
{ "epoch": 2.02, "learning_rate": 5.490807145722008e-05, "loss": 1.3051, "step": 11650 },
{ "epoch": 2.02, "learning_rate": 5.47373821810585e-05, "loss": 1.306, "step": 11660 },
{ "epoch": 2.03, "learning_rate": 5.4566858593629454e-05, "loss": 1.3139, "step": 11670 },
{ "epoch": 2.03, "learning_rate": 5.439650131915299e-05, "loss": 1.277, "step": 11680 },
{ "epoch": 2.03, "learning_rate": 5.4226310981240466e-05, "loss": 1.2737, "step": 11690 },
{ "epoch": 2.03, "learning_rate": 5.4056288202892126e-05, "loss": 1.311, "step": 11700 },
{ "epoch": 2.03, "learning_rate": 5.3886433606494804e-05, "loss": 1.2775, "step": 11710 },
{ "epoch": 2.03, "learning_rate": 5.37167478138197e-05, "loss": 1.2633, "step": 11720 },
{ "epoch": 2.04, "learning_rate": 5.354723144602016e-05, "loss": 1.3022, "step": 11730 },
{ "epoch": 2.04, "learning_rate": 5.337788512362931e-05, "loss": 1.2979, "step": 11740 },
{ "epoch": 2.04, "learning_rate": 5.320870946655765e-05, "loss": 1.2726, "step": 11750 },
{ "epoch": 2.04, "learning_rate": 5.303970509409113e-05, "loss": 1.2303, "step": 11760 },
{ "epoch": 2.04, "learning_rate": 5.2870872624888615e-05, "loss": 1.2648, "step": 11770 },
{ "epoch": 2.04, "learning_rate": 5.2702212676979704e-05, "loss": 1.2865, "step": 11780 },
{ "epoch": 2.05, "learning_rate": 5.253372586776248e-05, "loss": 1.2944, "step": 11790 },
{ "epoch": 2.05, "learning_rate": 5.236541281400122e-05, "loss": 1.3188, "step": 11800 },
{ "epoch": 2.05, "learning_rate": 5.219727413182419e-05, "loss": 1.3009, "step": 11810 },
{ "epoch": 2.05, "learning_rate": 5.202931043672124e-05, "loss": 1.2727, "step": 11820 },
{ "epoch": 2.05, "learning_rate": 5.186152234354172e-05, "loss": 1.2515, "step": 11830 },
{ "epoch": 2.05, "learning_rate": 5.172741871515152e-05, "loss": 1.3243, "step": 11840 },
{ "epoch": 2.06, "learning_rate": 5.1559948252801414e-05, "loss": 1.3009, "step": 11850 },
{ "epoch": 2.06, "learning_rate": 5.139265511052607e-05, "loss": 1.3033, "step": 11860 },
{ "epoch": 2.06, "learning_rate": 5.122553990072023e-05, "loss": 1.2961, "step": 11870 },
{ "epoch": 2.06, "learning_rate": 5.10586032351273e-05, "loss": 1.2361, "step": 11880 },
{ "epoch": 2.06, "learning_rate": 5.090851339647496e-05, "loss": 1.2856, "step": 11890 },
{ "epoch": 2.07, "learning_rate": 5.074191764789694e-05, "loss": 1.2688, "step": 11900 },
{ "epoch": 2.07, "learning_rate": 5.0575502213883655e-05, "loss": 1.2338, "step": 11910 },
{ "epoch": 2.07, "learning_rate": 5.040926770361687e-05, "loss": 1.3065, "step": 11920 },
{ "epoch": 2.07, "learning_rate": 5.0243214725616126e-05, "loss": 1.2683, "step": 11930 },
{ "epoch": 2.07, "learning_rate": 5.00773438877363e-05, "loss": 1.3036, "step": 11940 },
{ "epoch": 2.07, "learning_rate": 4.99116557971657e-05, "loss": 1.25, "step": 11950 },
{ "epoch": 2.08, "learning_rate": 4.9746151060423564e-05, "loss": 1.2719, "step": 11960 },
{ "epoch": 2.08, "learning_rate": 4.958083028335794e-05, "loss": 1.2411, "step": 11970 },
{ "epoch": 2.08, "learning_rate": 4.9415694071143584e-05, "loss": 1.286, "step": 11980 },
{ "epoch": 2.08, "learning_rate": 4.9250743028279486e-05, "loss": 1.2786, "step": 11990 },
{ "epoch": 2.08, "learning_rate": 4.9085977758586906e-05, "loss": 1.2634, "step": 12000 },
{ "epoch": 2.08, "eval_loss": 0.7820777893066406, "eval_runtime": 62.0421, "eval_samples_per_second": 8.446, "eval_steps_per_second": 0.532, "step": 12000 },
{ "epoch": 2.08, "learning_rate": 4.8921398865207045e-05, "loss": 1.3052, "step": 12010 },
{ "epoch": 2.09, "learning_rate": 4.875700695059875e-05, "loss": 1.3028, "step": 12020 },
{ "epoch": 2.09, "learning_rate": 4.859280261653654e-05, "loss": 1.3132, "step": 12030 },
{ "epoch": 2.09, "learning_rate": 4.8428786464108225e-05, "loss": 1.3153, "step": 12040 },
{ "epoch": 2.09, "learning_rate": 4.826495909371276e-05, "loss": 1.3391, "step": 12050 },
{ "epoch": 2.09, "learning_rate": 4.810132110505804e-05, "loss": 1.2821, "step": 12060 },
{ "epoch": 2.09, "learning_rate": 4.793787309715871e-05, "loss": 1.2542, "step": 12070 },
{ "epoch": 2.1, "learning_rate": 4.779093281812042e-05, "loss": 1.2344, "step": 12080 },
{ "epoch": 2.1, "learning_rate": 4.7627847421449165e-05, "loss": 1.2916, "step": 12090 },
{ "epoch": 2.1, "learning_rate": 4.746495373873521e-05, "loss": 1.2703, "step": 12100 },
{ "epoch": 2.1, "learning_rate": 4.730225236626855e-05, "loss": 1.3033, "step": 12110 },
{ "epoch": 2.1, "learning_rate": 4.713974389963527e-05, "loss": 1.2804, "step": 12120 },
{ "epoch": 2.11, "learning_rate": 4.697742893371525e-05, "loss": 1.317, "step": 12130 },
{ "epoch": 2.11, "learning_rate": 4.6815308062680086e-05, "loss": 1.3142, "step": 12140 },
{ "epoch": 2.11, "learning_rate": 4.665338187999084e-05, "loss": 1.2592, "step": 12150 },
{ "epoch": 2.11, "learning_rate": 4.649165097839591e-05, "loss": 1.2689, "step": 12160 },
{ "epoch": 2.11, "learning_rate": 4.6330115949928876e-05, "loss": 1.2734, "step": 12170 },
{ "epoch": 2.11, "learning_rate": 4.618490238457079e-05, "loss": 1.2713, "step": 12180 },
{ "epoch": 2.12, "learning_rate": 4.602374114352934e-05, "loss": 1.3216, "step": 12190 },
{ "epoch": 2.12, "learning_rate": 4.586277748845055e-05, "loss": 1.2775, "step": 12200 },
{ "epoch": 2.12, "learning_rate": 4.570201200855939e-05, "loss": 1.2749, "step": 12210 },
{ "epoch": 2.12, "learning_rate": 4.554144529235537e-05, "loss": 1.2809, "step": 12220 },
{ "epoch": 2.12, "learning_rate": 4.538107792761041e-05, "loss": 1.2817, "step": 12230 },
{ "epoch": 2.12, "learning_rate": 4.522091050136663e-05, "loss": 1.2324, "step": 12240 },
{ "epoch": 2.13, "learning_rate": 4.50609435999344e-05, "loss": 1.2649, "step": 12250 },
{ "epoch": 2.13, "learning_rate": 4.4901177808889936e-05, "loss": 1.2493, "step": 12260 },
{ "epoch": 2.13, "learning_rate": 4.474161371307322e-05, "loss": 1.2946, "step": 12270 },
{ "epoch": 2.13, "learning_rate": 4.458225189658598e-05, "loss": 1.2523, "step": 12280 },
{ "epoch": 2.13, "learning_rate": 4.44230929427895e-05, "loss": 1.2769, "step": 12290 },
{ "epoch": 2.13, "learning_rate": 4.426413743430241e-05, "loss": 1.2823, "step": 12300 },
{ "epoch": 2.14, "learning_rate": 4.410538595299864e-05, "loss": 1.2536, "step": 12310 },
{ "epoch": 2.14, "learning_rate": 4.3962684543383956e-05, "loss": 1.2686, "step": 12320 },
{ "epoch": 2.14, "learning_rate": 4.380432231411452e-05, "loss": 1.2826, "step": 12330 },
{ "epoch": 2.14, "learning_rate": 4.364616579523162e-05, "loss": 1.2906, "step": 12340 },
{ "epoch": 2.14, "learning_rate": 4.348821556568439e-05, "loss": 1.2777, "step": 12350 },
{ "epoch": 2.15, "learning_rate": 4.3346237214366844e-05, "loss": 1.2865, "step": 12360 },
{ "epoch": 2.15, "learning_rate": 4.3188680526855985e-05, "loss": 1.2784, "step": 12370 },
{ "epoch": 2.15, "learning_rate": 4.303133180335535e-05, "loss": 1.2866, "step": 12380 },
{ "epoch": 2.15, "learning_rate": 4.287419161985704e-05, "loss": 1.2954, "step": 12390 },
{ "epoch": 2.15, "learning_rate": 4.2717260551589775e-05, "loss": 1.2677, "step": 12400 },
{ "epoch": 2.15, "learning_rate": 4.2560539173016813e-05, "loss": 1.2825, "step": 12410 },
{ "epoch": 2.16, "learning_rate": 4.240402805783377e-05, "loss": 1.2749, "step": 12420 },
{ "epoch": 2.16, "learning_rate": 4.224772777896659e-05, "loss": 1.2797, "step": 12430 },
{ "epoch": 2.16, "learning_rate": 4.209163890856951e-05, "loss": 1.2819, "step": 12440 },
{ "epoch": 2.16, "learning_rate": 4.193576201802268e-05, "loss": 1.2607, "step": 12450 },
{ "epoch": 2.16, "learning_rate": 4.1780097677930485e-05, "loss": 1.2767, "step": 12460 },
{ "epoch": 2.16, "learning_rate": 4.162464645811913e-05, "loss": 1.2333, "step": 12470 },
{ "epoch": 2.17, "learning_rate": 4.146940892763472e-05, "loss": 1.2667, "step": 12480 },
{ "epoch": 2.17, "learning_rate": 4.131438565474112e-05, "loss": 1.3182, "step": 12490 },
{ "epoch": 2.17, "learning_rate": 4.11595772069178e-05, "loss": 1.2889, "step": 12500 },
{ "epoch": 2.17, "learning_rate": 4.100498415085804e-05, "loss": 1.3046, "step": 12510 },
{ "epoch": 2.17, "learning_rate": 4.085060705246642e-05, "loss": 1.2576, "step": 12520 },
{ "epoch": 2.17, "learning_rate": 4.069644647685712e-05, "loss": 1.2588, "step": 12530 },
{ "epoch": 2.18, "learning_rate": 4.0542502988351686e-05, "loss": 1.2901, "step": 12540 },
{ "epoch": 2.18, "learning_rate": 4.038877715047699e-05, "loss": 1.3228, "step": 12550 },
{ "epoch": 2.18, "learning_rate": 4.0250610452792004e-05, "loss": 1.2813, "step": 12560 },
{ "epoch": 2.18, "learning_rate": 4.011262091761672e-05, "loss": 1.3074, "step": 12570 },
{ "epoch": 2.18, "learning_rate": 3.997480895410295e-05, "loss": 1.2753, "step": 12580 },
{ "epoch": 2.19, "learning_rate": 3.9821893310242744e-05, "loss": 1.2519, "step": 12590 },
{ "epoch": 2.19, "learning_rate": 3.966919795488333e-05, "loss": 1.331, "step": 12600 },
{ "epoch": 2.19, "learning_rate": 3.9516723446982664e-05, "loss": 1.3126, "step": 12610 },
{ "epoch": 2.19, "learning_rate": 3.936447034469024e-05, "loss": 1.2616, "step": 12620 },
{ "epoch": 2.19, "learning_rate": 3.92124392053451e-05, "loss": 1.2957, "step": 12630 },
{ "epoch": 2.19, "learning_rate": 3.9060630585473746e-05, "loss": 1.309, "step": 12640 },
{ "epoch": 2.2, "learning_rate": 3.890904504078814e-05, "loss": 1.2873, "step": 12650 },
{ "epoch": 2.2, "learning_rate": 3.8757683126183654e-05, "loss": 1.283, "step": 12660 },
{ "epoch": 2.2, "learning_rate": 3.8606545395737005e-05, "loss": 1.3069, "step": 12670 },
{ "epoch": 2.2, "learning_rate": 3.84556324027043e-05, "loss": 1.2527, "step": 12680 },
{ "epoch": 2.2, "learning_rate": 3.8304944699518954e-05, "loss": 1.2922, "step": 12690 },
{ "epoch": 2.2, "learning_rate": 3.816951884539331e-05, "loss": 1.2795, "step": 12700 },
{ "epoch": 2.21, "learning_rate": 3.801926071191671e-05, "loss": 1.285, "step": 12710 },
{ "epoch": 2.21, "learning_rate": 3.786922946567352e-05, "loss": 1.2804, "step": 12720 },
{ "epoch": 2.21, "learning_rate": 3.771942565586933e-05, "loss": 1.318, "step": 12730 },
{ "epoch": 2.21, "learning_rate": 3.7569849830877333e-05, "loss": 1.3102, "step": 12740 },
{ "epoch": 2.21, "learning_rate": 3.742050253823604e-05, "loss": 1.3083, "step": 12750 },
{ "epoch": 2.21, "learning_rate": 3.7286285821885306e-05, "loss": 1.2854, "step": 12760 },
{ "epoch": 2.22, "learning_rate": 3.713737424618142e-05, "loss": 1.305, "step": 12770 },
{ "epoch": 2.22, "learning_rate": 3.6988692785952173e-05, "loss": 1.2948, "step": 12780 },
{ "epoch": 2.22, "learning_rate": 3.68402419854622e-05, "loss": 1.2779, "step": 12790 },
{ "epoch": 2.22, "learning_rate": 3.6692022388131795e-05, "loss": 1.3145, "step": 12800 },
{ "epoch": 2.22, "learning_rate": 3.654403453653494e-05, "loss": 1.2673, "step": 12810 },
{ "epoch": 2.23, "learning_rate": 3.639627897239718e-05, "loss": 1.2883, "step": 12820 },
{ "epoch": 2.23, "learning_rate": 3.6248756236593863e-05, "loss": 1.2678, "step": 12830 },
{ "epoch": 2.23, "learning_rate": 3.6101466869147995e-05, "loss": 1.2771, "step": 12840 },
{ "epoch": 2.23, "learning_rate": 3.5954411409228294e-05, "loss": 1.3139, "step": 12850 },
{ "epoch": 2.23, "learning_rate": 3.580759039514729e-05, "loss": 1.2914, "step": 12860 },
{ "epoch": 2.23, "learning_rate": 3.566100436435924e-05, "loss": 1.2685, "step": 12870 },
{ "epoch": 2.24, "learning_rate": 3.551465385345826e-05, "loss": 1.2932, "step": 12880 },
{ "epoch": 2.24, "learning_rate": 3.5383140205951094e-05, "loss": 1.2751, "step": 12890 },
{ "epoch": 2.24, "learning_rate": 3.5237238658062945e-05, "loss": 1.2775, "step": 12900 },
{ "epoch": 2.24, "learning_rate": 3.5091574181302256e-05, "loss": 1.2826, "step": 12910 },
{ "epoch": 2.24, "learning_rate": 3.494614730888971e-05, "loss": 1.2661, "step": 12920 },
{ "epoch": 2.24, "learning_rate": 3.480095857317618e-05, "loss": 1.27, "step": 12930 },
{ "epoch": 2.25, "learning_rate": 3.4656008505640814e-05, "loss": 1.3088, "step": 12940 },
{ "epoch": 2.25, "learning_rate": 3.4511297636889095e-05, "loss": 1.263, "step": 12950 },
{ "epoch": 2.25, "learning_rate": 3.4366826496650886e-05, "loss": 1.2896, "step": 12960 },
{ "epoch": 2.25, "learning_rate": 3.422259561377853e-05, "loss": 1.2919, "step": 12970 },
{ "epoch": 2.25, "learning_rate": 3.4078605516244785e-05, "loss": 1.2451, "step": 12980 },
{ "epoch": 2.25, "learning_rate": 3.396358715789669e-05, "loss": 1.3136, "step": 12990 },
{ "epoch": 2.26, "learning_rate": 3.383437644428432e-05, "loss": 1.2834, "step": 13000 },
{ "epoch": 2.26, "eval_loss": 0.78007972240448, "eval_runtime": 61.984, "eval_samples_per_second": 8.454, "eval_steps_per_second": 0.532, "step": 13000 },
{ "epoch": 2.26, "learning_rate": 3.369103909748521e-05, "loss": 1.2585, "step": 13010 },
{ "epoch": 2.26, "learning_rate": 3.354794448184514e-05, "loss": 1.27, "step": 13020 },
{ "epoch": 2.26, "learning_rate": 3.340509312117752e-05, "loss": 1.2923, "step": 13030 },
{ "epoch": 2.26, "learning_rate": 3.32624855384053e-05, "loss": 1.2711, "step": 13040 },
{ "epoch": 2.26, "learning_rate": 3.3120122255559e-05, "loss": 1.2891, "step": 13050 },
{ "epoch": 2.27, "learning_rate": 3.2978003793774914e-05, "loss": 1.2635, "step": 13060 },
{ "epoch": 2.27, "learning_rate": 3.283613067329311e-05, "loss": 1.2926, "step": 13070 },
{ "epoch": 2.27, "learning_rate": 3.269450341345558e-05, "loss": 1.2621, "step": 13080 },
{ "epoch": 2.27, "learning_rate": 3.2553122532704325e-05, "loss": 1.291, "step": 13090 },
{ "epoch": 2.27, "learning_rate": 3.241198854857938e-05, "loss": 1.2658, "step": 13100 },
{ "epoch": 2.28, "learning_rate": 3.227110197771703e-05, "loss": 1.2671, "step": 13110 },
{ "epoch": 2.28, "learning_rate": 3.213046333584792e-05, "loss": 1.2686, "step": 13120 },
{ "epoch": 2.28, "learning_rate": 3.1990073137795066e-05, "loss": 1.2723, "step": 13130 },
{ "epoch": 2.28, "learning_rate": 3.186393480377876e-05, "loss": 1.3037, "step": 13140 },
{ "epoch": 2.28, "learning_rate": 3.172401806405554e-05, "loss": 1.284, "step": 13150 },
{ "epoch": 2.28, "learning_rate": 3.1584351255985664e-05, "loss": 1.2582, "step": 13160 },
{ "epoch": 2.29, "learning_rate": 3.144493489083469e-05, "loss": 1.3076, "step": 13170 },
{ "epoch": 2.29, "learning_rate": 3.130576947895139e-05, "loss": 1.3193, "step": 13180 },
{ "epoch": 2.29, "learning_rate": 3.1166855529765825e-05, "loss": 1.2829, "step": 13190 },
{ "epoch": 2.29, "learning_rate": 3.102819355178763e-05, "loss": 1.3018, "step": 13200 },
{ "epoch": 2.29, "learning_rate": 3.0889784052604066e-05, "loss": 1.2842, "step": 13210 },
{ "epoch": 2.29, "learning_rate": 3.075162753887814e-05, "loss": 1.2816, "step": 13220 },
{ "epoch": 2.3, "learning_rate": 3.061372451634678e-05, "loss": 1.2915, "step": 13230 },
{ "epoch": 2.3, "learning_rate": 3.0503584951244668e-05, "loss": 1.2235, "step": 13240 },
{ "epoch": 2.3, "learning_rate": 3.0366139484357482e-05, "loss": 1.2766, "step": 13250 },
{ "epoch": 2.3, "learning_rate": 3.0228948919785782e-05, "loss": 1.2729, "step": 13260 },
{ "epoch": 2.3, "learning_rate": 3.0092013759730564e-05, "loss": 1.2784, "step": 13270 },
{ "epoch": 2.3, "learning_rate": 2.9955334505457845e-05, "loss": 1.2827, "step": 13280 },
{ "epoch": 2.31, "learning_rate": 2.981891165729691e-05, "loss": 1.279, "step": 13290 },
{ "epoch": 2.31, "learning_rate": 2.9682745714638417e-05, "loss": 1.2917, "step": 13300 },
{ "epoch": 2.31, "learning_rate": 2.9546837175932596e-05, "loss": 1.2764, "step": 13310 },
{ "epoch": 2.31, "learning_rate": 2.941118653868744e-05, "loss": 1.3066, "step": 13320 },
{ "epoch": 2.31, "learning_rate": 2.9289321881345254e-05, "loss": 1.2945, "step": 13330 },
{ "epoch": 2.32, "learning_rate": 2.9154162624127146e-05, "loss": 1.2654, "step": 13340 },
{ "epoch": 2.32, "learning_rate": 2.9019262705797567e-05, "loss": 1.2715, "step": 13350 },
{ "epoch": 2.32, "learning_rate": 2.888462262017233e-05, "loss": 1.3311, "step": 13360 },
{ "epoch": 2.32, "learning_rate": 2.875024286011615e-05, "loss": 1.306, "step": 13370 },
{ "epoch": 2.32, "learning_rate": 2.8616123917540673e-05, "loss": 1.2865, "step": 13380 },
{ "epoch": 2.32, "learning_rate": 2.848226628340287e-05, "loss": 1.2676, "step": 13390 },
{ "epoch": 2.33, "learning_rate": 2.8348670447703218e-05, "loss": 1.2997, "step": 13400 },
{ "epoch": 2.33, "learning_rate": 2.8215336899483768e-05, "loss": 1.2863, "step": 13410 },
{ "epoch": 2.33, "learning_rate": 2.808226612682646e-05, "loss": 1.2532, "step": 13420 },
{ "epoch": 2.33, "learning_rate": 2.7949458616851343e-05, "loss": 1.2524, "step": 13430 },
{ "epoch": 2.33, "learning_rate": 2.781691485571475e-05, "loss": 1.263, "step": 13440 },
{ "epoch": 2.33, "learning_rate": 2.7684635328607477e-05, "loss": 1.2607, "step": 13450 },
{ "epoch": 2.34, "learning_rate": 2.7552620519753137e-05, "loss": 1.2505, "step": 13460 },
{ "epoch": 2.34, "learning_rate": 2.742087091240628e-05, "loss": 1.2687, "step": 13470 },
{ "epoch": 2.34, "learning_rate": 2.7302523411710645e-05, "loss": 1.2855, "step": 13480 },
{ "epoch": 2.34, "learning_rate": 2.7171279015116002e-05, "loss": 1.2412, "step": 13490 },
{ "epoch": 2.34, "learning_rate": 2.7040301215970876e-05, "loss": 1.2575, "step": 13500 },
{ "epoch": 2.34, "learning_rate": 2.6922649533852228e-05, "loss": 1.3001, "step": 13510 },
{ "epoch": 2.35, "learning_rate": 2.6792179589961273e-05, "loss": 1.2726, "step": 13520 },
{ "epoch": 2.35, "learning_rate": 2.66619776312545e-05, "loss": 1.2727, "step": 13530 },
{ "epoch": 2.35, "learning_rate": 2.6532044134350288e-05, "loss": 1.2466, "step": 13540 },
{ "epoch": 2.35, "learning_rate": 2.6402379574884418e-05, "loss": 1.2975, "step": 13550 },
{ "epoch": 2.35, "learning_rate": 2.627298442750803e-05, "loss": 1.2745, "step": 13560 },
{ "epoch": 2.36, "learning_rate": 2.614385916588613e-05, "loss": 1.2988, "step": 13570 },
{ "epoch": 2.36, "learning_rate": 2.6015004262695798e-05, "loss": 1.2541, "step": 13580 },
{ "epoch": 2.36, "learning_rate": 2.5886420189624407e-05, "loss": 1.2596, "step": 13590 },
{ "epoch": 2.36, "learning_rate": 2.5758107417367915e-05, "loss": 1.267, "step": 13600 },
{ "epoch": 2.36, "learning_rate": 2.5630066415629195e-05, "loss": 1.2716, "step": 13610 },
{ "epoch": 2.36, "learning_rate": 2.550229765311628e-05, "loss": 1.2824, "step": 13620 },
{ "epoch": 2.37, "learning_rate": 2.5400278969684065e-05, "loss": 1.3018, "step": 13630 },
{ "epoch": 2.37, "learning_rate": 2.5273001415739562e-05, "loss": 1.2786, "step": 13640 },
{ "epoch": 2.37, "learning_rate": 2.5145997408096057e-05, "loss": 1.2634, "step": 13650 },
{ "epoch": 2.37, "learning_rate": 2.503192806757474e-05, "loss": 1.3144, "step": 13660 },
{ "epoch": 2.37, "learning_rate": 2.4905445077906675e-05, "loss": 1.2597, "step": 13670 },
{ "epoch": 2.37, "learning_rate": 2.477923698001955e-05, "loss": 1.2646, "step": 13680 },
{ "epoch": 2.38, "learning_rate": 2.4653304235911823e-05, "loss": 1.2471, "step": 13690 },
{ "epoch": 2.38, "learning_rate": 2.4527647306573998e-05, "loss": 1.2835, "step": 13700 },
{ "epoch": 2.38, "learning_rate": 2.4402266651986927e-05, "loss": 1.2674, "step": 13710 },
{ "epoch": 2.38, "learning_rate": 2.4277162731120108e-05, "loss": 1.2984, "step": 13720 },
{ "epoch": 2.38, "learning_rate": 2.4152336001930054e-05, "loss": 1.2879, "step": 13730 },
{ "epoch": 2.38, "learning_rate": 2.4027786921358607e-05, "loss": 1.2361, "step": 13740 },
{ "epoch": 2.39, "learning_rate": 2.3903515945331155e-05, "loss": 1.3072, "step": 13750 },
{ "epoch": 2.39, "learning_rate": 2.3779523528755145e-05, "loss": 1.2665, "step": 13760 },
{ "epoch": 2.39, "learning_rate": 2.3655810125518284e-05, "loss": 1.3312, "step": 13770 },
{ "epoch": 2.39, "learning_rate": 2.3532376188486948e-05, "loss": 1.3107, "step": 13780 },
{ "epoch": 2.39, "learning_rate": 2.340922216950443e-05, "loss": 1.2497, "step": 13790 },
{ "epoch": 2.4, "learning_rate": 2.328634851938949e-05, "loss": 1.3204, "step": 13800 },
{ "epoch": 2.4, "learning_rate": 2.316375568793443e-05, "loss": 1.2692, "step": 13810 },
{ "epoch": 2.4, "learning_rate": 2.3041444123903668e-05, "loss": 1.26, "step": 13820 },
{ "epoch": 2.4, "learning_rate": 2.2919414275031914e-05, "loss": 1.2608, "step": 13830 },
{ "epoch": 2.4, "learning_rate": 2.2797666588022748e-05, "loss": 1.2862, "step": 13840 },
{ "epoch": 2.4, "learning_rate": 2.2676201508546792e-05, "loss": 1.2762, "step": 13850 },
{ "epoch": 2.41, "learning_rate": 2.2567124933972495e-05, "loss": 1.3093, "step": 13860 },
{ "epoch": 2.41, "learning_rate": 2.2470360715755768e-05, "loss": 1.2785, "step": 13870 },
{ "epoch": 2.41, "learning_rate": 2.2349660894643332e-05, "loss": 1.2841, "step": 13880 },
{ "epoch": 2.41, "learning_rate": 2.222924532103765e-05, "loss": 1.2676, "step": 13890 },
{ "epoch": 2.41, "learning_rate": 2.2109114435733026e-05, "loss": 1.2522, "step": 13900 },
{ "epoch": 2.41, "learning_rate": 2.19892686784816e-05, "loss": 1.2573, "step": 13910 },
{ "epoch": 2.42, "learning_rate": 2.1869708487991812e-05, "loss": 1.2572, "step": 13920 },
{ "epoch": 2.42, "learning_rate": 2.1750434301926704e-05, "loss": 1.23, "step": 13930 },
{ "epoch": 2.42, "learning_rate": 2.163144655690249e-05, "loss": 1.2547, "step": 13940 },
{ "epoch": 2.42, "learning_rate": 2.1512745688486646e-05, "loss": 1.29, "step": 13950 },
{ "epoch": 2.42, "learning_rate": 2.139433213119664e-05, "loss": 1.2863, "step": 13960 },
{ "epoch": 2.42, "learning_rate": 2.127620631849816e-05, "loss": 1.2471, "step": 13970 },
{ "epoch": 2.43, "learning_rate": 2.11583686828036e-05, "loss": 1.2756, "step": 13980 },
{ "epoch": 2.43, "learning_rate": 2.104081965547041e-05, "loss": 1.272, "step": 13990 },
{ "epoch": 2.43, "learning_rate": 2.092355966679961e-05, "loss": 1.2714, "step": 14000 },
{ "epoch": 2.43, "eval_loss": 0.7770761251449585, "eval_runtime": 62.0435, "eval_samples_per_second": 8.446, "eval_steps_per_second": 0.532, "step": 14000 },
{ "epoch": 2.43, "learning_rate": 2.080658914603415e-05, "loss": 1.3074, "step": 14010 },
{ "epoch": 2.43, "learning_rate": 2.068990852135728e-05, "loss": 1.2862, "step": 14020 },
{ "epoch": 2.44, "learning_rate": 2.057351821989113e-05, "loss": 1.285, "step": 14030 },
{ "epoch": 2.44, "learning_rate": 2.045741866769507e-05, "loss": 1.2885, "step": 14040 },
{ "epoch": 2.44, "learning_rate": 2.034161028976408e-05, "loss": 1.2729, "step": 14050 },
{ "epoch": 2.44, "learning_rate": 2.0226093510027388e-05, "loss": 1.2783, "step": 14060 },
{ "epoch": 2.44, "learning_rate": 2.0110868751346678e-05, "loss": 1.2502, "step": 14070 },
{ "epoch": 2.44, "learning_rate": 1.999593643551475e-05, "loss": 1.2787, "step": 14080 },
{ "epoch": 2.45, "learning_rate": 1.9881296983253773e-05, "loss": 1.2514, "step": 14090 },
{ "epoch": 2.45, "learning_rate": 1.9766950814213946e-05, "loss": 1.3199, "step": 14100 },
{ "epoch": 2.45, "learning_rate": 1.966429036520796e-05, "loss": 1.321, "step": 14110 },
{ "epoch": 2.45, "learning_rate": 1.9550502586578255e-05, "loss": 1.2912, "step": 14120 },
{ "epoch": 2.45, "learning_rate": 1.9437009302078558e-05, "loss": 1.2402, "step": 14130 },
{ "epoch": 2.45, "learning_rate": 1.9323810927163365e-05, "loss": 1.2623, "step": 14140 },
{ "epoch": 2.46, "learning_rate": 1.921090787620764e-05, "loss": 1.2941, "step": 14150 },
{ "epoch": 2.46, "learning_rate": 1.9098300562505266e-05, "loss": 1.2472, "step": 14160 },
{ "epoch": 2.46, "learning_rate": 1.8985989398267557e-05, "loss": 1.2568, "step": 14170 },
{ "epoch": 2.46, "learning_rate": 1.887397479462174e-05, "loss": 1.2569, "step": 14180 },
{ "epoch": 2.46, "learning_rate": 1.8762257161609442e-05, "loss": 1.2837, "step": 14190 },
{ "epoch": 2.46, "learning_rate": 1.865083690818521e-05, "loss": 1.255, "step": 14200 },
{ "epoch": 2.47, "learning_rate": 1.8550813276774915e-05, "loss": 1.264, "step": 14210 },
{ "epoch": 2.47, "learning_rate": 1.845103114979575e-05, "loss": 1.2722, "step": 14220 },
{ "epoch": 2.47, "learning_rate": 1.8340445725584443e-05, "loss": 1.2828, "step": 14230 },
{ "epoch": 2.47, "learning_rate": 1.8230159225047806e-05, "loss": 1.2776, "step": 14240 },
{ "epoch": 2.47, "learning_rate": 1.8120172051901564e-05, "loss": 1.2505, "step": 14250 },
{ "epoch": 2.47, "learning_rate": 1.801048460876572e-05, "loss": 1.2663, "step": 14260 },
{ "epoch": 2.48, "learning_rate": 1.7901097297163094e-05, "loss": 1.2305, "step": 14270 },
{ "epoch": 2.48, "learning_rate": 1.779201051751783e-05, "loss": 1.2955, "step": 14280 },
{ "epoch": 2.48, "learning_rate": 1.768322466915392e-05, "loss": 1.2682, "step": 14290 },
{ "epoch": 2.48, "learning_rate": 1.7574740150293778e-05, "loss": 1.2796, "step": 14300 },
{ "epoch": 2.48, "learning_rate": 1.746655735805681e-05, "loss": 1.2872, "step": 14310 },
{ "epoch": 2.49, "learning_rate": 1.7380228633595075e-05, "loss": 1.2768, "step": 14320 },
{ "epoch": 2.49, "learning_rate": 1.7272589946494132e-05, "loss": 1.2804, "step": 14330 },
{ "epoch": 2.49, "learning_rate": 1.7165254092070015e-05, "loss": 1.2582, "step": 14340 },
{ "epoch": 2.49, "learning_rate": 1.7058221463237277e-05, "loss": 1.3296, "step": 14350 },
{ "epoch": 2.49, "learning_rate": 1.695149245180051e-05, "loss": 1.2726, "step": 14360 },
{ "epoch": 2.49, "learning_rate": 1.685569625731185e-05, "loss": 1.273, "step": 14370 },
{ "epoch": 2.5, "learning_rate": 1.6749545194367288e-05, "loss": 1.3058, "step": 14380 },
{ "epoch": 2.5, "learning_rate": 1.6643698878761716e-05, "loss": 1.295, "step": 14390 },
{ "epoch": 2.5, "learning_rate": 1.6538157697957113e-05, "loss": 1.3008, "step": 14400 },
{ "epoch": 2.5, "learning_rate": 1.643292203829839e-05, "loss": 1.2531, "step": 14410 },
{ "epoch": 2.5, "learning_rate": 1.632799228501215e-05, "loss": 1.2844, "step": 14420 },
{ "epoch": 2.5, "learning_rate": 1.622336882220514e-05, "loss": 1.2638, "step": 14430 },
{ "epoch": 2.51, "learning_rate": 1.6119052032862915e-05, "loss": 1.2261, "step": 14440 },
{ "epoch": 2.51, "learning_rate": 1.601504229884846e-05, "loss": 1.2561, "step": 14450 },
{ "epoch": 2.51, "learning_rate": 1.5911340000900688e-05, "loss": 1.2693, "step": 14460 },
{ "epoch": 2.51, "learning_rate": 1.580794551863316e-05, "loss": 1.267, "step": 14470 },
{ "epoch": 2.51, "learning_rate": 1.5704859230532563e-05, "loss": 1.3048, "step": 14480 },
{ "epoch": 2.51, "learning_rate": 1.560208151395749e-05, "loss": 1.2803, "step": 14490 },
{ "epoch": 2.52, "learning_rate": 1.549961274513695e-05, "loss": 1.2607, "step": 14500 },
{ "epoch": 2.52, "learning_rate": 1.5407655313570525e-05, "loss": 1.275, "step": 14510 },
{ "epoch": 2.52, "learning_rate": 1.5315948706191573e-05, "loss": 1.2627, "step": 14520 },
{ "epoch": 2.52, "learning_rate": 1.5214346982990213e-05, "loss": 1.2514, "step": 14530 },
{ "epoch": 2.52, "learning_rate": 1.5113055626887762e-05, "loss": 1.2496, "step": 14540 },
{ "epoch": 2.53, "learning_rate": 1.5012075008672267e-05, "loss": 1.3028, "step": 14550 },
{ "epoch": 2.53, "learning_rate": 1.4911405497994235e-05, "loss": 1.2599, "step": 14560 },
{ "epoch": 2.53, "learning_rate": 1.4811047463365357e-05, "loss": 1.2633, "step": 14570 },
{ "epoch": 2.53, "learning_rate": 1.4711001272157132e-05, "loss": 1.2443, "step": 14580 },
{ "epoch": 2.53, "learning_rate": 1.4611267290599528e-05, "loss": 1.3036, "step": 14590 },
{ "epoch": 2.53, "learning_rate": 1.4511845883779607e-05, "loss": 1.2934, "step": 14600 },
{ "epoch": 2.54, "learning_rate": 1.4412737415640232e-05, "loss": 1.2485, "step": 14610 },
{ "epoch": 2.54, "learning_rate": 1.4313942248978752e-05, "loss": 1.2625, "step": 14620 },
{ "epoch": 2.54, "learning_rate": 1.4235131935781309e-05, "loss": 1.272, "step": 14630 },
{ "epoch": 2.54, "learning_rate": 1.4136901622367581e-05, "loss": 1.2825, "step": 14640 },
{ "epoch": 2.54, "learning_rate": 1.403898562015863e-05, "loss": 1.2842, "step": 14650 },
{ "epoch": 2.54, "learning_rate": 1.3941384287586633e-05, "loss": 1.2833, "step": 14660 },
{ "epoch": 2.55, "learning_rate": 1.384409798193188e-05, "loss": 1.2957, "step": 14670 },
{ "epoch": 2.55, "learning_rate": 1.3747127059321474e-05, "loss": 1.2412, "step": 14680 },
{ "epoch": 2.55, "learning_rate": 1.3650471874727967e-05, "loss": 1.2911, "step": 14690 },
{ "epoch": 2.55, "learning_rate": 1.3554132781968232e-05, "loss": 1.3062, "step": 14700 },
{ "epoch": 2.55, "learning_rate": 1.3458110133701962e-05, "loss": 1.2822, "step": 14710 },
{ "epoch": 2.55, "learning_rate": 1.3362404281430497e-05, "loss": 1.2376, "step": 14720 },
{ "epoch": 2.56, "learning_rate": 1.3267015575495512e-05, "loss": 1.2577, "step": 14730 },
{ "epoch": 2.56, "learning_rate": 1.3171944365077748e-05, "loss": 1.2595, "step": 14740 },
{ "epoch": 2.56, "learning_rate": 1.307719099819571e-05, "loss": 1.2946, "step": 14750 },
{ "epoch": 2.56, "learning_rate": 1.2982755821704372e-05, "loss": 1.2915, "step": 14760 },
{ "epoch": 2.56, "learning_rate": 1.288863918129396e-05, "loss": 1.2599, "step": 14770 },
{ "epoch": 2.57, "learning_rate": 1.2794841421488679e-05, "loss": 1.2552, "step": 14780 },
{ "epoch": 2.57, "learning_rate": 1.2710696364389941e-05, "loss": 1.2647, "step": 14790 },
{ "epoch": 2.57, "learning_rate": 1.2626810128213363e-05, "loss": 1.2425, "step": 14800 },
{ "epoch": 2.57, "learning_rate": 1.2533907057030315e-05, "loss": 1.2571, "step": 14810 },
{ "epoch": 2.57, "learning_rate": 1.244132416498789e-05, "loss": 1.297, "step": 14820 },
{ "epoch": 2.57, "learning_rate": 1.2349061790995841e-05, "loss": 1.2686, "step": 14830 },
{ "epoch": 2.58, "learning_rate": 1.225712027279059e-05, "loss": 1.2944, "step": 14840 },
{ "epoch": 2.58, "learning_rate": 1.21654999469341e-05, "loss": 1.2394, "step": 14850 },
{ "epoch": 2.58, "learning_rate": 1.2074201148812537e-05, "loss": 1.2908, "step": 14860 },
{ "epoch": 2.58, "learning_rate": 1.1983224212635024e-05, "loss": 1.2721, "step": 14870 },
{ "epoch": 2.58, "learning_rate": 1.1892569471432557e-05, "loss": 1.2818, "step": 14880 },
{ "epoch": 2.58, "learning_rate": 1.1802237257056659e-05, "loss": 1.2811, "step": 14890 },
{ "epoch": 2.59, "learning_rate": 1.171222790017823e-05, "loss": 1.2835, "step": 14900 },
{ "epoch": 2.59, "learning_rate": 1.1622541730286296e-05, "loss": 1.2731, "step": 14910 },
{ "epoch": 2.59, "learning_rate": 1.153317907568684e-05, "loss": 1.2946, "step": 14920 },
{ "epoch": 2.59, "learning_rate": 1.1444140263501591e-05, "loss": 1.2726, "step": 14930 },
{ "epoch": 2.59, "learning_rate": 1.135542561966675e-05, "loss": 1.2807, "step": 14940 },
{ "epoch": 2.59, "learning_rate": 1.1275859872585081e-05, "loss": 1.2817, "step": 14950 },
{ "epoch": 2.6, "learning_rate": 1.1187762042319471e-05, "loss": 1.2802, "step": 14960 },
{ "epoch": 2.6, "learning_rate": 1.1108751952271423e-05, "loss": 1.2721, "step": 14970 },
{ "epoch": 2.6, "learning_rate": 1.1021272099769108e-05, "loss": 1.2398, "step": 14980 },
{ "epoch": 2.6, "learning_rate": 1.093411796357211e-05, "loss": 1.2574, "step": 14990 },
{ "epoch": 2.6, "learning_rate": 1.0847289862717614e-05, "loss": 1.2228, "step": 15000 },
{ "epoch": 2.6, "eval_loss": 0.7751675248146057, "eval_runtime": 62.0277, "eval_samples_per_second": 8.448, "eval_steps_per_second": 0.532, "step": 15000 },
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.0760788115049313e-05, |
|
"loss": 1.3108, |
|
"step": 15010 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.0674613037216263e-05, |
|
"loss": 1.2385, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.0588764944671713e-05, |
|
"loss": 1.2627, |
|
"step": 15030 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.0503244151671942e-05, |
|
"loss": 1.2532, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.0426555537850258e-05, |
|
"loss": 1.2731, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.034165747546959e-05, |
|
"loss": 1.2618, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.0257087617197447e-05, |
|
"loss": 1.2941, |
|
"step": 15070 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.017284627261097e-05, |
|
"loss": 1.229, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.008893375008475e-05, |
|
"loss": 1.3288, |
|
"step": 15090 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.0005350356789733e-05, |
|
"loss": 1.2818, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.922096398692005e-06, |
|
"loss": 1.2817, |
|
"step": 15110 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.839172180551736e-06, |
|
"loss": 1.2831, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.756578005922001e-06, |
|
"loss": 1.2657, |
|
"step": 15130 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.674314177147791e-06, |
|
"loss": 1.2788, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.592380995364781e-06, |
|
"loss": 1.2736, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.510778760498273e-06, |
|
"loss": 1.262, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.429507771262148e-06, |
|
"loss": 1.2497, |
|
"step": 15170 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.348568325157681e-06, |
|
"loss": 1.2698, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.267960718472513e-06, |
|
"loss": 1.2894, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.187685246279565e-06, |
|
"loss": 1.277, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.107742202435876e-06, |
|
"loss": 1.2803, |
|
"step": 15210 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.028131879581714e-06, |
|
"loss": 1.2451, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.948854569139287e-06, |
|
"loss": 1.241, |
|
"step": 15230 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.8699105613118e-06, |
|
"loss": 1.2558, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.79914616687264e-06, |
|
"loss": 1.2357, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.720836229152817e-06, |
|
"loss": 1.2819, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.642860428733857e-06, |
|
"loss": 1.288, |
|
"step": 15270 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.565219051054663e-06, |
|
"loss": 1.283, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.495627977514654e-06, |
|
"loss": 1.2858, |
|
"step": 15290 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.41862278503991e-06, |
|
"loss": 1.2931, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.341952836151169e-06, |
|
"loss": 1.2803, |
|
"step": 15310 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.265618411507148e-06, |
|
"loss": 1.2773, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.189619790538295e-06, |
|
"loss": 1.2717, |
|
"step": 15330 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.113957251445836e-06, |
|
"loss": 1.2474, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.038631071200698e-06, |
|
"loss": 1.2828, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.963641525542564e-06, |
|
"loss": 1.2829, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.888988888978833e-06, |
|
"loss": 1.2845, |
|
"step": 15370 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.814673434783604e-06, |
|
"loss": 1.2726, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.740695434996626e-06, |
|
"loss": 1.2498, |
|
"step": 15390 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.667055160422431e-06, |
|
"loss": 1.2746, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.593752880629257e-06, |
|
"loss": 1.271, |
|
"step": 15410 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.52078886394807e-06, |
|
"loss": 1.256, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.448163377471562e-06, |
|
"loss": 1.2778, |
|
"step": 15430 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.375876687053251e-06, |
|
"loss": 1.2898, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.303929057306414e-06, |
|
"loss": 1.2512, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.23232075160315e-06, |
|
"loss": 1.2936, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.161052032073445e-06, |
|
"loss": 1.2946, |
|
"step": 15470 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 7.097200746323862e-06, |
|
"loss": 1.2764, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 7.026577958239167e-06, |
|
"loss": 1.2406, |
|
"step": 15490 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.956295509471921e-06, |
|
"loss": 1.2662, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.88635365729865e-06, |
|
"loss": 1.244, |
|
"step": 15510 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.8167526577491034e-06, |
|
"loss": 1.2808, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.747492765605312e-06, |
|
"loss": 1.3011, |
|
"step": 15530 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.678574234400659e-06, |
|
"loss": 1.2447, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.60999731641887e-06, |
|
"loss": 1.2038, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.548570377045693e-06, |
|
"loss": 1.3007, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.480643214749759e-06, |
|
"loss": 1.2823, |
|
"step": 15570 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.413058390224724e-06, |
|
"loss": 1.2388, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.345816150872197e-06, |
|
"loss": 1.2874, |
|
"step": 15590 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.278916742839691e-06, |
|
"loss": 1.2493, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.2123604110197686e-06, |
|
"loss": 1.282, |
|
"step": 15610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.146147399049107e-06, |
|
"loss": 1.2575, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.0802779493076665e-06, |
|
"loss": 1.286, |
|
"step": 15630 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.014752302917681e-06, |
|
"loss": 1.281, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.949570699742935e-06, |
|
"loss": 1.2855, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.8847333783877635e-06, |
|
"loss": 1.2316, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.820240576196223e-06, |
|
"loss": 1.2645, |
|
"step": 15670 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.7560925292512335e-06, |
|
"loss": 1.2897, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.69228947237368e-06, |
|
"loss": 1.2823, |
|
"step": 15690 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.635161880753381e-06, |
|
"loss": 1.2788, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.572014947411885e-06, |
|
"loss": 1.2814, |
|
"step": 15710 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.515478243480177e-06, |
|
"loss": 1.2719, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.452988268147996e-06, |
|
"loss": 1.2618, |
|
"step": 15730 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.390844392429362e-06, |
|
"loss": 1.3436, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.329046843808683e-06, |
|
"loss": 1.2658, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.267595848502604e-06, |
|
"loss": 1.2742, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.2064916314591646e-06, |
|
"loss": 1.2553, |
|
"step": 15770 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.145734416356996e-06, |
|
"loss": 1.2679, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.085324425604499e-06, |
|
"loss": 1.2254, |
|
"step": 15790 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.025261880338994e-06, |
|
"loss": 1.2656, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.965547000425985e-06, |
|
"loss": 1.2524, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.9061800044582385e-06, |
|
"loss": 1.2899, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.853047328501259e-06, |
|
"loss": 1.2837, |
|
"step": 15830 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.794341909691191e-06, |
|
"loss": 1.2689, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.735985001541243e-06, |
|
"loss": 1.2794, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.677976817673235e-06, |
|
"loss": 1.2599, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.62031757043242e-06, |
|
"loss": 1.2905, |
|
"step": 15870 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.563007470886749e-06, |
|
"loss": 1.2726, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.506046728826075e-06, |
|
"loss": 1.2318, |
|
"step": 15890 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.449435552761372e-06, |
|
"loss": 1.2712, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.398784544532874e-06, |
|
"loss": 1.3048, |
|
"step": 15910 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.342838113724712e-06, |
|
"loss": 1.2803, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.2872418463554055e-06, |
|
"loss": 1.3073, |
|
"step": 15930 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.231995945941125e-06, |
|
"loss": 1.2495, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.1771006147155015e-06, |
|
"loss": 1.2985, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.122556053628868e-06, |
|
"loss": 1.2603, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.068362462347508e-06, |
|
"loss": 1.2751, |
|
"step": 15970 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.014520039252956e-06, |
|
"loss": 1.2502, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.961028981441251e-06, |
|
"loss": 1.2988, |
|
"step": 15990 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.907889484722238e-06, |
|
"loss": 1.2901, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 0.7744143605232239, |
|
"eval_runtime": 61.9699, |
|
"eval_samples_per_second": 8.456, |
|
"eval_steps_per_second": 0.533, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.855101743618806e-06, |
|
"loss": 1.278, |
|
"step": 16010 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.8026659513662353e-06, |
|
"loss": 1.2782, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.7505822999114206e-06, |
|
"loss": 1.266, |
|
"step": 16030 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6988509799122494e-06, |
|
"loss": 1.2606, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.647472180736833e-06, |
|
"loss": 1.2544, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.5964460904628685e-06, |
|
"loss": 1.2632, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.5457728958768642e-06, |
|
"loss": 1.2793, |
|
"step": 16070 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.495452782473596e-06, |
|
"loss": 1.2691, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.4454859344552835e-06, |
|
"loss": 1.2889, |
|
"step": 16090 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.4008179643440496e-06, |
|
"loss": 1.2798, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3515228234023422e-06, |
|
"loss": 1.2751, |
|
"step": 16110 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.307459683817815e-06, |
|
"loss": 1.2461, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.2588369013774933e-06, |
|
"loss": 1.2488, |
|
"step": 16130 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.210568250480306e-06, |
|
"loss": 1.2522, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.1626539078188687e-06, |
|
"loss": 1.2958, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.1150940487888804e-06, |
|
"loss": 1.2353, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.0678888474883316e-06, |
|
"loss": 1.2498, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.0210384767169975e-06, |
|
"loss": 1.2708, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.97454310797578e-06, |
|
"loss": 1.2368, |
|
"step": 16190 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.9284029114660107e-06, |
|
"loss": 1.2822, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.8826180560888927e-06, |
|
"loss": 1.2863, |
|
"step": 16210 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.837188709444882e-06, |
|
"loss": 1.2477, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.792115037833032e-06, |
|
"loss": 1.2577, |
|
"step": 16230 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.7473972062503905e-06, |
|
"loss": 1.2445, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.707455536371439e-06, |
|
"loss": 1.2237, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.6634142507455885e-06, |
|
"loss": 1.2587, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.624081735149897e-06, |
|
"loss": 1.2759, |
|
"step": 16270 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.580717577477021e-06, |
|
"loss": 1.2786, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.5377100336767545e-06, |
|
"loss": 1.272, |
|
"step": 16290 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.495059261182886e-06, |
|
"loss": 1.2404, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.452765416123215e-06, |
|
"loss": 1.2751, |
|
"step": 16310 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.4108286533189527e-06, |
|
"loss": 1.2624, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.3692491262841785e-06, |
|
"loss": 1.2965, |
|
"step": 16330 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.3280269872252847e-06, |
|
"loss": 1.2947, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.287162387040365e-06, |
|
"loss": 1.2839, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.2506900662738086e-06, |
|
"loss": 1.2637, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.210505200985846e-06, |
|
"loss": 1.2615, |
|
"step": 16370 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.1706783047731326e-06, |
|
"loss": 1.2743, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.1312095234263807e-06, |
|
"loss": 1.2368, |
|
"step": 16390 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.0920990014253185e-06, |
|
"loss": 1.2542, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.0533468819382893e-06, |
|
"loss": 1.2367, |
|
"step": 16410 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.014953306821632e-06, |
|
"loss": 1.2418, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.976918416619211e-06, |
|
"loss": 1.2555, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.939242350561854e-06, |
|
"loss": 1.2583, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.9019252465669046e-06, |
|
"loss": 1.2734, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.8649672412376916e-06, |
|
"loss": 1.2125, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.8283684698629843e-06, |
|
"loss": 1.2853, |
|
"step": 16470 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.7921290664165923e-06, |
|
"loss": 1.2621, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.756249163556778e-06, |
|
"loss": 1.2599, |
|
"step": 16490 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.7207288926258225e-06, |
|
"loss": 1.2865, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.6855683836495383e-06, |
|
"loss": 1.238, |
|
"step": 16510 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.6507677653367915e-06, |
|
"loss": 1.2989, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.6163271650790456e-06, |
|
"loss": 1.2784, |
|
"step": 16530 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.5822467089498304e-06, |
|
"loss": 1.2912, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.5485265217043854e-06, |
|
"loss": 1.2584, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.5184864851265469e-06, |
|
"loss": 1.2535, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.4854511477372047e-06, |
|
"loss": 1.3007, |
|
"step": 16570 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.456027673515925e-06, |
|
"loss": 1.2791, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.4236776225376336e-06, |
|
"loss": 1.2686, |
|
"step": 16590 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.3916884209024705e-06, |
|
"loss": 1.2315, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.3600601857104101e-06, |
|
"loss": 1.2747, |
|
"step": 16610 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.3287930327400167e-06, |
|
"loss": 1.2595, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.2978870764481232e-06, |
|
"loss": 1.2505, |
|
"step": 16630 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.2673424299693204e-06, |
|
"loss": 1.2814, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.2371592051156345e-06, |
|
"loss": 1.2427, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.2073375123760168e-06, |
|
"loss": 1.2477, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1778774609160436e-06, |
|
"loss": 1.2516, |
|
"step": 16670 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1487791585774176e-06, |
|
"loss": 1.2804, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.1200427118776224e-06, |
|
"loss": 1.2826, |
|
"step": 16690 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0916682260095789e-06, |
|
"loss": 1.2703, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.063655804841146e-06, |
|
"loss": 1.2501, |
|
"step": 16710 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0360055509148535e-06, |
|
"loss": 1.2323, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.008717565447448e-06, |
|
"loss": 1.2773, |
|
"step": 16730 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.871480775350161e-07, |
|
"loss": 1.3079, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.605124261266474e-07, |
|
"loss": 1.2767, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.34239319527963e-07, |
|
"loss": 1.2475, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.083288539145196e-07, |
|
"loss": 1.2586, |
|
"step": 16770 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.827811241344131e-07, |
|
"loss": 1.2465, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.575962237078572e-07, |
|
"loss": 1.2841, |
|
"step": 16790 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.327742448269394e-07, |
|
"loss": 1.2984, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.083152783552095e-07, |
|
"loss": 1.2587, |
|
"step": 16810 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.842194138273584e-07, |
|
"loss": 1.2659, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.628436608436595e-07, |
|
"loss": 1.2654, |
|
"step": 16830 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.3943793191662e-07, |
|
"loss": 1.2415, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.163955570664738e-07, |
|
"loss": 1.2256, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.937166206423485e-07, |
|
"loss": 1.2541, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.714012056629693e-07, |
|
"loss": 1.2869, |
|
"step": 16870 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.494493938163038e-07, |
|
"loss": 1.2867, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.278612654593729e-07, |
|
"loss": 1.242, |
|
"step": 16890 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.066368996178517e-07, |
|
"loss": 1.2183, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.85776373985858e-07, |
|
"loss": 1.2517, |
|
"step": 16910 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.652797649255969e-07, |
|
"loss": 1.2363, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.4514714746714e-07, |
|
"loss": 1.2635, |
|
"step": 16930 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.253785953081125e-07, |
|
"loss": 1.2782, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.059741808134621e-07, |
|
"loss": 1.3006, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.869339750151469e-07, |
|
"loss": 1.2425, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.682580476119247e-07, |
|
"loss": 1.276, |
|
"step": 16970 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.499464669690423e-07, |
|
"loss": 1.2827, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.3199930011802446e-07, |
|
"loss": 1.3223, |
|
"step": 16990 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.1441661275645195e-07, |
|
"loss": 1.2453, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 0.7738975882530212, |
|
"eval_runtime": 61.8545, |
|
"eval_samples_per_second": 8.471, |
|
"eval_steps_per_second": 0.534, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.971984692476394e-07, |
|
"loss": 1.2683, |
|
"step": 17010 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.820138772047788e-07, |
|
"loss": 1.2824, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.6548853955771235e-07, |
|
"loss": 1.271, |
|
"step": 17030 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.493279248699355e-07, |
|
"loss": 1.298, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.3353209229913806e-07, |
|
"loss": 1.2695, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.181010996677003e-07, |
|
"loss": 1.2698, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.030350034624374e-07, |
|
"loss": 1.266, |
|
"step": 17070 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.88333858834422e-07, |
|
"loss": 1.3003, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.7399771959880637e-07, |
|
"loss": 1.2441, |
|
"step": 17090 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.600266382345895e-07, |
|
"loss": 1.2973, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.4642066588441705e-07, |
|
"loss": 1.2442, |
|
"step": 17110 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.3317985235443707e-07, |
|
"loss": 1.2901, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.215753710563373e-07, |
|
"loss": 1.2668, |
|
"step": 17130 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.0902849171310356e-07, |
|
"loss": 1.2799, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.968469080681823e-07, |
|
"loss": 1.2652, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.8619584749273167e-07, |
|
"loss": 1.2909, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.747084474202576e-07, |
|
"loss": 1.2791, |
|
"step": 17170 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.6358646867835615e-07, |
|
"loss": 1.269, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.5282995198021565e-07, |
|
"loss": 1.2189, |
|
"step": 17190 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.424389367012613e-07, |
|
"loss": 1.2991, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.3241346087892182e-07, |
|
"loss": 1.2893, |
|
"step": 17210 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.2275356121254077e-07, |
|
"loss": 1.2596, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.1345927306323224e-07, |
|
"loss": 1.2393, |
|
"step": 17230 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.0453063045375855e-07, |
|
"loss": 1.2814, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.596766606836393e-08, |
|
"loss": 1.2632, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.777041125273e-08, |
|
"loss": 1.2705, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.993889601378701e-08, |
|
"loss": 1.2207, |
|
"step": 17270 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.24731490196584e-08, |
|
"loss": 1.2858, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 17285, |
|
"total_flos": 9.82692376847319e+16, |
|
"train_loss": 1.5514631569575188, |
|
"train_runtime": 557290.3071, |
|
"train_samples_per_second": 3.97, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"max_steps": 17285, |
|
"num_train_epochs": 4, |
|
"total_flos": 9.82692376847319e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|