{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 5000,
  "global_step": 1049,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009532888465204958,
      "grad_norm": 9735.205078125,
      "learning_rate": 6.000000000000001e-07,
      "loss": 14.3342,
      "step": 10
    },
    {
      "epoch": 0.019065776930409915,
      "grad_norm": 12878.38671875,
      "learning_rate": 2.2e-06,
      "loss": 13.2398,
      "step": 20
    },
    {
      "epoch": 0.028598665395614873,
      "grad_norm": 7679.97412109375,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 11.5969,
      "step": 30
    },
    {
      "epoch": 0.03813155386081983,
      "grad_norm": 1830.318603515625,
      "learning_rate": 5.2e-06,
      "loss": 7.4151,
      "step": 40
    },
    {
      "epoch": 0.047664442326024785,
      "grad_norm": 1004.4345092773438,
      "learning_rate": 7.2e-06,
      "loss": 5.4187,
      "step": 50
    },
    {
      "epoch": 0.057197330791229746,
      "grad_norm": 588.986328125,
      "learning_rate": 9.2e-06,
      "loss": 4.4843,
      "step": 60
    },
    {
      "epoch": 0.0667302192564347,
      "grad_norm": 599.4677734375,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 3.5594,
      "step": 70
    },
    {
      "epoch": 0.07626310772163966,
      "grad_norm": 360.55419921875,
      "learning_rate": 1.32e-05,
      "loss": 2.8006,
      "step": 80
    },
    {
      "epoch": 0.08579599618684461,
      "grad_norm": 78.43992614746094,
      "learning_rate": 1.52e-05,
      "loss": 1.9745,
      "step": 90
    },
    {
      "epoch": 0.09532888465204957,
      "grad_norm": 49.44837951660156,
      "learning_rate": 1.7199999999999998e-05,
      "loss": 1.1316,
      "step": 100
    },
    {
      "epoch": 0.10486177311725453,
      "grad_norm": 25.379404067993164,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.5633,
      "step": 110
    },
    {
      "epoch": 0.11439466158245949,
      "grad_norm": 57.260284423828125,
      "learning_rate": 2.12e-05,
      "loss": 0.3606,
      "step": 120
    },
    {
      "epoch": 0.12392755004766444,
      "grad_norm": 83.0460205078125,
      "learning_rate": 2.32e-05,
      "loss": 0.3603,
      "step": 130
    },
    {
      "epoch": 0.1334604385128694,
      "grad_norm": 91.75605773925781,
      "learning_rate": 2.5200000000000003e-05,
      "loss": 0.2501,
      "step": 140
    },
    {
      "epoch": 0.14299332697807435,
      "grad_norm": 84.04005432128906,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 0.2193,
      "step": 150
    },
    {
      "epoch": 0.15252621544327932,
      "grad_norm": 28.945568084716797,
      "learning_rate": 2.9199999999999998e-05,
      "loss": 0.2095,
      "step": 160
    },
    {
      "epoch": 0.16205910390848427,
      "grad_norm": 9.935993194580078,
      "learning_rate": 3.12e-05,
      "loss": 0.1697,
      "step": 170
    },
    {
      "epoch": 0.17159199237368922,
      "grad_norm": 5.943849563598633,
      "learning_rate": 3.32e-05,
      "loss": 0.1945,
      "step": 180
    },
    {
      "epoch": 0.1811248808388942,
      "grad_norm": 13.284988403320312,
      "learning_rate": 3.52e-05,
      "loss": 0.1817,
      "step": 190
    },
    {
      "epoch": 0.19065776930409914,
      "grad_norm": 42.371212005615234,
      "learning_rate": 3.72e-05,
      "loss": 0.5578,
      "step": 200
    },
    {
      "epoch": 0.2001906577693041,
      "grad_norm": 17.454591751098633,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 0.2339,
      "step": 210
    },
    {
      "epoch": 0.20972354623450906,
      "grad_norm": 248.91416931152344,
      "learning_rate": 4.12e-05,
      "loss": 0.1741,
      "step": 220
    },
    {
      "epoch": 0.219256434699714,
      "grad_norm": 1.8945605754852295,
      "learning_rate": 4.32e-05,
      "loss": 0.1669,
      "step": 230
    },
    {
      "epoch": 0.22878932316491898,
      "grad_norm": 2.4241456985473633,
      "learning_rate": 4.52e-05,
      "loss": 0.1693,
      "step": 240
    },
    {
      "epoch": 0.23832221163012393,
      "grad_norm": 1.1613476276397705,
      "learning_rate": 4.72e-05,
      "loss": 0.1331,
      "step": 250
    },
    {
      "epoch": 0.24785510009532888,
      "grad_norm": 2.931299924850464,
      "learning_rate": 4.92e-05,
      "loss": 0.1527,
      "step": 260
    },
    {
      "epoch": 0.25738798856053385,
      "grad_norm": 139.0654754638672,
      "learning_rate": 5.1200000000000004e-05,
      "loss": 0.1426,
      "step": 270
    },
    {
      "epoch": 0.2669208770257388,
      "grad_norm": 1.6811221837997437,
      "learning_rate": 5.3200000000000006e-05,
      "loss": 0.1377,
      "step": 280
    },
    {
      "epoch": 0.27645376549094375,
      "grad_norm": 2.773026466369629,
      "learning_rate": 5.520000000000001e-05,
      "loss": 0.1551,
      "step": 290
    },
    {
      "epoch": 0.2859866539561487,
      "grad_norm": 0.9235513210296631,
      "learning_rate": 5.72e-05,
      "loss": 0.1328,
      "step": 300
    },
    {
      "epoch": 0.29551954242135364,
      "grad_norm": 0.634608805179596,
      "learning_rate": 5.92e-05,
      "loss": 0.1203,
      "step": 310
    },
    {
      "epoch": 0.30505243088655865,
      "grad_norm": 1.2987741231918335,
      "learning_rate": 6.12e-05,
      "loss": 0.1127,
      "step": 320
    },
    {
      "epoch": 0.3145853193517636,
      "grad_norm": 1.1649960279464722,
      "learning_rate": 6.32e-05,
      "loss": 0.1228,
      "step": 330
    },
    {
      "epoch": 0.32411820781696854,
      "grad_norm": 1.1517277956008911,
      "learning_rate": 6.52e-05,
      "loss": 0.15,
      "step": 340
    },
    {
      "epoch": 0.3336510962821735,
      "grad_norm": 2.274184465408325,
      "learning_rate": 6.720000000000001e-05,
      "loss": 0.1121,
      "step": 350
    },
    {
      "epoch": 0.34318398474737843,
      "grad_norm": 0.8627381324768066,
      "learning_rate": 6.92e-05,
      "loss": 0.1171,
      "step": 360
    },
    {
      "epoch": 0.35271687321258344,
      "grad_norm": 1.2641953229904175,
      "learning_rate": 7.12e-05,
      "loss": 0.1437,
      "step": 370
    },
    {
      "epoch": 0.3622497616777884,
      "grad_norm": 14.235816955566406,
      "learning_rate": 7.32e-05,
      "loss": 0.1719,
      "step": 380
    },
    {
      "epoch": 0.37178265014299333,
      "grad_norm": 1.2613039016723633,
      "learning_rate": 7.52e-05,
      "loss": 0.1401,
      "step": 390
    },
    {
      "epoch": 0.3813155386081983,
      "grad_norm": 1.125999927520752,
      "learning_rate": 7.72e-05,
      "loss": 0.1176,
      "step": 400
    },
    {
      "epoch": 0.3908484270734032,
      "grad_norm": 2.6700384616851807,
      "learning_rate": 7.920000000000001e-05,
      "loss": 0.116,
      "step": 410
    },
    {
      "epoch": 0.4003813155386082,
      "grad_norm": 0.9148916602134705,
      "learning_rate": 8.120000000000001e-05,
      "loss": 0.1392,
      "step": 420
    },
    {
      "epoch": 0.4099142040038132,
      "grad_norm": 1.3875360488891602,
      "learning_rate": 8.32e-05,
      "loss": 0.168,
      "step": 430
    },
    {
      "epoch": 0.4194470924690181,
      "grad_norm": 0.6264955997467041,
      "learning_rate": 8.52e-05,
      "loss": 0.1484,
      "step": 440
    },
    {
      "epoch": 0.42897998093422307,
      "grad_norm": 1.0963646173477173,
      "learning_rate": 8.72e-05,
      "loss": 0.1457,
      "step": 450
    },
    {
      "epoch": 0.438512869399428,
      "grad_norm": 0.6666226387023926,
      "learning_rate": 8.92e-05,
      "loss": 0.1311,
      "step": 460
    },
    {
      "epoch": 0.44804575786463297,
      "grad_norm": 0.9684863686561584,
      "learning_rate": 9.120000000000001e-05,
      "loss": 0.1428,
      "step": 470
    },
    {
      "epoch": 0.45757864632983797,
      "grad_norm": 0.6623441576957703,
      "learning_rate": 9.320000000000002e-05,
      "loss": 0.1317,
      "step": 480
    },
    {
      "epoch": 0.4671115347950429,
      "grad_norm": 14.834782600402832,
      "learning_rate": 9.52e-05,
      "loss": 0.2165,
      "step": 490
    },
    {
      "epoch": 0.47664442326024786,
      "grad_norm": 2.777594566345215,
      "learning_rate": 9.72e-05,
      "loss": 0.2189,
      "step": 500
    },
    {
      "epoch": 0.4861773117254528,
      "grad_norm": 0.6071160435676575,
      "learning_rate": 9.92e-05,
      "loss": 0.1401,
      "step": 510
    },
    {
      "epoch": 0.49571020019065776,
      "grad_norm": 0.9390038251876831,
      "learning_rate": 9.890710382513662e-05,
      "loss": 0.2272,
      "step": 520
    },
    {
      "epoch": 0.5052430886558628,
      "grad_norm": 0.527932345867157,
      "learning_rate": 9.708561020036431e-05,
      "loss": 0.1311,
      "step": 530
    },
    {
      "epoch": 0.5147759771210677,
      "grad_norm": 0.7770779728889465,
      "learning_rate": 9.5264116575592e-05,
      "loss": 0.1278,
      "step": 540
    },
    {
      "epoch": 0.5243088655862727,
      "grad_norm": 0.5907878279685974,
      "learning_rate": 9.344262295081968e-05,
      "loss": 0.1331,
      "step": 550
    },
    {
      "epoch": 0.5338417540514776,
      "grad_norm": 0.8410826325416565,
      "learning_rate": 9.162112932604736e-05,
      "loss": 0.1274,
      "step": 560
    },
    {
      "epoch": 0.5433746425166825,
      "grad_norm": 7.286756992340088,
      "learning_rate": 8.979963570127505e-05,
      "loss": 0.108,
      "step": 570
    },
    {
      "epoch": 0.5529075309818875,
      "grad_norm": 1.711925745010376,
      "learning_rate": 8.797814207650273e-05,
      "loss": 0.1128,
      "step": 580
    },
    {
      "epoch": 0.5624404194470924,
      "grad_norm": 1.0234737396240234,
      "learning_rate": 8.615664845173043e-05,
      "loss": 0.1405,
      "step": 590
    },
    {
      "epoch": 0.5719733079122974,
      "grad_norm": 12.920223236083984,
      "learning_rate": 8.433515482695811e-05,
      "loss": 0.3122,
      "step": 600
    },
    {
      "epoch": 0.5815061963775023,
      "grad_norm": 1.8921475410461426,
      "learning_rate": 8.25136612021858e-05,
      "loss": 0.1527,
      "step": 610
    },
    {
      "epoch": 0.5910390848427073,
      "grad_norm": 0.22176694869995117,
      "learning_rate": 8.069216757741349e-05,
      "loss": 0.0753,
      "step": 620
    },
    {
      "epoch": 0.6005719733079123,
      "grad_norm": 5.071296691894531,
      "learning_rate": 7.887067395264117e-05,
      "loss": 0.1564,
      "step": 630
    },
    {
      "epoch": 0.6101048617731173,
      "grad_norm": 0.5505660772323608,
      "learning_rate": 7.704918032786885e-05,
      "loss": 0.1145,
      "step": 640
    },
    {
      "epoch": 0.6196377502383222,
      "grad_norm": 10.267545700073242,
      "learning_rate": 7.522768670309654e-05,
      "loss": 0.1342,
      "step": 650
    },
    {
      "epoch": 0.6291706387035272,
      "grad_norm": 0.65236496925354,
      "learning_rate": 7.340619307832422e-05,
      "loss": 0.156,
      "step": 660
    },
    {
      "epoch": 0.6387035271687321,
      "grad_norm": 117.93495178222656,
      "learning_rate": 7.158469945355192e-05,
      "loss": 0.1338,
      "step": 670
    },
    {
      "epoch": 0.6482364156339371,
      "grad_norm": 0.8004688024520874,
      "learning_rate": 6.97632058287796e-05,
      "loss": 0.1319,
      "step": 680
    },
    {
      "epoch": 0.657769304099142,
      "grad_norm": 0.7219289541244507,
      "learning_rate": 6.79417122040073e-05,
      "loss": 0.1115,
      "step": 690
    },
    {
      "epoch": 0.667302192564347,
      "grad_norm": 0.35620227456092834,
      "learning_rate": 6.612021857923498e-05,
      "loss": 0.1471,
      "step": 700
    },
    {
      "epoch": 0.6768350810295519,
      "grad_norm": 1.245010495185852,
      "learning_rate": 6.429872495446266e-05,
      "loss": 0.1107,
      "step": 710
    },
    {
      "epoch": 0.6863679694947569,
      "grad_norm": 0.7738503217697144,
      "learning_rate": 6.247723132969034e-05,
      "loss": 0.1153,
      "step": 720
    },
    {
      "epoch": 0.6959008579599618,
      "grad_norm": 0.8265978693962097,
      "learning_rate": 6.0655737704918034e-05,
      "loss": 0.1653,
      "step": 730
    },
    {
      "epoch": 0.7054337464251669,
      "grad_norm": 0.6407246589660645,
      "learning_rate": 5.8834244080145716e-05,
      "loss": 0.1236,
      "step": 740
    },
    {
      "epoch": 0.7149666348903718,
      "grad_norm": 1.0173193216323853,
      "learning_rate": 5.701275045537341e-05,
      "loss": 0.1167,
      "step": 750
    },
    {
      "epoch": 0.7244995233555768,
      "grad_norm": 0.8038718104362488,
      "learning_rate": 5.519125683060109e-05,
      "loss": 0.1183,
      "step": 760
    },
    {
      "epoch": 0.7340324118207817,
      "grad_norm": 0.5294243097305298,
      "learning_rate": 5.336976320582878e-05,
      "loss": 0.1066,
      "step": 770
    },
    {
      "epoch": 0.7435653002859867,
      "grad_norm": 0.5142996311187744,
      "learning_rate": 5.1548269581056475e-05,
      "loss": 0.1085,
      "step": 780
    },
    {
      "epoch": 0.7530981887511916,
      "grad_norm": 0.5086462497711182,
      "learning_rate": 4.9726775956284156e-05,
      "loss": 0.1077,
      "step": 790
    },
    {
      "epoch": 0.7626310772163966,
      "grad_norm": 0.3155575394630432,
      "learning_rate": 4.7905282331511844e-05,
      "loss": 0.0976,
      "step": 800
    },
    {
      "epoch": 0.7721639656816015,
      "grad_norm": 0.6338363289833069,
      "learning_rate": 4.6083788706739525e-05,
      "loss": 0.104,
      "step": 810
    },
    {
      "epoch": 0.7816968541468065,
      "grad_norm": 0.6568697094917297,
      "learning_rate": 4.426229508196721e-05,
      "loss": 0.1092,
      "step": 820
    },
    {
      "epoch": 0.7912297426120114,
      "grad_norm": 0.5086967349052429,
      "learning_rate": 4.24408014571949e-05,
      "loss": 0.1209,
      "step": 830
    },
    {
      "epoch": 0.8007626310772163,
      "grad_norm": 0.466325044631958,
      "learning_rate": 4.061930783242259e-05,
      "loss": 0.1113,
      "step": 840
    },
    {
      "epoch": 0.8102955195424214,
      "grad_norm": 0.8668030500411987,
      "learning_rate": 3.879781420765027e-05,
      "loss": 0.1015,
      "step": 850
    },
    {
      "epoch": 0.8198284080076264,
      "grad_norm": 0.7336855530738831,
      "learning_rate": 3.697632058287796e-05,
      "loss": 0.0947,
      "step": 860
    },
    {
      "epoch": 0.8293612964728313,
      "grad_norm": 0.8070642948150635,
      "learning_rate": 3.515482695810565e-05,
      "loss": 0.1254,
      "step": 870
    },
    {
      "epoch": 0.8388941849380362,
      "grad_norm": 0.5929535031318665,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.0781,
      "step": 880
    },
    {
      "epoch": 0.8484270734032412,
      "grad_norm": 0.585108757019043,
      "learning_rate": 3.1511839708561016e-05,
      "loss": 0.0882,
      "step": 890
    },
    {
      "epoch": 0.8579599618684461,
      "grad_norm": 0.6924892067909241,
      "learning_rate": 2.9690346083788704e-05,
      "loss": 0.1026,
      "step": 900
    },
    {
      "epoch": 0.8674928503336511,
      "grad_norm": 0.968221127986908,
      "learning_rate": 2.7868852459016392e-05,
      "loss": 0.109,
      "step": 910
    },
    {
      "epoch": 0.877025738798856,
      "grad_norm": 0.5783460736274719,
      "learning_rate": 2.604735883424408e-05,
      "loss": 0.0762,
      "step": 920
    },
    {
      "epoch": 0.886558627264061,
      "grad_norm": 0.44456052780151367,
      "learning_rate": 2.422586520947177e-05,
      "loss": 0.0963,
      "step": 930
    },
    {
      "epoch": 0.8960915157292659,
      "grad_norm": 0.49880489706993103,
      "learning_rate": 2.2404371584699453e-05,
      "loss": 0.0988,
      "step": 940
    },
    {
      "epoch": 0.9056244041944709,
      "grad_norm": 0.5427205562591553,
      "learning_rate": 2.058287795992714e-05,
      "loss": 0.0909,
      "step": 950
    },
    {
      "epoch": 0.9151572926596759,
      "grad_norm": 0.5747605562210083,
      "learning_rate": 1.8761384335154826e-05,
      "loss": 0.0983,
      "step": 960
    },
    {
      "epoch": 0.9246901811248809,
      "grad_norm": 0.8023502230644226,
      "learning_rate": 1.6939890710382514e-05,
      "loss": 0.1178,
      "step": 970
    },
    {
      "epoch": 0.9342230695900858,
      "grad_norm": 0.5023919939994812,
      "learning_rate": 1.51183970856102e-05,
      "loss": 0.0837,
      "step": 980
    },
    {
      "epoch": 0.9437559580552908,
      "grad_norm": 0.5053642392158508,
      "learning_rate": 1.3296903460837887e-05,
      "loss": 0.0964,
      "step": 990
    },
    {
      "epoch": 0.9532888465204957,
      "grad_norm": 0.9289111495018005,
      "learning_rate": 1.1475409836065575e-05,
      "loss": 0.1087,
      "step": 1000
    },
    {
      "epoch": 0.9628217349857007,
      "grad_norm": 0.6367629766464233,
      "learning_rate": 9.653916211293261e-06,
      "loss": 0.0936,
      "step": 1010
    },
    {
      "epoch": 0.9723546234509056,
      "grad_norm": 0.7083348035812378,
      "learning_rate": 7.832422586520947e-06,
      "loss": 0.0905,
      "step": 1020
    },
    {
      "epoch": 0.9818875119161106,
      "grad_norm": 0.9046676158905029,
      "learning_rate": 6.010928961748634e-06,
      "loss": 0.1157,
      "step": 1030
    },
    {
      "epoch": 0.9914204003813155,
      "grad_norm": 0.5118803977966309,
      "learning_rate": 4.189435336976321e-06,
      "loss": 0.104,
      "step": 1040
    }
  ],
  "logging_steps": 10,
  "max_steps": 1049,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.394667177161523e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}