|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9956122856003191, |
|
"eval_steps": 500, |
|
"global_step": 117, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008509506714532641, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.8719, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.017019013429065283, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.8402, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.025528520143597924, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8438, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.034038026858130566, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.8171, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04254753357266321, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.8029, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05105704028719585, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7858, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0595665470017285, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.7671, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.06807605371626113, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.7989, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07658556043079377, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7546, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.08509506714532641, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.745, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09360457385985906, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.7169, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.1021140805743917, |
|
"learning_rate": 4e-05, |
|
"loss": 0.7561, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.11062358728892434, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.8475, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.119133094003457, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.9401, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.12764260071798963, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8515, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.13615210743252226, |
|
"learning_rate": 4.9509803921568634e-05, |
|
"loss": 0.8282, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.14466161414705492, |
|
"learning_rate": 4.901960784313725e-05, |
|
"loss": 0.7526, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.15317112086158755, |
|
"learning_rate": 4.8529411764705885e-05, |
|
"loss": 0.709, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1616806275761202, |
|
"learning_rate": 4.803921568627452e-05, |
|
"loss": 0.7005, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.17019013429065283, |
|
"learning_rate": 4.7549019607843135e-05, |
|
"loss": 0.6978, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.17869964100518548, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 0.6689, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1872091477197181, |
|
"learning_rate": 4.656862745098039e-05, |
|
"loss": 0.7231, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.19571865443425077, |
|
"learning_rate": 4.607843137254902e-05, |
|
"loss": 0.6685, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.2042281611487834, |
|
"learning_rate": 4.558823529411765e-05, |
|
"loss": 0.6776, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.21273766786331605, |
|
"learning_rate": 4.5098039215686275e-05, |
|
"loss": 0.6853, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.22124717457784868, |
|
"learning_rate": 4.460784313725491e-05, |
|
"loss": 0.7705, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.22975668129238133, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 0.7922, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.238266188006914, |
|
"learning_rate": 4.362745098039216e-05, |
|
"loss": 0.6948, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.24677569472144661, |
|
"learning_rate": 4.313725490196079e-05, |
|
"loss": 0.6748, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.25528520143597927, |
|
"learning_rate": 4.2647058823529415e-05, |
|
"loss": 0.7134, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2637947081505119, |
|
"learning_rate": 4.215686274509804e-05, |
|
"loss": 0.6921, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.2723042148650445, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.6839, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2808137215795772, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 0.6808, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.28932322829410984, |
|
"learning_rate": 4.068627450980392e-05, |
|
"loss": 0.6939, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.2978327350086425, |
|
"learning_rate": 4.0196078431372555e-05, |
|
"loss": 0.6846, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3063422417231751, |
|
"learning_rate": 3.970588235294117e-05, |
|
"loss": 0.6456, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.31485174843770775, |
|
"learning_rate": 3.9215686274509805e-05, |
|
"loss": 0.6725, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3233612551522404, |
|
"learning_rate": 3.872549019607844e-05, |
|
"loss": 0.682, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.33187076186677306, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 0.7029, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.34038026858130566, |
|
"learning_rate": 3.774509803921569e-05, |
|
"loss": 0.7072, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3488897752958383, |
|
"learning_rate": 3.725490196078432e-05, |
|
"loss": 0.6925, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.35739928201037097, |
|
"learning_rate": 3.6764705882352945e-05, |
|
"loss": 0.6642, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3659087887249036, |
|
"learning_rate": 3.627450980392157e-05, |
|
"loss": 0.6483, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.3744182954394362, |
|
"learning_rate": 3.5784313725490195e-05, |
|
"loss": 0.6779, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3829278021539689, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.6664, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.39143730886850153, |
|
"learning_rate": 3.480392156862745e-05, |
|
"loss": 0.6493, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3999468155830342, |
|
"learning_rate": 3.431372549019608e-05, |
|
"loss": 0.6853, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.4084563222975668, |
|
"learning_rate": 3.382352941176471e-05, |
|
"loss": 0.6549, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.41696582901209944, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.6659, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4254753357266321, |
|
"learning_rate": 3.284313725490196e-05, |
|
"loss": 0.6639, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.43398484244116475, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.6583, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.44249434915569735, |
|
"learning_rate": 3.186274509803922e-05, |
|
"loss": 0.6585, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.45100385587023, |
|
"learning_rate": 3.137254901960784e-05, |
|
"loss": 0.6753, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.45951336258476266, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 0.6712, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.4680228692992953, |
|
"learning_rate": 3.0392156862745097e-05, |
|
"loss": 0.6632, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.476532376013828, |
|
"learning_rate": 2.9901960784313725e-05, |
|
"loss": 0.6673, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.4850418827283606, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 0.6444, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.49355138944289323, |
|
"learning_rate": 2.8921568627450986e-05, |
|
"loss": 0.6673, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5020608961574259, |
|
"learning_rate": 2.8431372549019608e-05, |
|
"loss": 0.6629, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5105704028719585, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 0.6723, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5190799095864912, |
|
"learning_rate": 2.7450980392156865e-05, |
|
"loss": 0.6622, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5275894163010239, |
|
"learning_rate": 2.696078431372549e-05, |
|
"loss": 0.6606, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.5360989230155564, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.6823, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.544608429730089, |
|
"learning_rate": 2.5980392156862747e-05, |
|
"loss": 0.6519, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.5531179364446217, |
|
"learning_rate": 2.5490196078431373e-05, |
|
"loss": 0.6702, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5616274431591544, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6463, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.570136949873687, |
|
"learning_rate": 2.4509803921568626e-05, |
|
"loss": 0.6604, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.5786464565882197, |
|
"learning_rate": 2.401960784313726e-05, |
|
"loss": 0.6535, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5871559633027523, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.6605, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.595665470017285, |
|
"learning_rate": 2.303921568627451e-05, |
|
"loss": 0.6514, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6041749767318175, |
|
"learning_rate": 2.2549019607843138e-05, |
|
"loss": 0.6533, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6126844834463502, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 0.6564, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6211939901608828, |
|
"learning_rate": 2.1568627450980395e-05, |
|
"loss": 0.6679, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.6297034968754155, |
|
"learning_rate": 2.107843137254902e-05, |
|
"loss": 0.6476, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.6382130035899481, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.6917, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6467225103044808, |
|
"learning_rate": 2.0098039215686277e-05, |
|
"loss": 0.6565, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.6552320170190135, |
|
"learning_rate": 1.9607843137254903e-05, |
|
"loss": 0.6329, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.6637415237335461, |
|
"learning_rate": 1.9117647058823528e-05, |
|
"loss": 0.6149, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6722510304480788, |
|
"learning_rate": 1.862745098039216e-05, |
|
"loss": 0.6799, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.6807605371626113, |
|
"learning_rate": 1.8137254901960785e-05, |
|
"loss": 0.6458, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.689270043877144, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 0.6376, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.6977795505916766, |
|
"learning_rate": 1.715686274509804e-05, |
|
"loss": 0.6688, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7062890573062093, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.6466, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.7147985640207419, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 0.6386, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.7233080707352746, |
|
"learning_rate": 1.568627450980392e-05, |
|
"loss": 0.6427, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7318175774498072, |
|
"learning_rate": 1.5196078431372548e-05, |
|
"loss": 0.6617, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.7403270841643399, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.6573, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.7488365908788724, |
|
"learning_rate": 1.4215686274509804e-05, |
|
"loss": 0.6342, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.7573460975934051, |
|
"learning_rate": 1.3725490196078432e-05, |
|
"loss": 0.6455, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.7658556043079378, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.6042, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7743651110224704, |
|
"learning_rate": 1.2745098039215686e-05, |
|
"loss": 0.639, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.7828746177370031, |
|
"learning_rate": 1.2254901960784313e-05, |
|
"loss": 0.6496, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.7913841244515357, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 0.6474, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.7998936311660684, |
|
"learning_rate": 1.1274509803921569e-05, |
|
"loss": 0.6418, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.808403137880601, |
|
"learning_rate": 1.0784313725490197e-05, |
|
"loss": 0.6434, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8169126445951336, |
|
"learning_rate": 1.0294117647058824e-05, |
|
"loss": 0.659, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.8254221513096662, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.6342, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.8339316580241989, |
|
"learning_rate": 9.31372549019608e-06, |
|
"loss": 0.647, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.8424411647387315, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.6306, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.8509506714532642, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.6724, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8594601781677969, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 0.6455, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.8679696848823295, |
|
"learning_rate": 7.3529411764705884e-06, |
|
"loss": 0.634, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.8764791915968622, |
|
"learning_rate": 6.862745098039216e-06, |
|
"loss": 0.6353, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.8849886983113947, |
|
"learning_rate": 6.372549019607843e-06, |
|
"loss": 0.6632, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.8934982050259274, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.6616, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.90200771174046, |
|
"learning_rate": 5.392156862745099e-06, |
|
"loss": 0.6312, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.9105172184549927, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 0.6592, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.9190267251695253, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 0.6634, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.927536231884058, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 0.6428, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.9360457385985906, |
|
"learning_rate": 3.431372549019608e-06, |
|
"loss": 0.6261, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9445552453131233, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.6425, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.953064752027656, |
|
"learning_rate": 2.450980392156863e-06, |
|
"loss": 0.6614, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.9615742587421885, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 0.6545, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.9700837654567211, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 0.6184, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.9785932721712538, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 0.6671, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.9871027788857865, |
|
"learning_rate": 4.901960784313725e-07, |
|
"loss": 0.65, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.9956122856003191, |
|
"learning_rate": 0.0, |
|
"loss": 0.6446, |
|
"step": 117 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 117, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.689042619882799e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|