|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.433962264150944, |
|
"eval_steps": 500, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5241090146750524, |
|
"grad_norm": 14.80826473236084, |
|
"learning_rate": 4.97e-05, |
|
"loss": 1.8113, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5241090146750524, |
|
"eval_accuracy": 0.6428945988463556, |
|
"eval_f1_class_0": 0.632183908045977, |
|
"eval_f1_class_1": 0.9472222222222223, |
|
"eval_f1_class_2": 0.3657331136738056, |
|
"eval_f1_class_3": 0.5542168674698795, |
|
"eval_f1_class_4": 0.6165254237288136, |
|
"eval_f1_class_5": 0.6261808367071524, |
|
"eval_f1_class_6": 0.6833541927409262, |
|
"eval_f1_class_7": 0.5899053627760252, |
|
"eval_f1_class_8": 0.662280701754386, |
|
"eval_f1_class_9": 0.6956521739130435, |
|
"eval_f1_macro": 0.6373254803032232, |
|
"eval_f1_micro": 0.6428945988463556, |
|
"eval_f1_weighted": 0.6376303124409798, |
|
"eval_loss": 1.1190885305404663, |
|
"eval_recall_weighted": 0.6428945988463556, |
|
"eval_runtime": 18.5604, |
|
"eval_samples_per_second": 205.491, |
|
"eval_steps_per_second": 6.465, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0482180293501049, |
|
"grad_norm": 6.733876705169678, |
|
"learning_rate": 4.725110619469027e-05, |
|
"loss": 0.885, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0482180293501049, |
|
"eval_accuracy": 0.8185631882538018, |
|
"eval_f1_class_0": 0.7974522292993631, |
|
"eval_f1_class_1": 0.9651324965132496, |
|
"eval_f1_class_2": 0.6306569343065694, |
|
"eval_f1_class_3": 0.7193229901269393, |
|
"eval_f1_class_4": 0.8341584158415841, |
|
"eval_f1_class_5": 0.807061790668348, |
|
"eval_f1_class_6": 0.8753246753246754, |
|
"eval_f1_class_7": 0.8368794326241134, |
|
"eval_f1_class_8": 0.8787446504992867, |
|
"eval_f1_class_9": 0.8255528255528255, |
|
"eval_f1_macro": 0.8170286440756953, |
|
"eval_f1_micro": 0.8185631882538018, |
|
"eval_f1_weighted": 0.8175319894578669, |
|
"eval_loss": 0.6289433836936951, |
|
"eval_recall_weighted": 0.8185631882538018, |
|
"eval_runtime": 18.5795, |
|
"eval_samples_per_second": 205.28, |
|
"eval_steps_per_second": 6.459, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5723270440251573, |
|
"grad_norm": 21.78145408630371, |
|
"learning_rate": 4.449115044247788e-05, |
|
"loss": 0.4651, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5723270440251573, |
|
"eval_accuracy": 0.8615626638699528, |
|
"eval_f1_class_0": 0.8236877523553162, |
|
"eval_f1_class_1": 0.9653259361997226, |
|
"eval_f1_class_2": 0.6986128625472887, |
|
"eval_f1_class_3": 0.8308457711442785, |
|
"eval_f1_class_4": 0.8667563930013459, |
|
"eval_f1_class_5": 0.8346456692913385, |
|
"eval_f1_class_6": 0.8898305084745762, |
|
"eval_f1_class_7": 0.9136420525657072, |
|
"eval_f1_class_8": 0.8668280871670703, |
|
"eval_f1_class_9": 0.9410150891632374, |
|
"eval_f1_macro": 0.8631190121909882, |
|
"eval_f1_micro": 0.8615626638699528, |
|
"eval_f1_weighted": 0.8631956720576096, |
|
"eval_loss": 0.5186640024185181, |
|
"eval_recall_weighted": 0.8615626638699528, |
|
"eval_runtime": 18.5317, |
|
"eval_samples_per_second": 205.809, |
|
"eval_steps_per_second": 6.475, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0964360587002098, |
|
"grad_norm": 6.887778282165527, |
|
"learning_rate": 4.172566371681416e-05, |
|
"loss": 0.3292, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0964360587002098, |
|
"eval_accuracy": 0.9003670686942842, |
|
"eval_f1_class_0": 0.8623188405797102, |
|
"eval_f1_class_1": 0.9615384615384616, |
|
"eval_f1_class_2": 0.7830045523520485, |
|
"eval_f1_class_3": 0.9002624671916011, |
|
"eval_f1_class_4": 0.8952618453865336, |
|
"eval_f1_class_5": 0.8938271604938272, |
|
"eval_f1_class_6": 0.9402173913043479, |
|
"eval_f1_class_7": 0.9341935483870967, |
|
"eval_f1_class_8": 0.9295039164490861, |
|
"eval_f1_class_9": 0.8950131233595802, |
|
"eval_f1_macro": 0.8995141307042294, |
|
"eval_f1_micro": 0.9003670686942842, |
|
"eval_f1_weighted": 0.899989630913346, |
|
"eval_loss": 0.37930119037628174, |
|
"eval_recall_weighted": 0.9003670686942842, |
|
"eval_runtime": 18.5937, |
|
"eval_samples_per_second": 205.123, |
|
"eval_steps_per_second": 6.454, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.620545073375262, |
|
"grad_norm": 18.899120330810547, |
|
"learning_rate": 3.896017699115044e-05, |
|
"loss": 0.2064, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.620545073375262, |
|
"eval_accuracy": 0.916885159937074, |
|
"eval_f1_class_0": 0.8660049627791564, |
|
"eval_f1_class_1": 0.9670329670329672, |
|
"eval_f1_class_2": 0.8037889039242219, |
|
"eval_f1_class_3": 0.9132653061224489, |
|
"eval_f1_class_4": 0.9252217997465145, |
|
"eval_f1_class_5": 0.896719319562576, |
|
"eval_f1_class_6": 0.9539295392953929, |
|
"eval_f1_class_7": 0.9450000000000001, |
|
"eval_f1_class_8": 0.9398601398601398, |
|
"eval_f1_class_9": 0.9660056657223797, |
|
"eval_f1_macro": 0.9176828604045797, |
|
"eval_f1_micro": 0.916885159937074, |
|
"eval_f1_weighted": 0.9176253934359663, |
|
"eval_loss": 0.34075525403022766, |
|
"eval_recall_weighted": 0.916885159937074, |
|
"eval_runtime": 18.4847, |
|
"eval_samples_per_second": 206.333, |
|
"eval_steps_per_second": 6.492, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.1446540880503147, |
|
"grad_norm": 1.2899887561798096, |
|
"learning_rate": 3.619469026548672e-05, |
|
"loss": 0.1562, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.1446540880503147, |
|
"eval_accuracy": 0.9326166754063975, |
|
"eval_f1_class_0": 0.8903061224489796, |
|
"eval_f1_class_1": 0.9594594594594594, |
|
"eval_f1_class_2": 0.8427128427128427, |
|
"eval_f1_class_3": 0.9363867684478372, |
|
"eval_f1_class_4": 0.9300000000000002, |
|
"eval_f1_class_5": 0.9108433734939759, |
|
"eval_f1_class_6": 0.9784366576819407, |
|
"eval_f1_class_7": 0.9619289340101522, |
|
"eval_f1_class_8": 0.9572192513368984, |
|
"eval_f1_class_9": 0.9567642956764295, |
|
"eval_f1_macro": 0.9324057705268516, |
|
"eval_f1_micro": 0.9326166754063975, |
|
"eval_f1_weighted": 0.9323951969454738, |
|
"eval_loss": 0.29545533657073975, |
|
"eval_recall_weighted": 0.9326166754063975, |
|
"eval_runtime": 18.4597, |
|
"eval_samples_per_second": 206.612, |
|
"eval_steps_per_second": 6.501, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.668763102725367, |
|
"grad_norm": 7.331942558288574, |
|
"learning_rate": 3.342920353982301e-05, |
|
"loss": 0.1097, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.668763102725367, |
|
"eval_accuracy": 0.936811746198217, |
|
"eval_f1_class_0": 0.8847497089639115, |
|
"eval_f1_class_1": 0.967391304347826, |
|
"eval_f1_class_2": 0.8646153846153847, |
|
"eval_f1_class_3": 0.9375830013280213, |
|
"eval_f1_class_4": 0.9284818067754077, |
|
"eval_f1_class_5": 0.9315068493150684, |
|
"eval_f1_class_6": 0.9733333333333333, |
|
"eval_f1_class_7": 0.9563046192259677, |
|
"eval_f1_class_8": 0.9731903485254693, |
|
"eval_f1_class_9": 0.9495225102319237, |
|
"eval_f1_macro": 0.9366678866662314, |
|
"eval_f1_micro": 0.936811746198217, |
|
"eval_f1_weighted": 0.9365912559521494, |
|
"eval_loss": 0.3290887176990509, |
|
"eval_recall_weighted": 0.936811746198217, |
|
"eval_runtime": 18.5309, |
|
"eval_samples_per_second": 205.818, |
|
"eval_steps_per_second": 6.476, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.1928721174004195, |
|
"grad_norm": 0.010240758769214153, |
|
"learning_rate": 3.066924778761062e-05, |
|
"loss": 0.1123, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.1928721174004195, |
|
"eval_accuracy": 0.9431043523859465, |
|
"eval_f1_class_0": 0.9055118110236221, |
|
"eval_f1_class_1": 0.9780821917808219, |
|
"eval_f1_class_2": 0.8661870503597122, |
|
"eval_f1_class_3": 0.9431524547803619, |
|
"eval_f1_class_4": 0.9369592088998764, |
|
"eval_f1_class_5": 0.9245742092457422, |
|
"eval_f1_class_6": 0.9734042553191489, |
|
"eval_f1_class_7": 0.958904109589041, |
|
"eval_f1_class_8": 0.9736842105263158, |
|
"eval_f1_class_9": 0.9680998613037449, |
|
"eval_f1_macro": 0.9428559362828388, |
|
"eval_f1_micro": 0.9431043523859465, |
|
"eval_f1_weighted": 0.9427303581875908, |
|
"eval_loss": 0.3067891001701355, |
|
"eval_recall_weighted": 0.9431043523859465, |
|
"eval_runtime": 18.589, |
|
"eval_samples_per_second": 205.175, |
|
"eval_steps_per_second": 6.455, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.716981132075472, |
|
"grad_norm": 0.4483562707901001, |
|
"learning_rate": 2.7903761061946903e-05, |
|
"loss": 0.0732, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.716981132075472, |
|
"eval_accuracy": 0.940744625065548, |
|
"eval_f1_class_0": 0.8982826948480845, |
|
"eval_f1_class_1": 0.9741496598639455, |
|
"eval_f1_class_2": 0.8587570621468926, |
|
"eval_f1_class_3": 0.940127388535032, |
|
"eval_f1_class_4": 0.9477707006369427, |
|
"eval_f1_class_5": 0.9290012033694345, |
|
"eval_f1_class_6": 0.972972972972973, |
|
"eval_f1_class_7": 0.9485294117647058, |
|
"eval_f1_class_8": 0.9693741677762983, |
|
"eval_f1_class_9": 0.9666666666666666, |
|
"eval_f1_macro": 0.9405631928580975, |
|
"eval_f1_micro": 0.940744625065548, |
|
"eval_f1_weighted": 0.9405207858122142, |
|
"eval_loss": 0.3208290636539459, |
|
"eval_recall_weighted": 0.940744625065548, |
|
"eval_runtime": 18.5774, |
|
"eval_samples_per_second": 205.303, |
|
"eval_steps_per_second": 6.459, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.241090146750524, |
|
"grad_norm": 0.00789484940469265, |
|
"learning_rate": 2.5138274336283185e-05, |
|
"loss": 0.0582, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.241090146750524, |
|
"eval_accuracy": 0.9444153120083901, |
|
"eval_f1_class_0": 0.8981132075471698, |
|
"eval_f1_class_1": 0.9780821917808219, |
|
"eval_f1_class_2": 0.8748241912798874, |
|
"eval_f1_class_3": 0.9479166666666665, |
|
"eval_f1_class_4": 0.9436795994993743, |
|
"eval_f1_class_5": 0.942189421894219, |
|
"eval_f1_class_6": 0.9613259668508287, |
|
"eval_f1_class_7": 0.9551122194513716, |
|
"eval_f1_class_8": 0.9713541666666666, |
|
"eval_f1_class_9": 0.9721448467966574, |
|
"eval_f1_macro": 0.9444742478433662, |
|
"eval_f1_micro": 0.9444153120083901, |
|
"eval_f1_weighted": 0.9443976005879655, |
|
"eval_loss": 0.3178301155567169, |
|
"eval_recall_weighted": 0.9444153120083901, |
|
"eval_runtime": 18.6189, |
|
"eval_samples_per_second": 204.845, |
|
"eval_steps_per_second": 6.445, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.765199161425577, |
|
"grad_norm": 0.2585061490535736, |
|
"learning_rate": 2.237278761061947e-05, |
|
"loss": 0.0533, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.765199161425577, |
|
"eval_accuracy": 0.951232302045097, |
|
"eval_f1_class_0": 0.9144316730523627, |
|
"eval_f1_class_1": 0.9754768392370572, |
|
"eval_f1_class_2": 0.8732782369146006, |
|
"eval_f1_class_3": 0.9662337662337663, |
|
"eval_f1_class_4": 0.958974358974359, |
|
"eval_f1_class_5": 0.9410288582183187, |
|
"eval_f1_class_6": 0.9840848806366048, |
|
"eval_f1_class_7": 0.9597989949748743, |
|
"eval_f1_class_8": 0.9712793733681462, |
|
"eval_f1_class_9": 0.9667590027700832, |
|
"eval_f1_macro": 0.9511345984380173, |
|
"eval_f1_micro": 0.9512323020450971, |
|
"eval_f1_weighted": 0.9512610025073973, |
|
"eval_loss": 0.30544909834861755, |
|
"eval_recall_weighted": 0.951232302045097, |
|
"eval_runtime": 18.5675, |
|
"eval_samples_per_second": 205.412, |
|
"eval_steps_per_second": 6.463, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.289308176100629, |
|
"grad_norm": 0.020399658009409904, |
|
"learning_rate": 1.9607300884955755e-05, |
|
"loss": 0.0424, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.289308176100629, |
|
"eval_accuracy": 0.9520188778185632, |
|
"eval_f1_class_0": 0.9041450777202072, |
|
"eval_f1_class_1": 0.9782016348773842, |
|
"eval_f1_class_2": 0.8834019204389574, |
|
"eval_f1_class_3": 0.9584415584415584, |
|
"eval_f1_class_4": 0.9491094147582698, |
|
"eval_f1_class_5": 0.9521472392638036, |
|
"eval_f1_class_6": 0.9824086603518267, |
|
"eval_f1_class_7": 0.9638854296388544, |
|
"eval_f1_class_8": 0.9814323607427056, |
|
"eval_f1_class_9": 0.9669421487603306, |
|
"eval_f1_macro": 0.9520115444993896, |
|
"eval_f1_micro": 0.9520188778185632, |
|
"eval_f1_weighted": 0.9520110205225185, |
|
"eval_loss": 0.30924680829048157, |
|
"eval_recall_weighted": 0.9520188778185632, |
|
"eval_runtime": 18.5678, |
|
"eval_samples_per_second": 205.41, |
|
"eval_steps_per_second": 6.463, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.813417190775681, |
|
"grad_norm": 3.3116018772125244, |
|
"learning_rate": 1.6841814159292034e-05, |
|
"loss": 0.0378, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.813417190775681, |
|
"eval_accuracy": 0.9530676455165181, |
|
"eval_f1_class_0": 0.9155107187894074, |
|
"eval_f1_class_1": 0.9768707482993196, |
|
"eval_f1_class_2": 0.8838526912181304, |
|
"eval_f1_class_3": 0.9488491048593349, |
|
"eval_f1_class_4": 0.951885565669701, |
|
"eval_f1_class_5": 0.9431680773881501, |
|
"eval_f1_class_6": 0.9813829787234043, |
|
"eval_f1_class_7": 0.9669211195928754, |
|
"eval_f1_class_8": 0.9868073878627968, |
|
"eval_f1_class_9": 0.9750000000000001, |
|
"eval_f1_macro": 0.953024839240312, |
|
"eval_f1_micro": 0.9530676455165181, |
|
"eval_f1_weighted": 0.952958033459228, |
|
"eval_loss": 0.30203908681869507, |
|
"eval_recall_weighted": 0.9530676455165181, |
|
"eval_runtime": 18.5121, |
|
"eval_samples_per_second": 206.028, |
|
"eval_steps_per_second": 6.482, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.337526205450734, |
|
"grad_norm": 0.005160727072507143, |
|
"learning_rate": 1.4081858407079645e-05, |
|
"loss": 0.0305, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.337526205450734, |
|
"eval_accuracy": 0.954116413214473, |
|
"eval_f1_class_0": 0.9171817058096415, |
|
"eval_f1_class_1": 0.9822646657571624, |
|
"eval_f1_class_2": 0.8885672937771346, |
|
"eval_f1_class_3": 0.95822454308094, |
|
"eval_f1_class_4": 0.952020202020202, |
|
"eval_f1_class_5": 0.9428918590522478, |
|
"eval_f1_class_6": 0.9761273209549071, |
|
"eval_f1_class_7": 0.9647355163727961, |
|
"eval_f1_class_8": 0.9866310160427807, |
|
"eval_f1_class_9": 0.9721448467966574, |
|
"eval_f1_macro": 0.9540788969664471, |
|
"eval_f1_micro": 0.954116413214473, |
|
"eval_f1_weighted": 0.9540130488123523, |
|
"eval_loss": 0.28602519631385803, |
|
"eval_recall_weighted": 0.954116413214473, |
|
"eval_runtime": 18.523, |
|
"eval_samples_per_second": 205.906, |
|
"eval_steps_per_second": 6.478, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.861635220125786, |
|
"grad_norm": 0.02748439833521843, |
|
"learning_rate": 1.131637168141593e-05, |
|
"loss": 0.0216, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.861635220125786, |
|
"eval_accuracy": 0.9562139486103828, |
|
"eval_f1_class_0": 0.9250317662007626, |
|
"eval_f1_class_1": 0.9794801641586868, |
|
"eval_f1_class_2": 0.8888888888888887, |
|
"eval_f1_class_3": 0.9527458492975734, |
|
"eval_f1_class_4": 0.9604086845466155, |
|
"eval_f1_class_5": 0.9476248477466506, |
|
"eval_f1_class_6": 0.9853137516688919, |
|
"eval_f1_class_7": 0.9682337992376113, |
|
"eval_f1_class_8": 0.9790575916230366, |
|
"eval_f1_class_9": 0.9736477115117891, |
|
"eval_f1_macro": 0.9560433054880507, |
|
"eval_f1_micro": 0.9562139486103828, |
|
"eval_f1_weighted": 0.956078385428201, |
|
"eval_loss": 0.303732693195343, |
|
"eval_recall_weighted": 0.9562139486103828, |
|
"eval_runtime": 18.6679, |
|
"eval_samples_per_second": 204.308, |
|
"eval_steps_per_second": 6.428, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 8.385744234800839, |
|
"grad_norm": 0.017909903079271317, |
|
"learning_rate": 8.550884955752212e-06, |
|
"loss": 0.0198, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.385744234800839, |
|
"eval_accuracy": 0.9562139486103828, |
|
"eval_f1_class_0": 0.9207547169811321, |
|
"eval_f1_class_1": 0.9795361527967258, |
|
"eval_f1_class_2": 0.8829337094499294, |
|
"eval_f1_class_3": 0.9623865110246432, |
|
"eval_f1_class_4": 0.9592875318066159, |
|
"eval_f1_class_5": 0.9451887941534713, |
|
"eval_f1_class_6": 0.9826897470039946, |
|
"eval_f1_class_7": 0.9707006369426752, |
|
"eval_f1_class_8": 0.9855072463768115, |
|
"eval_f1_class_9": 0.9721448467966574, |
|
"eval_f1_macro": 0.9561129893332657, |
|
"eval_f1_micro": 0.9562139486103828, |
|
"eval_f1_weighted": 0.9561806063383737, |
|
"eval_loss": 0.2868480682373047, |
|
"eval_recall_weighted": 0.9562139486103828, |
|
"eval_runtime": 18.5749, |
|
"eval_samples_per_second": 205.331, |
|
"eval_steps_per_second": 6.46, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.90985324947589, |
|
"grad_norm": 0.019022395834326744, |
|
"learning_rate": 5.785398230088496e-06, |
|
"loss": 0.0145, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 8.90985324947589, |
|
"eval_accuracy": 0.9580492920818039, |
|
"eval_f1_class_0": 0.9221260815822002, |
|
"eval_f1_class_1": 0.9754768392370572, |
|
"eval_f1_class_2": 0.8932584269662921, |
|
"eval_f1_class_3": 0.9621903520208606, |
|
"eval_f1_class_4": 0.9541984732824428, |
|
"eval_f1_class_5": 0.957920792079208, |
|
"eval_f1_class_6": 0.9879518072289156, |
|
"eval_f1_class_7": 0.9671717171717172, |
|
"eval_f1_class_8": 0.9867724867724869, |
|
"eval_f1_class_9": 0.9735006973500697, |
|
"eval_f1_macro": 0.9580567673691249, |
|
"eval_f1_micro": 0.9580492920818039, |
|
"eval_f1_weighted": 0.9581079328217166, |
|
"eval_loss": 0.29160091280937195, |
|
"eval_recall_weighted": 0.9580492920818039, |
|
"eval_runtime": 18.5713, |
|
"eval_samples_per_second": 205.371, |
|
"eval_steps_per_second": 6.462, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 9.433962264150944, |
|
"grad_norm": 0.0038110397290438414, |
|
"learning_rate": 3.019911504424779e-06, |
|
"loss": 0.014, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 9.433962264150944, |
|
"eval_accuracy": 0.959884635553225, |
|
"eval_f1_class_0": 0.9228855721393034, |
|
"eval_f1_class_1": 0.9809264305177112, |
|
"eval_f1_class_2": 0.9080459770114943, |
|
"eval_f1_class_3": 0.9571984435797665, |
|
"eval_f1_class_4": 0.9566326530612245, |
|
"eval_f1_class_5": 0.9585365853658537, |
|
"eval_f1_class_6": 0.9840848806366048, |
|
"eval_f1_class_7": 0.9745547073791349, |
|
"eval_f1_class_8": 0.9868073878627968, |
|
"eval_f1_class_9": 0.9680998613037449, |
|
"eval_f1_macro": 0.9597772498857635, |
|
"eval_f1_micro": 0.959884635553225, |
|
"eval_f1_weighted": 0.9597604965241316, |
|
"eval_loss": 0.28916341066360474, |
|
"eval_recall_weighted": 0.959884635553225, |
|
"eval_runtime": 18.5578, |
|
"eval_samples_per_second": 205.52, |
|
"eval_steps_per_second": 6.466, |
|
"step": 9000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9540, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.787923857566925e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|