|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9982608695652173, |
|
"eval_steps": 500, |
|
"global_step": 574, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0036036036036036037, |
|
"grad_norm": 0.1164047870616515, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 0.1263, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007207207207207207, |
|
"grad_norm": 0.1627219461416066, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 0.1446, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.010810810810810811, |
|
"grad_norm": 0.1157756817304506, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 0.1441, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.014414414414414415, |
|
"grad_norm": 0.14566785288918435, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.1466, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.018018018018018018, |
|
"grad_norm": 0.13068033224281192, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.1342, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021621621621621623, |
|
"grad_norm": 0.15128910055561917, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 0.1263, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.025225225225225224, |
|
"grad_norm": 0.12625301643275005, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1306, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02882882882882883, |
|
"grad_norm": 0.1341542973939784, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.1132, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.032432432432432434, |
|
"grad_norm": 0.09503727827074428, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 0.0921, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.036036036036036036, |
|
"grad_norm": 0.13724411508071346, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.123, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03963963963963964, |
|
"grad_norm": 0.1249162520121657, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 0.1183, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.043243243243243246, |
|
"grad_norm": 0.14002577339626954, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 0.1297, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04684684684684685, |
|
"grad_norm": 0.12032689516166056, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 0.1144, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.05045045045045045, |
|
"grad_norm": 0.11822508923100593, |
|
"learning_rate": 5e-05, |
|
"loss": 0.127, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.05405405405405406, |
|
"grad_norm": 0.12626847998511856, |
|
"learning_rate": 5.3571428571428575e-05, |
|
"loss": 0.1246, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05765765765765766, |
|
"grad_norm": 0.1394135180306787, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 0.1249, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.06126126126126126, |
|
"grad_norm": 0.1486627737985617, |
|
"learning_rate": 6.0714285714285715e-05, |
|
"loss": 0.1156, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06486486486486487, |
|
"grad_norm": 0.08348858837628631, |
|
"learning_rate": 6.428571428571429e-05, |
|
"loss": 0.0972, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06846846846846846, |
|
"grad_norm": 0.19888777350730014, |
|
"learning_rate": 6.785714285714286e-05, |
|
"loss": 0.146, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.07207207207207207, |
|
"grad_norm": 0.12757635434343284, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 0.109, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07567567567567568, |
|
"grad_norm": 0.17261365946211904, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.139, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.07927927927927927, |
|
"grad_norm": 0.19568066543467844, |
|
"learning_rate": 7.857142857142858e-05, |
|
"loss": 0.1366, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.08288288288288288, |
|
"grad_norm": 0.20224174296046235, |
|
"learning_rate": 8.214285714285714e-05, |
|
"loss": 0.1437, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.08648648648648649, |
|
"grad_norm": 0.24075864691751864, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 0.16, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.09009009009009009, |
|
"grad_norm": 0.1506076228406242, |
|
"learning_rate": 8.92857142857143e-05, |
|
"loss": 0.1074, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0936936936936937, |
|
"grad_norm": 0.16087708567600026, |
|
"learning_rate": 9.285714285714286e-05, |
|
"loss": 0.1071, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0972972972972973, |
|
"grad_norm": 0.15632293632911032, |
|
"learning_rate": 9.642857142857143e-05, |
|
"loss": 0.115, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1009009009009009, |
|
"grad_norm": 0.15351274455794925, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1083, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1045045045045045, |
|
"grad_norm": 0.17410060629498864, |
|
"learning_rate": 0.00010357142857142859, |
|
"loss": 0.1143, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.10810810810810811, |
|
"grad_norm": 0.11003201267949979, |
|
"learning_rate": 0.00010714285714285715, |
|
"loss": 0.0822, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11171171171171171, |
|
"grad_norm": 0.14850855070731758, |
|
"learning_rate": 0.00011071428571428572, |
|
"loss": 0.1422, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.11531531531531532, |
|
"grad_norm": 0.11574892051893418, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 0.0917, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.11891891891891893, |
|
"grad_norm": 0.12172342587299105, |
|
"learning_rate": 0.00011785714285714287, |
|
"loss": 0.1125, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.12252252252252252, |
|
"grad_norm": 0.10233939138594608, |
|
"learning_rate": 0.00012142857142857143, |
|
"loss": 0.0916, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.12612612612612611, |
|
"grad_norm": 0.1277262526433454, |
|
"learning_rate": 0.000125, |
|
"loss": 0.1168, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.12972972972972974, |
|
"grad_norm": 0.1510932624260595, |
|
"learning_rate": 0.00012857142857142858, |
|
"loss": 0.1366, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 0.1381015918157766, |
|
"learning_rate": 0.00013214285714285715, |
|
"loss": 0.1112, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.13693693693693693, |
|
"grad_norm": 0.12744142332679428, |
|
"learning_rate": 0.00013571428571428572, |
|
"loss": 0.1169, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.14054054054054055, |
|
"grad_norm": 0.12605036861900049, |
|
"learning_rate": 0.0001392857142857143, |
|
"loss": 0.1265, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.14414414414414414, |
|
"grad_norm": 0.09494597572244792, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 0.0856, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14774774774774774, |
|
"grad_norm": 0.11563858827548382, |
|
"learning_rate": 0.00014642857142857141, |
|
"loss": 0.091, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.15135135135135136, |
|
"grad_norm": 0.10978464408087514, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.0934, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.15495495495495495, |
|
"grad_norm": 0.14748905325763195, |
|
"learning_rate": 0.0001535714285714286, |
|
"loss": 0.1042, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.15855855855855855, |
|
"grad_norm": 0.15028736539177057, |
|
"learning_rate": 0.00015714285714285716, |
|
"loss": 0.1232, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.16216216216216217, |
|
"grad_norm": 0.16834971174993532, |
|
"learning_rate": 0.00016071428571428573, |
|
"loss": 0.1388, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.16576576576576577, |
|
"grad_norm": 0.1364758084150375, |
|
"learning_rate": 0.00016428571428571428, |
|
"loss": 0.1245, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.16936936936936936, |
|
"grad_norm": 0.1906244918264085, |
|
"learning_rate": 0.00016785714285714288, |
|
"loss": 0.1419, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.17297297297297298, |
|
"grad_norm": 0.15582113106280285, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 0.1284, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.17657657657657658, |
|
"grad_norm": 0.1653708513472312, |
|
"learning_rate": 0.000175, |
|
"loss": 0.1265, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.18018018018018017, |
|
"grad_norm": 0.11080370021143991, |
|
"learning_rate": 0.0001785714285714286, |
|
"loss": 0.1136, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1837837837837838, |
|
"grad_norm": 0.14497196744014715, |
|
"learning_rate": 0.00018214285714285714, |
|
"loss": 0.1336, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.1873873873873874, |
|
"grad_norm": 0.11471991362976224, |
|
"learning_rate": 0.00018571428571428572, |
|
"loss": 0.1009, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.19099099099099098, |
|
"grad_norm": 0.1518492774928798, |
|
"learning_rate": 0.0001892857142857143, |
|
"loss": 0.1445, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1945945945945946, |
|
"grad_norm": 0.15638927885876117, |
|
"learning_rate": 0.00019285714285714286, |
|
"loss": 0.1095, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1981981981981982, |
|
"grad_norm": 0.12245693248057901, |
|
"learning_rate": 0.00019642857142857144, |
|
"loss": 0.099, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2018018018018018, |
|
"grad_norm": 0.13146029758520172, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1215, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.20540540540540542, |
|
"grad_norm": 0.1449923810118862, |
|
"learning_rate": 0.00019999801019909556, |
|
"loss": 0.1376, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.209009009009009, |
|
"grad_norm": 0.12275432649506118, |
|
"learning_rate": 0.0001999920408755684, |
|
"loss": 0.111, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2126126126126126, |
|
"grad_norm": 0.13294914056261917, |
|
"learning_rate": 0.00019998209226697376, |
|
"loss": 0.1184, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.21621621621621623, |
|
"grad_norm": 0.11587311682416103, |
|
"learning_rate": 0.00019996816476922677, |
|
"loss": 0.1029, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.21981981981981982, |
|
"grad_norm": 0.211706197616785, |
|
"learning_rate": 0.00019995025893658627, |
|
"loss": 0.1323, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.22342342342342342, |
|
"grad_norm": 0.135734632583536, |
|
"learning_rate": 0.00019992837548163316, |
|
"loss": 0.1073, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.22702702702702704, |
|
"grad_norm": 0.1916821730614324, |
|
"learning_rate": 0.00019990251527524178, |
|
"loss": 0.1287, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.23063063063063063, |
|
"grad_norm": 0.1696786518231171, |
|
"learning_rate": 0.00019987267934654538, |
|
"loss": 0.1467, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.23423423423423423, |
|
"grad_norm": 0.1556597769170162, |
|
"learning_rate": 0.00019983886888289514, |
|
"loss": 0.1074, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.23783783783783785, |
|
"grad_norm": 0.11435037299616506, |
|
"learning_rate": 0.00019980108522981284, |
|
"loss": 0.1054, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.24144144144144145, |
|
"grad_norm": 0.1377527772698083, |
|
"learning_rate": 0.00019975932989093747, |
|
"loss": 0.1167, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.24504504504504504, |
|
"grad_norm": 0.13089085083126692, |
|
"learning_rate": 0.00019971360452796522, |
|
"loss": 0.1268, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.24864864864864866, |
|
"grad_norm": 0.1461221542311374, |
|
"learning_rate": 0.00019966391096058346, |
|
"loss": 0.1353, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.25225225225225223, |
|
"grad_norm": 0.10972604571483792, |
|
"learning_rate": 0.0001996102511663983, |
|
"loss": 0.0839, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25585585585585585, |
|
"grad_norm": 0.14694411558687645, |
|
"learning_rate": 0.0001995526272808559, |
|
"loss": 0.1266, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.2594594594594595, |
|
"grad_norm": 0.2623900472581046, |
|
"learning_rate": 0.00019949104159715743, |
|
"loss": 0.1192, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.26306306306306304, |
|
"grad_norm": 0.14253202316127417, |
|
"learning_rate": 0.0001994254965661679, |
|
"loss": 0.1268, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.22775504622269988, |
|
"learning_rate": 0.0001993559947963185, |
|
"loss": 0.1624, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.2702702702702703, |
|
"grad_norm": 0.18974052313619846, |
|
"learning_rate": 0.00019928253905350296, |
|
"loss": 0.1656, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.27387387387387385, |
|
"grad_norm": 0.18281811162027828, |
|
"learning_rate": 0.00019920513226096733, |
|
"loss": 0.1512, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.2774774774774775, |
|
"grad_norm": 0.15981121539784604, |
|
"learning_rate": 0.00019912377749919374, |
|
"loss": 0.1414, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2810810810810811, |
|
"grad_norm": 0.11107015310290616, |
|
"learning_rate": 0.00019903847800577777, |
|
"loss": 0.0732, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.28468468468468466, |
|
"grad_norm": 0.11807284847655806, |
|
"learning_rate": 0.00019894923717529955, |
|
"loss": 0.1158, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2882882882882883, |
|
"grad_norm": 0.1674132871400004, |
|
"learning_rate": 0.00019885605855918885, |
|
"loss": 0.1363, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2918918918918919, |
|
"grad_norm": 0.09521837972620555, |
|
"learning_rate": 0.00019875894586558355, |
|
"loss": 0.0761, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2954954954954955, |
|
"grad_norm": 0.13313059706266978, |
|
"learning_rate": 0.00019865790295918212, |
|
"loss": 0.114, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2990990990990991, |
|
"grad_norm": 0.15752991450823575, |
|
"learning_rate": 0.00019855293386108992, |
|
"loss": 0.1143, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3027027027027027, |
|
"grad_norm": 0.11681361221271575, |
|
"learning_rate": 0.0001984440427486591, |
|
"loss": 0.0955, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.3063063063063063, |
|
"grad_norm": 0.1435158350817726, |
|
"learning_rate": 0.00019833123395532226, |
|
"loss": 0.1292, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3099099099099099, |
|
"grad_norm": 0.1174821097766054, |
|
"learning_rate": 0.00019821451197042026, |
|
"loss": 0.119, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.31351351351351353, |
|
"grad_norm": 0.14421204301690782, |
|
"learning_rate": 0.00019809388143902332, |
|
"loss": 0.1313, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.3171171171171171, |
|
"grad_norm": 0.13517965622709482, |
|
"learning_rate": 0.0001979693471617462, |
|
"loss": 0.1297, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.3207207207207207, |
|
"grad_norm": 0.17145867539050777, |
|
"learning_rate": 0.00019784091409455728, |
|
"loss": 0.1359, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.32432432432432434, |
|
"grad_norm": 0.11953352813577937, |
|
"learning_rate": 0.00019770858734858126, |
|
"loss": 0.0878, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3279279279279279, |
|
"grad_norm": 0.13673174823647213, |
|
"learning_rate": 0.00019757237218989563, |
|
"loss": 0.1265, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.33153153153153153, |
|
"grad_norm": 0.16490479257041854, |
|
"learning_rate": 0.00019743227403932134, |
|
"loss": 0.1393, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.33513513513513515, |
|
"grad_norm": 0.12343228690652325, |
|
"learning_rate": 0.000197288298472207, |
|
"loss": 0.1174, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.3387387387387387, |
|
"grad_norm": 0.16745916159569352, |
|
"learning_rate": 0.00019714045121820676, |
|
"loss": 0.1235, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.34234234234234234, |
|
"grad_norm": 0.16922526895888806, |
|
"learning_rate": 0.00019698873816105273, |
|
"loss": 0.0975, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.34594594594594597, |
|
"grad_norm": 0.129484224418453, |
|
"learning_rate": 0.00019683316533832042, |
|
"loss": 0.0928, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.34954954954954953, |
|
"grad_norm": 0.1514002748369919, |
|
"learning_rate": 0.0001966737389411887, |
|
"loss": 0.1341, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.35315315315315315, |
|
"grad_norm": 0.11053077601153272, |
|
"learning_rate": 0.00019651046531419332, |
|
"loss": 0.09, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.3567567567567568, |
|
"grad_norm": 0.11955256535981768, |
|
"learning_rate": 0.00019634335095497458, |
|
"loss": 0.0978, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.36036036036036034, |
|
"grad_norm": 0.12318848470518083, |
|
"learning_rate": 0.0001961724025140185, |
|
"loss": 0.1123, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.36396396396396397, |
|
"grad_norm": 0.19878288570661823, |
|
"learning_rate": 0.0001959976267943923, |
|
"loss": 0.1449, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.3675675675675676, |
|
"grad_norm": 0.11498348089609609, |
|
"learning_rate": 0.0001958190307514737, |
|
"loss": 0.101, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.37117117117117115, |
|
"grad_norm": 0.12807480490548945, |
|
"learning_rate": 0.00019563662149267406, |
|
"loss": 0.1115, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.3747747747747748, |
|
"grad_norm": 0.1537951698344796, |
|
"learning_rate": 0.0001954504062771555, |
|
"loss": 0.1099, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.3783783783783784, |
|
"grad_norm": 0.13376774584465406, |
|
"learning_rate": 0.0001952603925155422, |
|
"loss": 0.0945, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.38198198198198197, |
|
"grad_norm": 0.11095795904499461, |
|
"learning_rate": 0.0001950665877696252, |
|
"loss": 0.1001, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.3855855855855856, |
|
"grad_norm": 0.1176293890483276, |
|
"learning_rate": 0.00019486899975206166, |
|
"loss": 0.1114, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.3891891891891892, |
|
"grad_norm": 0.16600471258328028, |
|
"learning_rate": 0.0001946676363260679, |
|
"loss": 0.1565, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.3927927927927928, |
|
"grad_norm": 0.12969105825015786, |
|
"learning_rate": 0.0001944625055051065, |
|
"loss": 0.0942, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3963963963963964, |
|
"grad_norm": 0.1260399594140325, |
|
"learning_rate": 0.00019425361545256727, |
|
"loss": 0.1151, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.12141152738985596, |
|
"learning_rate": 0.00019404097448144257, |
|
"loss": 0.0953, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.4036036036036036, |
|
"grad_norm": 0.16522441528864815, |
|
"learning_rate": 0.00019382459105399632, |
|
"loss": 0.1483, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.4072072072072072, |
|
"grad_norm": 0.16464941562962845, |
|
"learning_rate": 0.00019360447378142728, |
|
"loss": 0.1145, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.41081081081081083, |
|
"grad_norm": 0.1301041115410939, |
|
"learning_rate": 0.00019338063142352644, |
|
"loss": 0.109, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.4144144144144144, |
|
"grad_norm": 0.15394069789981274, |
|
"learning_rate": 0.00019315307288832835, |
|
"loss": 0.1484, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.418018018018018, |
|
"grad_norm": 0.1337025315682798, |
|
"learning_rate": 0.00019292180723175654, |
|
"loss": 0.1083, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.42162162162162165, |
|
"grad_norm": 0.11697191221298965, |
|
"learning_rate": 0.00019268684365726326, |
|
"loss": 0.1104, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.4252252252252252, |
|
"grad_norm": 0.14885108765057334, |
|
"learning_rate": 0.00019244819151546322, |
|
"loss": 0.1349, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.42882882882882883, |
|
"grad_norm": 0.16748465670739565, |
|
"learning_rate": 0.00019220586030376134, |
|
"loss": 0.1375, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.43243243243243246, |
|
"grad_norm": 0.16065529576883042, |
|
"learning_rate": 0.00019195985966597494, |
|
"loss": 0.1158, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.436036036036036, |
|
"grad_norm": 0.14710431466862364, |
|
"learning_rate": 0.0001917101993919498, |
|
"loss": 0.1123, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.43963963963963965, |
|
"grad_norm": 0.1535583565878682, |
|
"learning_rate": 0.00019145688941717075, |
|
"loss": 0.1244, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.44324324324324327, |
|
"grad_norm": 0.15887496082691002, |
|
"learning_rate": 0.00019119993982236606, |
|
"loss": 0.1099, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.44684684684684683, |
|
"grad_norm": 0.17132720394894463, |
|
"learning_rate": 0.00019093936083310653, |
|
"loss": 0.1366, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.45045045045045046, |
|
"grad_norm": 0.1304195997449305, |
|
"learning_rate": 0.00019067516281939825, |
|
"loss": 0.1042, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4540540540540541, |
|
"grad_norm": 0.13720183539624425, |
|
"learning_rate": 0.00019040735629527027, |
|
"loss": 0.0939, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.45765765765765765, |
|
"grad_norm": 0.1878348429175824, |
|
"learning_rate": 0.00019013595191835574, |
|
"loss": 0.1421, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.46126126126126127, |
|
"grad_norm": 0.15221296411188612, |
|
"learning_rate": 0.00018986096048946824, |
|
"loss": 0.1207, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.4648648648648649, |
|
"grad_norm": 0.12530318604533355, |
|
"learning_rate": 0.0001895823929521716, |
|
"loss": 0.1101, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.46846846846846846, |
|
"grad_norm": 0.11753990553496706, |
|
"learning_rate": 0.0001893002603923446, |
|
"loss": 0.0814, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4720720720720721, |
|
"grad_norm": 0.15143122574748422, |
|
"learning_rate": 0.00018901457403773967, |
|
"loss": 0.1259, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.4756756756756757, |
|
"grad_norm": 0.12436714806981373, |
|
"learning_rate": 0.00018872534525753615, |
|
"loss": 0.1148, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.47927927927927927, |
|
"grad_norm": 0.13100199539485474, |
|
"learning_rate": 0.00018843258556188787, |
|
"loss": 0.1189, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.4828828828828829, |
|
"grad_norm": 0.1667053146851425, |
|
"learning_rate": 0.00018813630660146488, |
|
"loss": 0.1494, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.4864864864864865, |
|
"grad_norm": 0.09073673518826318, |
|
"learning_rate": 0.00018783652016699014, |
|
"loss": 0.0799, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.4900900900900901, |
|
"grad_norm": 0.13343062211829884, |
|
"learning_rate": 0.0001875332381887699, |
|
"loss": 0.1241, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.4936936936936937, |
|
"grad_norm": 0.12750970397329575, |
|
"learning_rate": 0.0001872264727362194, |
|
"loss": 0.1386, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.4972972972972973, |
|
"grad_norm": 0.10441877909622974, |
|
"learning_rate": 0.00018691623601738199, |
|
"loss": 0.0888, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.5009009009009009, |
|
"grad_norm": 0.14478179850573814, |
|
"learning_rate": 0.00018660254037844388, |
|
"loss": 0.1056, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.5045045045045045, |
|
"grad_norm": 0.13712755003139512, |
|
"learning_rate": 0.00018628539830324229, |
|
"loss": 0.1489, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.5081081081081081, |
|
"grad_norm": 0.12379021926600628, |
|
"learning_rate": 0.000185964822412769, |
|
"loss": 0.1071, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.5117117117117117, |
|
"grad_norm": 0.12266406139545731, |
|
"learning_rate": 0.00018564082546466805, |
|
"loss": 0.1141, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.5153153153153153, |
|
"grad_norm": 0.09332411107267007, |
|
"learning_rate": 0.00018531342035272766, |
|
"loss": 0.0876, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.518918918918919, |
|
"grad_norm": 0.12722229275266542, |
|
"learning_rate": 0.00018498262010636774, |
|
"loss": 0.123, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.5225225225225225, |
|
"grad_norm": 0.1934624210241968, |
|
"learning_rate": 0.00018464843789012085, |
|
"loss": 0.1891, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.5261261261261261, |
|
"grad_norm": 0.1202206919464269, |
|
"learning_rate": 0.00018431088700310844, |
|
"loss": 0.1157, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.5297297297297298, |
|
"grad_norm": 0.11855534138749764, |
|
"learning_rate": 0.0001839699808785118, |
|
"loss": 0.1126, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.10719514027165045, |
|
"learning_rate": 0.00018362573308303718, |
|
"loss": 0.0907, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.5369369369369369, |
|
"grad_norm": 0.11210467216409752, |
|
"learning_rate": 0.00018327815731637612, |
|
"loss": 0.1007, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.5405405405405406, |
|
"grad_norm": 0.12526617885340885, |
|
"learning_rate": 0.00018292726741066007, |
|
"loss": 0.1049, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5441441441441441, |
|
"grad_norm": 0.1477303393799172, |
|
"learning_rate": 0.00018257307732991008, |
|
"loss": 0.1516, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.5477477477477477, |
|
"grad_norm": 0.14857702506705278, |
|
"learning_rate": 0.00018221560116948103, |
|
"loss": 0.1453, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.5513513513513514, |
|
"grad_norm": 0.14008518634545825, |
|
"learning_rate": 0.0001818548531555006, |
|
"loss": 0.1297, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.554954954954955, |
|
"grad_norm": 0.12658212522638404, |
|
"learning_rate": 0.0001814908476443034, |
|
"loss": 0.1155, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.5585585585585585, |
|
"grad_norm": 0.149670716923037, |
|
"learning_rate": 0.00018112359912185924, |
|
"loss": 0.1211, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.5621621621621622, |
|
"grad_norm": 0.11342605203968036, |
|
"learning_rate": 0.000180753122203197, |
|
"loss": 0.0899, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.5657657657657658, |
|
"grad_norm": 0.15888593819383173, |
|
"learning_rate": 0.00018037943163182283, |
|
"loss": 0.1445, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.5693693693693693, |
|
"grad_norm": 0.12437893978089608, |
|
"learning_rate": 0.00018000254227913348, |
|
"loss": 0.1152, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.572972972972973, |
|
"grad_norm": 0.11638937373238138, |
|
"learning_rate": 0.0001796224691438244, |
|
"loss": 0.1123, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.5765765765765766, |
|
"grad_norm": 0.14812854362945038, |
|
"learning_rate": 0.00017923922735129302, |
|
"loss": 0.1263, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5801801801801801, |
|
"grad_norm": 0.10770071386782099, |
|
"learning_rate": 0.0001788528321530366, |
|
"loss": 0.0955, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.5837837837837838, |
|
"grad_norm": 0.1870539683925041, |
|
"learning_rate": 0.00017846329892604547, |
|
"loss": 0.1124, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.5873873873873874, |
|
"grad_norm": 0.1560374478952629, |
|
"learning_rate": 0.00017807064317219094, |
|
"loss": 0.122, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.590990990990991, |
|
"grad_norm": 0.14789972168680796, |
|
"learning_rate": 0.00017767488051760857, |
|
"loss": 0.0955, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.5945945945945946, |
|
"grad_norm": 0.17954009944461283, |
|
"learning_rate": 0.00017727602671207605, |
|
"loss": 0.1326, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.5981981981981982, |
|
"grad_norm": 0.12473531577026101, |
|
"learning_rate": 0.00017687409762838664, |
|
"loss": 0.139, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.6018018018018018, |
|
"grad_norm": 0.18890214448118112, |
|
"learning_rate": 0.00017646910926171747, |
|
"loss": 0.158, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.6054054054054054, |
|
"grad_norm": 0.1158510197827391, |
|
"learning_rate": 0.00017606107772899287, |
|
"loss": 0.124, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.609009009009009, |
|
"grad_norm": 0.1513359972404607, |
|
"learning_rate": 0.00017565001926824313, |
|
"loss": 0.1535, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.6126126126126126, |
|
"grad_norm": 0.11561240472832256, |
|
"learning_rate": 0.00017523595023795813, |
|
"loss": 0.097, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.6162162162162163, |
|
"grad_norm": 0.14453378759822266, |
|
"learning_rate": 0.00017481888711643655, |
|
"loss": 0.1369, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.6198198198198198, |
|
"grad_norm": 0.10823698221755142, |
|
"learning_rate": 0.00017439884650112989, |
|
"loss": 0.0854, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.6234234234234234, |
|
"grad_norm": 0.16461158555393735, |
|
"learning_rate": 0.0001739758451079821, |
|
"loss": 0.1327, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.6270270270270271, |
|
"grad_norm": 0.13330810816894179, |
|
"learning_rate": 0.00017354989977076422, |
|
"loss": 0.0988, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.6306306306306306, |
|
"grad_norm": 0.1603897957937655, |
|
"learning_rate": 0.00017312102744040467, |
|
"loss": 0.1517, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6342342342342342, |
|
"grad_norm": 0.1387499574229483, |
|
"learning_rate": 0.00017268924518431438, |
|
"loss": 0.1159, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.6378378378378379, |
|
"grad_norm": 0.15123399261590567, |
|
"learning_rate": 0.0001722545701857079, |
|
"loss": 0.135, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.6414414414414414, |
|
"grad_norm": 0.201686818845506, |
|
"learning_rate": 0.0001718170197429193, |
|
"loss": 0.1601, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.645045045045045, |
|
"grad_norm": 0.16050791333444517, |
|
"learning_rate": 0.0001713766112687139, |
|
"loss": 0.1376, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.6486486486486487, |
|
"grad_norm": 0.13004224853328716, |
|
"learning_rate": 0.00017093336228959536, |
|
"loss": 0.1191, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6522522522522523, |
|
"grad_norm": 0.10061992398695434, |
|
"learning_rate": 0.000170487290445108, |
|
"loss": 0.0958, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.6558558558558558, |
|
"grad_norm": 0.09779721051938423, |
|
"learning_rate": 0.0001700384134871351, |
|
"loss": 0.098, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.6594594594594595, |
|
"grad_norm": 0.12862092154540355, |
|
"learning_rate": 0.0001695867492791921, |
|
"loss": 0.1083, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.6630630630630631, |
|
"grad_norm": 0.13476322854527875, |
|
"learning_rate": 0.00016913231579571608, |
|
"loss": 0.1466, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.09595530640274692, |
|
"learning_rate": 0.00016867513112135013, |
|
"loss": 0.0842, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.6702702702702703, |
|
"grad_norm": 0.15679543098949758, |
|
"learning_rate": 0.00016821521345022377, |
|
"loss": 0.1338, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.6738738738738739, |
|
"grad_norm": 0.14388550615027906, |
|
"learning_rate": 0.00016775258108522908, |
|
"loss": 0.1125, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.6774774774774774, |
|
"grad_norm": 0.14073204006731552, |
|
"learning_rate": 0.0001672872524372919, |
|
"loss": 0.139, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.6810810810810811, |
|
"grad_norm": 0.09327785295917886, |
|
"learning_rate": 0.00016681924602463962, |
|
"loss": 0.0876, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.6846846846846847, |
|
"grad_norm": 0.0966354577674113, |
|
"learning_rate": 0.00016634858047206378, |
|
"loss": 0.0817, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6882882882882883, |
|
"grad_norm": 0.1298212529485729, |
|
"learning_rate": 0.00016587527451017938, |
|
"loss": 0.1248, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.6918918918918919, |
|
"grad_norm": 0.15190505228456444, |
|
"learning_rate": 0.00016539934697467894, |
|
"loss": 0.1346, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.6954954954954955, |
|
"grad_norm": 0.12074435445615049, |
|
"learning_rate": 0.0001649208168055833, |
|
"loss": 0.1218, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.6990990990990991, |
|
"grad_norm": 0.11339361129121636, |
|
"learning_rate": 0.0001644397030464877, |
|
"loss": 0.0945, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.7027027027027027, |
|
"grad_norm": 0.1480633681266718, |
|
"learning_rate": 0.00016395602484380406, |
|
"loss": 0.143, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.7063063063063063, |
|
"grad_norm": 0.13202765755871132, |
|
"learning_rate": 0.0001634698014459988, |
|
"loss": 0.1256, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.7099099099099099, |
|
"grad_norm": 0.10905065599283695, |
|
"learning_rate": 0.00016298105220282713, |
|
"loss": 0.1024, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.7135135135135136, |
|
"grad_norm": 0.10616436723037755, |
|
"learning_rate": 0.00016248979656456275, |
|
"loss": 0.1066, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.7171171171171171, |
|
"grad_norm": 0.1063733952868901, |
|
"learning_rate": 0.0001619960540812239, |
|
"loss": 0.1065, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.7207207207207207, |
|
"grad_norm": 0.1648449550913926, |
|
"learning_rate": 0.00016149984440179537, |
|
"loss": 0.1416, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7243243243243244, |
|
"grad_norm": 0.14504142427358913, |
|
"learning_rate": 0.00016100118727344659, |
|
"loss": 0.1323, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.7279279279279279, |
|
"grad_norm": 0.15511743070098452, |
|
"learning_rate": 0.00016050010254074564, |
|
"loss": 0.1259, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.7315315315315315, |
|
"grad_norm": 0.12280785450706579, |
|
"learning_rate": 0.00015999661014486956, |
|
"loss": 0.1165, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.7351351351351352, |
|
"grad_norm": 0.13888404263902684, |
|
"learning_rate": 0.00015949073012281093, |
|
"loss": 0.1047, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.7387387387387387, |
|
"grad_norm": 0.10651036692593711, |
|
"learning_rate": 0.00015898248260658016, |
|
"loss": 0.1181, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.7423423423423423, |
|
"grad_norm": 0.10861974936989245, |
|
"learning_rate": 0.0001584718878224047, |
|
"loss": 0.1064, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.745945945945946, |
|
"grad_norm": 0.10231866176721904, |
|
"learning_rate": 0.00015795896608992378, |
|
"loss": 0.0988, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.7495495495495496, |
|
"grad_norm": 0.15024568241023914, |
|
"learning_rate": 0.00015744373782137992, |
|
"loss": 0.1489, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.7531531531531531, |
|
"grad_norm": 0.12371588452286458, |
|
"learning_rate": 0.00015692622352080662, |
|
"loss": 0.116, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.7567567567567568, |
|
"grad_norm": 0.11392721432010788, |
|
"learning_rate": 0.00015640644378321235, |
|
"loss": 0.1015, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7603603603603604, |
|
"grad_norm": 0.11201427932233406, |
|
"learning_rate": 0.00015588441929376097, |
|
"loss": 0.0863, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.7639639639639639, |
|
"grad_norm": 0.159849063390471, |
|
"learning_rate": 0.00015536017082694846, |
|
"loss": 0.1651, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.7675675675675676, |
|
"grad_norm": 0.13935698141384686, |
|
"learning_rate": 0.00015483371924577635, |
|
"loss": 0.1262, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.7711711711711712, |
|
"grad_norm": 0.15388913192797118, |
|
"learning_rate": 0.00015430508550092124, |
|
"loss": 0.1602, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.7747747747747747, |
|
"grad_norm": 0.11744911276482749, |
|
"learning_rate": 0.00015377429062990122, |
|
"loss": 0.1082, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.7783783783783784, |
|
"grad_norm": 0.14669529425537173, |
|
"learning_rate": 0.00015324135575623857, |
|
"loss": 0.1329, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.781981981981982, |
|
"grad_norm": 0.09725689202217797, |
|
"learning_rate": 0.00015270630208861916, |
|
"loss": 0.1001, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.7855855855855856, |
|
"grad_norm": 0.09066648478601479, |
|
"learning_rate": 0.00015216915092004847, |
|
"loss": 0.1005, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.7891891891891892, |
|
"grad_norm": 0.10556590806339675, |
|
"learning_rate": 0.00015162992362700406, |
|
"loss": 0.104, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.7927927927927928, |
|
"grad_norm": 0.10568504388848617, |
|
"learning_rate": 0.00015108864166858506, |
|
"loss": 0.1079, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7963963963963964, |
|
"grad_norm": 0.13168798693648778, |
|
"learning_rate": 0.0001505453265856581, |
|
"loss": 0.1319, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.11471998852906086, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.1181, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.8036036036036036, |
|
"grad_norm": 0.11715811264986671, |
|
"learning_rate": 0.00014945268361343748, |
|
"loss": 0.1053, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.8072072072072072, |
|
"grad_norm": 0.10491568472945026, |
|
"learning_rate": 0.00014890339920698334, |
|
"loss": 0.0931, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.8108108108108109, |
|
"grad_norm": 0.1381588520705617, |
|
"learning_rate": 0.00014835216863996975, |
|
"loss": 0.1417, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.8144144144144144, |
|
"grad_norm": 0.15507649891779268, |
|
"learning_rate": 0.0001477990138491783, |
|
"loss": 0.1418, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.818018018018018, |
|
"grad_norm": 0.11750139732263555, |
|
"learning_rate": 0.0001472439568479671, |
|
"loss": 0.1207, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.8216216216216217, |
|
"grad_norm": 0.12736893690378323, |
|
"learning_rate": 0.00014668701972539458, |
|
"loss": 0.1277, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.8252252252252252, |
|
"grad_norm": 0.11333975714941213, |
|
"learning_rate": 0.00014612822464534059, |
|
"loss": 0.1113, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.8288288288288288, |
|
"grad_norm": 0.11811846805876995, |
|
"learning_rate": 0.00014556759384562416, |
|
"loss": 0.1174, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.8324324324324325, |
|
"grad_norm": 0.12819157247369997, |
|
"learning_rate": 0.00014500514963711883, |
|
"loss": 0.1143, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.836036036036036, |
|
"grad_norm": 0.11727175144557134, |
|
"learning_rate": 0.0001444409144028644, |
|
"loss": 0.1153, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.8396396396396396, |
|
"grad_norm": 0.13457703292067713, |
|
"learning_rate": 0.00014387491059717652, |
|
"loss": 0.1199, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.8432432432432433, |
|
"grad_norm": 0.11901299124274167, |
|
"learning_rate": 0.00014330716074475286, |
|
"loss": 0.1147, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.8468468468468469, |
|
"grad_norm": 0.10353994887251415, |
|
"learning_rate": 0.00014273768743977685, |
|
"loss": 0.1026, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.8504504504504504, |
|
"grad_norm": 0.10419191980690304, |
|
"learning_rate": 0.0001421665133450184, |
|
"loss": 0.1063, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.8540540540540541, |
|
"grad_norm": 0.12748698891225302, |
|
"learning_rate": 0.00014159366119093214, |
|
"loss": 0.1079, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.8576576576576577, |
|
"grad_norm": 0.16200721887310557, |
|
"learning_rate": 0.00014101915377475274, |
|
"loss": 0.1152, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.8612612612612612, |
|
"grad_norm": 0.15795975577284813, |
|
"learning_rate": 0.0001404430139595877, |
|
"loss": 0.1542, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.8648648648648649, |
|
"grad_norm": 0.14933463930244448, |
|
"learning_rate": 0.0001398652646735076, |
|
"loss": 0.1236, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8684684684684685, |
|
"grad_norm": 0.16198753222835588, |
|
"learning_rate": 0.0001392859289086334, |
|
"loss": 0.1375, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.872072072072072, |
|
"grad_norm": 0.13433467388254222, |
|
"learning_rate": 0.00013870502972022173, |
|
"loss": 0.1323, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.8756756756756757, |
|
"grad_norm": 0.12593674925296103, |
|
"learning_rate": 0.00013812259022574717, |
|
"loss": 0.1216, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.8792792792792793, |
|
"grad_norm": 0.13013719230493928, |
|
"learning_rate": 0.00013753863360398241, |
|
"loss": 0.1247, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.8828828828828829, |
|
"grad_norm": 0.11799267349520824, |
|
"learning_rate": 0.0001369531830940757, |
|
"loss": 0.1086, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.8864864864864865, |
|
"grad_norm": 0.08312084262618047, |
|
"learning_rate": 0.00013636626199462615, |
|
"loss": 0.0813, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.8900900900900901, |
|
"grad_norm": 0.1338651554767216, |
|
"learning_rate": 0.00013577789366275644, |
|
"loss": 0.137, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.8936936936936937, |
|
"grad_norm": 0.10150227632820087, |
|
"learning_rate": 0.0001351881015131833, |
|
"loss": 0.0975, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.8972972972972973, |
|
"grad_norm": 0.10189929428402296, |
|
"learning_rate": 0.00013459690901728588, |
|
"loss": 0.0923, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.9009009009009009, |
|
"grad_norm": 0.1408210936693087, |
|
"learning_rate": 0.00013400433970217135, |
|
"loss": 0.1378, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9045045045045045, |
|
"grad_norm": 0.11765895193363322, |
|
"learning_rate": 0.000133410417149739, |
|
"loss": 0.1096, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.9081081081081082, |
|
"grad_norm": 0.1413792560787727, |
|
"learning_rate": 0.00013281516499574135, |
|
"loss": 0.1401, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.9117117117117117, |
|
"grad_norm": 0.08054406846656884, |
|
"learning_rate": 0.00013221860692884396, |
|
"loss": 0.0835, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.9153153153153153, |
|
"grad_norm": 0.12127761773938303, |
|
"learning_rate": 0.0001316207666896824, |
|
"loss": 0.118, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.918918918918919, |
|
"grad_norm": 0.10139113989817501, |
|
"learning_rate": 0.00013102166806991768, |
|
"loss": 0.0966, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.9225225225225225, |
|
"grad_norm": 0.10511129293269068, |
|
"learning_rate": 0.00013042133491128935, |
|
"loss": 0.0846, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.9261261261261261, |
|
"grad_norm": 0.13928639672942275, |
|
"learning_rate": 0.00012981979110466654, |
|
"loss": 0.1106, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.9297297297297298, |
|
"grad_norm": 0.1575504268549112, |
|
"learning_rate": 0.00012921706058909756, |
|
"loss": 0.1022, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 0.09456528837585412, |
|
"learning_rate": 0.00012861316735085686, |
|
"loss": 0.0943, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.9369369369369369, |
|
"grad_norm": 0.11421875251828266, |
|
"learning_rate": 0.00012800813542249072, |
|
"loss": 0.0988, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.9405405405405406, |
|
"grad_norm": 0.11985070545179864, |
|
"learning_rate": 0.00012740198888186064, |
|
"loss": 0.1238, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.9441441441441442, |
|
"grad_norm": 0.09679571111756961, |
|
"learning_rate": 0.00012679475185118535, |
|
"loss": 0.1063, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.9477477477477477, |
|
"grad_norm": 0.09782919038732428, |
|
"learning_rate": 0.0001261864484960807, |
|
"loss": 0.1039, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.9513513513513514, |
|
"grad_norm": 0.17653628828090737, |
|
"learning_rate": 0.00012557710302459803, |
|
"loss": 0.1354, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.954954954954955, |
|
"grad_norm": 0.1409157686607275, |
|
"learning_rate": 0.00012496673968626068, |
|
"loss": 0.1181, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.9585585585585585, |
|
"grad_norm": 0.16396955244736236, |
|
"learning_rate": 0.0001243553827710992, |
|
"loss": 0.1352, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.9621621621621622, |
|
"grad_norm": 0.11706567681214818, |
|
"learning_rate": 0.0001237430566086844, |
|
"loss": 0.1103, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.9657657657657658, |
|
"grad_norm": 0.11951814155751256, |
|
"learning_rate": 0.00012312978556715932, |
|
"loss": 0.1182, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.9693693693693693, |
|
"grad_norm": 0.1098976660754676, |
|
"learning_rate": 0.00012251559405226941, |
|
"loss": 0.0981, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.972972972972973, |
|
"grad_norm": 0.13497038508376635, |
|
"learning_rate": 0.00012190050650639131, |
|
"loss": 0.139, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9765765765765766, |
|
"grad_norm": 0.10505221561748224, |
|
"learning_rate": 0.00012128454740756014, |
|
"loss": 0.0968, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.9801801801801802, |
|
"grad_norm": 0.09400827669331373, |
|
"learning_rate": 0.00012066774126849529, |
|
"loss": 0.091, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.9837837837837838, |
|
"grad_norm": 0.13553635299634834, |
|
"learning_rate": 0.00012005011263562513, |
|
"loss": 0.1269, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.9873873873873874, |
|
"grad_norm": 0.12708467697016343, |
|
"learning_rate": 0.00011943168608810978, |
|
"loss": 0.1393, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.990990990990991, |
|
"grad_norm": 0.12975117728566488, |
|
"learning_rate": 0.00011881248623686338, |
|
"loss": 0.1305, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.9945945945945946, |
|
"grad_norm": 0.13065574753229398, |
|
"learning_rate": 0.00011819253772357442, |
|
"loss": 0.1236, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.9981981981981982, |
|
"grad_norm": 0.11060359555949814, |
|
"learning_rate": 0.00011757186521972512, |
|
"loss": 0.1018, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.9981981981981982, |
|
"eval_loss": 0.12383058667182922, |
|
"eval_runtime": 52.8086, |
|
"eval_samples_per_second": 4.431, |
|
"eval_steps_per_second": 0.568, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.0018018018018018, |
|
"grad_norm": 0.11956545731254414, |
|
"learning_rate": 0.00011695049342560968, |
|
"loss": 0.0926, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.0054054054054054, |
|
"grad_norm": 0.07533035620811855, |
|
"learning_rate": 0.00011632844706935124, |
|
"loss": 0.0797, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.009009009009009, |
|
"grad_norm": 0.07020760288792346, |
|
"learning_rate": 0.00011570575090591791, |
|
"loss": 0.0607, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.0126126126126127, |
|
"grad_norm": 0.07819045444088978, |
|
"learning_rate": 0.00011508242971613741, |
|
"loss": 0.0735, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.0162162162162163, |
|
"grad_norm": 0.10053168911518578, |
|
"learning_rate": 0.0001144585083057111, |
|
"loss": 0.0835, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.0198198198198198, |
|
"grad_norm": 0.10526070984024917, |
|
"learning_rate": 0.0001138340115042267, |
|
"loss": 0.0951, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.0234234234234234, |
|
"grad_norm": 0.09945638649949284, |
|
"learning_rate": 0.00011320896416417026, |
|
"loss": 0.0767, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.027027027027027, |
|
"grad_norm": 0.07761913145188672, |
|
"learning_rate": 0.00011258339115993696, |
|
"loss": 0.0683, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.0306306306306305, |
|
"grad_norm": 0.09704735378738133, |
|
"learning_rate": 0.0001119573173868415, |
|
"loss": 0.0743, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.0342342342342343, |
|
"grad_norm": 0.07516525486775329, |
|
"learning_rate": 0.000111330767760127, |
|
"loss": 0.055, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.037837837837838, |
|
"grad_norm": 0.12817568478073565, |
|
"learning_rate": 0.00011070376721397373, |
|
"loss": 0.0812, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.0414414414414415, |
|
"grad_norm": 0.14184653764167465, |
|
"learning_rate": 0.00011007634070050684, |
|
"loss": 0.1011, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.045045045045045, |
|
"grad_norm": 0.12176639431416836, |
|
"learning_rate": 0.00010944851318880314, |
|
"loss": 0.0658, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.0486486486486486, |
|
"grad_norm": 0.12307205920891376, |
|
"learning_rate": 0.00010882030966389766, |
|
"loss": 0.0681, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.0522522522522522, |
|
"grad_norm": 0.10538765068004156, |
|
"learning_rate": 0.00010819175512578926, |
|
"loss": 0.0641, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.055855855855856, |
|
"grad_norm": 0.1835426273917669, |
|
"learning_rate": 0.00010756287458844569, |
|
"loss": 0.0741, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.0594594594594595, |
|
"grad_norm": 0.15004556066173333, |
|
"learning_rate": 0.00010693369307880816, |
|
"loss": 0.0697, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.063063063063063, |
|
"grad_norm": 0.17087142751095086, |
|
"learning_rate": 0.00010630423563579551, |
|
"loss": 0.0908, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.11365528787847244, |
|
"learning_rate": 0.00010567452730930743, |
|
"loss": 0.0618, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.0702702702702702, |
|
"grad_norm": 0.1164578811980725, |
|
"learning_rate": 0.00010504459315922788, |
|
"loss": 0.0622, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.0738738738738738, |
|
"grad_norm": 0.188496193123827, |
|
"learning_rate": 0.00010441445825442772, |
|
"loss": 0.1077, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.0774774774774776, |
|
"grad_norm": 0.10295065488210545, |
|
"learning_rate": 0.00010378414767176705, |
|
"loss": 0.0735, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 0.15374433915948252, |
|
"learning_rate": 0.00010315368649509716, |
|
"loss": 0.085, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0846846846846847, |
|
"grad_norm": 0.09815844567459187, |
|
"learning_rate": 0.00010252309981426244, |
|
"loss": 0.054, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.0882882882882883, |
|
"grad_norm": 0.09380489686860213, |
|
"learning_rate": 0.0001018924127241019, |
|
"loss": 0.058, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.0918918918918918, |
|
"grad_norm": 0.1284280573750672, |
|
"learning_rate": 0.00010126165032345038, |
|
"loss": 0.0769, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.0954954954954954, |
|
"grad_norm": 0.1229186112985099, |
|
"learning_rate": 0.00010063083771413975, |
|
"loss": 0.0859, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.0990990990990992, |
|
"grad_norm": 0.09913923706830541, |
|
"learning_rate": 0.0001, |
|
"loss": 0.077, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.1027027027027028, |
|
"grad_norm": 0.10294321295155191, |
|
"learning_rate": 9.936916228586028e-05, |
|
"loss": 0.0756, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.1063063063063063, |
|
"grad_norm": 0.09262673197538004, |
|
"learning_rate": 9.873834967654964e-05, |
|
"loss": 0.0583, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.10990990990991, |
|
"grad_norm": 0.09476725567348886, |
|
"learning_rate": 9.810758727589813e-05, |
|
"loss": 0.0659, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.1135135135135135, |
|
"grad_norm": 0.10386525977756432, |
|
"learning_rate": 9.747690018573757e-05, |
|
"loss": 0.0604, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.117117117117117, |
|
"grad_norm": 0.1368700701066548, |
|
"learning_rate": 9.684631350490287e-05, |
|
"loss": 0.0728, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.1207207207207208, |
|
"grad_norm": 0.1432741421446136, |
|
"learning_rate": 9.621585232823298e-05, |
|
"loss": 0.0922, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.1243243243243244, |
|
"grad_norm": 0.11931342929926518, |
|
"learning_rate": 9.55855417455723e-05, |
|
"loss": 0.0763, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.127927927927928, |
|
"grad_norm": 0.10803701150902781, |
|
"learning_rate": 9.495540684077216e-05, |
|
"loss": 0.0661, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.1315315315315315, |
|
"grad_norm": 0.12884484782558658, |
|
"learning_rate": 9.432547269069261e-05, |
|
"loss": 0.0606, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.135135135135135, |
|
"grad_norm": 0.1344643405568192, |
|
"learning_rate": 9.36957643642045e-05, |
|
"loss": 0.0779, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.1387387387387387, |
|
"grad_norm": 0.12760304961299287, |
|
"learning_rate": 9.306630692119182e-05, |
|
"loss": 0.0603, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.1423423423423422, |
|
"grad_norm": 0.11600505233354397, |
|
"learning_rate": 9.243712541155436e-05, |
|
"loss": 0.0731, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.145945945945946, |
|
"grad_norm": 0.12470577794958661, |
|
"learning_rate": 9.180824487421077e-05, |
|
"loss": 0.0712, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.1495495495495496, |
|
"grad_norm": 0.1612422337136671, |
|
"learning_rate": 9.117969033610236e-05, |
|
"loss": 0.0683, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.1531531531531531, |
|
"grad_norm": 0.14102273355899492, |
|
"learning_rate": 9.055148681119688e-05, |
|
"loss": 0.0674, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.1567567567567567, |
|
"grad_norm": 0.14126790004481535, |
|
"learning_rate": 8.992365929949319e-05, |
|
"loss": 0.0812, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.1603603603603603, |
|
"grad_norm": 0.15229918606873402, |
|
"learning_rate": 8.929623278602627e-05, |
|
"loss": 0.0701, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.163963963963964, |
|
"grad_norm": 0.15483466842409133, |
|
"learning_rate": 8.866923223987302e-05, |
|
"loss": 0.0736, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.1675675675675676, |
|
"grad_norm": 0.14180233144913557, |
|
"learning_rate": 8.80426826131585e-05, |
|
"loss": 0.0783, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.1711711711711712, |
|
"grad_norm": 0.13636974208873606, |
|
"learning_rate": 8.741660884006303e-05, |
|
"loss": 0.0694, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.1747747747747748, |
|
"grad_norm": 0.09259441173399019, |
|
"learning_rate": 8.679103583582979e-05, |
|
"loss": 0.0524, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.1783783783783783, |
|
"grad_norm": 0.13715824142259708, |
|
"learning_rate": 8.616598849577333e-05, |
|
"loss": 0.08, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.181981981981982, |
|
"grad_norm": 0.12376655099748206, |
|
"learning_rate": 8.554149169428894e-05, |
|
"loss": 0.0784, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.1855855855855855, |
|
"grad_norm": 0.09319112288968635, |
|
"learning_rate": 8.491757028386263e-05, |
|
"loss": 0.0586, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.1891891891891893, |
|
"grad_norm": 0.10475390780150441, |
|
"learning_rate": 8.429424909408214e-05, |
|
"loss": 0.0563, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.1927927927927928, |
|
"grad_norm": 0.138568291584437, |
|
"learning_rate": 8.367155293064878e-05, |
|
"loss": 0.0894, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.1963963963963964, |
|
"grad_norm": 0.10228738965627485, |
|
"learning_rate": 8.304950657439033e-05, |
|
"loss": 0.0571, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.09804366197069557, |
|
"learning_rate": 8.242813478027492e-05, |
|
"loss": 0.0632, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.2036036036036035, |
|
"grad_norm": 0.09408876005476795, |
|
"learning_rate": 8.180746227642562e-05, |
|
"loss": 0.0553, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.2072072072072073, |
|
"grad_norm": 0.13733518876900813, |
|
"learning_rate": 8.118751376313664e-05, |
|
"loss": 0.074, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.2108108108108109, |
|
"grad_norm": 0.11537452385210425, |
|
"learning_rate": 8.056831391189023e-05, |
|
"loss": 0.0686, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.2144144144144144, |
|
"grad_norm": 0.1072867298305809, |
|
"learning_rate": 7.99498873643749e-05, |
|
"loss": 0.0628, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.218018018018018, |
|
"grad_norm": 0.10612605300014923, |
|
"learning_rate": 7.93322587315047e-05, |
|
"loss": 0.0678, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.2216216216216216, |
|
"grad_norm": 0.09775413101898157, |
|
"learning_rate": 7.87154525924399e-05, |
|
"loss": 0.0577, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.2252252252252251, |
|
"grad_norm": 0.11955490707565056, |
|
"learning_rate": 7.809949349360872e-05, |
|
"loss": 0.0576, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.2288288288288287, |
|
"grad_norm": 0.10380886011793584, |
|
"learning_rate": 7.74844059477306e-05, |
|
"loss": 0.0603, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.2324324324324325, |
|
"grad_norm": 0.13589511087320075, |
|
"learning_rate": 7.687021443284071e-05, |
|
"loss": 0.0773, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.236036036036036, |
|
"grad_norm": 0.1167184661521862, |
|
"learning_rate": 7.625694339131564e-05, |
|
"loss": 0.0677, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.2396396396396396, |
|
"grad_norm": 0.12624459810290067, |
|
"learning_rate": 7.564461722890081e-05, |
|
"loss": 0.0802, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.2432432432432432, |
|
"grad_norm": 0.12261184267145957, |
|
"learning_rate": 7.503326031373931e-05, |
|
"loss": 0.0649, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.2468468468468468, |
|
"grad_norm": 0.16140905294131228, |
|
"learning_rate": 7.442289697540201e-05, |
|
"loss": 0.0648, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.2504504504504506, |
|
"grad_norm": 0.1369989260957558, |
|
"learning_rate": 7.381355150391933e-05, |
|
"loss": 0.074, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.2540540540540541, |
|
"grad_norm": 0.10405503701690619, |
|
"learning_rate": 7.32052481488147e-05, |
|
"loss": 0.0683, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.2576576576576577, |
|
"grad_norm": 0.11234589174920957, |
|
"learning_rate": 7.25980111181394e-05, |
|
"loss": 0.0643, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.2612612612612613, |
|
"grad_norm": 0.09321884974416474, |
|
"learning_rate": 7.19918645775093e-05, |
|
"loss": 0.0571, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.2648648648648648, |
|
"grad_norm": 0.12641606453495435, |
|
"learning_rate": 7.138683264914314e-05, |
|
"loss": 0.0702, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.2684684684684684, |
|
"grad_norm": 0.09916971519672783, |
|
"learning_rate": 7.078293941090249e-05, |
|
"loss": 0.0669, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.272072072072072, |
|
"grad_norm": 0.11635429968669815, |
|
"learning_rate": 7.018020889533348e-05, |
|
"loss": 0.071, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.2756756756756757, |
|
"grad_norm": 0.1634329754196539, |
|
"learning_rate": 6.957866508871068e-05, |
|
"loss": 0.0956, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.2792792792792793, |
|
"grad_norm": 0.10617306865400682, |
|
"learning_rate": 6.897833193008231e-05, |
|
"loss": 0.0601, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.2828828828828829, |
|
"grad_norm": 0.09334802201378282, |
|
"learning_rate": 6.83792333103176e-05, |
|
"loss": 0.0633, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.2864864864864864, |
|
"grad_norm": 0.09803158330755328, |
|
"learning_rate": 6.77813930711561e-05, |
|
"loss": 0.059, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.29009009009009, |
|
"grad_norm": 0.1431245474215182, |
|
"learning_rate": 6.718483500425867e-05, |
|
"loss": 0.0942, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.2936936936936938, |
|
"grad_norm": 0.093782557814536, |
|
"learning_rate": 6.658958285026102e-05, |
|
"loss": 0.0606, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.2972972972972974, |
|
"grad_norm": 0.11393416329015467, |
|
"learning_rate": 6.599566029782863e-05, |
|
"loss": 0.0717, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.300900900900901, |
|
"grad_norm": 0.13923517110417213, |
|
"learning_rate": 6.540309098271416e-05, |
|
"loss": 0.0702, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.3045045045045045, |
|
"grad_norm": 0.11247902438190675, |
|
"learning_rate": 6.48118984868167e-05, |
|
"loss": 0.0671, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.308108108108108, |
|
"grad_norm": 0.08843201816576483, |
|
"learning_rate": 6.42221063372436e-05, |
|
"loss": 0.0528, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.3117117117117116, |
|
"grad_norm": 0.09716724428964281, |
|
"learning_rate": 6.363373800537387e-05, |
|
"loss": 0.064, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.3153153153153152, |
|
"grad_norm": 0.15158524613568697, |
|
"learning_rate": 6.304681690592431e-05, |
|
"loss": 0.0704, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.318918918918919, |
|
"grad_norm": 0.16465157701305033, |
|
"learning_rate": 6.246136639601764e-05, |
|
"loss": 0.0834, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.3225225225225226, |
|
"grad_norm": 0.12163370539770614, |
|
"learning_rate": 6.187740977425285e-05, |
|
"loss": 0.0734, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.3261261261261261, |
|
"grad_norm": 0.10264467748684972, |
|
"learning_rate": 6.129497027977829e-05, |
|
"loss": 0.0688, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.3297297297297297, |
|
"grad_norm": 0.0908877614772114, |
|
"learning_rate": 6.071407109136662e-05, |
|
"loss": 0.056, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.13375636202891336, |
|
"learning_rate": 6.0134735326492456e-05, |
|
"loss": 0.0775, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.336936936936937, |
|
"grad_norm": 0.11306366286295132, |
|
"learning_rate": 5.955698604041231e-05, |
|
"loss": 0.0609, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.3405405405405406, |
|
"grad_norm": 0.1273835553046169, |
|
"learning_rate": 5.8980846225247286e-05, |
|
"loss": 0.0653, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.3441441441441442, |
|
"grad_norm": 0.10437088258018255, |
|
"learning_rate": 5.8406338809067874e-05, |
|
"loss": 0.0639, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.3477477477477477, |
|
"grad_norm": 0.10184725937214306, |
|
"learning_rate": 5.7833486654981606e-05, |
|
"loss": 0.0531, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 0.1439045913318642, |
|
"learning_rate": 5.726231256022316e-05, |
|
"loss": 0.0756, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.3549549549549549, |
|
"grad_norm": 0.1161759648717375, |
|
"learning_rate": 5.669283925524715e-05, |
|
"loss": 0.0564, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.3585585585585584, |
|
"grad_norm": 0.14605792072752058, |
|
"learning_rate": 5.6125089402823485e-05, |
|
"loss": 0.0676, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.3621621621621622, |
|
"grad_norm": 0.11801235296837954, |
|
"learning_rate": 5.555908559713561e-05, |
|
"loss": 0.0922, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.3657657657657658, |
|
"grad_norm": 0.0986919703756885, |
|
"learning_rate": 5.4994850362881214e-05, |
|
"loss": 0.0624, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.3693693693693694, |
|
"grad_norm": 0.10287784532358656, |
|
"learning_rate": 5.443240615437586e-05, |
|
"loss": 0.0692, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.372972972972973, |
|
"grad_norm": 0.1340740211739596, |
|
"learning_rate": 5.387177535465945e-05, |
|
"loss": 0.0835, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.3765765765765765, |
|
"grad_norm": 0.138304807951036, |
|
"learning_rate": 5.331298027460539e-05, |
|
"loss": 0.0749, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.3801801801801803, |
|
"grad_norm": 0.11145520456541595, |
|
"learning_rate": 5.275604315203293e-05, |
|
"loss": 0.062, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.3837837837837839, |
|
"grad_norm": 0.09247928908715367, |
|
"learning_rate": 5.2200986150821696e-05, |
|
"loss": 0.0571, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.3873873873873874, |
|
"grad_norm": 0.1101460184702829, |
|
"learning_rate": 5.164783136003027e-05, |
|
"loss": 0.076, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.390990990990991, |
|
"grad_norm": 0.09455088802615286, |
|
"learning_rate": 5.109660079301668e-05, |
|
"loss": 0.0708, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.3945945945945946, |
|
"grad_norm": 0.12915569506877214, |
|
"learning_rate": 5.0547316386562507e-05, |
|
"loss": 0.0683, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.3981981981981981, |
|
"grad_norm": 0.14298967602118384, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0802, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.4018018018018017, |
|
"grad_norm": 0.13870069410611544, |
|
"learning_rate": 4.945467341434195e-05, |
|
"loss": 0.0849, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.4054054054054055, |
|
"grad_norm": 0.1542562430256231, |
|
"learning_rate": 4.891135833141495e-05, |
|
"loss": 0.0875, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.409009009009009, |
|
"grad_norm": 0.11965550746042822, |
|
"learning_rate": 4.837007637299595e-05, |
|
"loss": 0.0599, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.4126126126126126, |
|
"grad_norm": 0.13624840902702007, |
|
"learning_rate": 4.783084907995156e-05, |
|
"loss": 0.0805, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.4162162162162162, |
|
"grad_norm": 0.0916114816428032, |
|
"learning_rate": 4.729369791138085e-05, |
|
"loss": 0.0523, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.4198198198198198, |
|
"grad_norm": 0.10008527767691701, |
|
"learning_rate": 4.675864424376146e-05, |
|
"loss": 0.0623, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.4234234234234235, |
|
"grad_norm": 0.1329413030801314, |
|
"learning_rate": 4.622570937009879e-05, |
|
"loss": 0.0768, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.427027027027027, |
|
"grad_norm": 0.09990109338082896, |
|
"learning_rate": 4.569491449907878e-05, |
|
"loss": 0.0624, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.4306306306306307, |
|
"grad_norm": 0.10994747606781914, |
|
"learning_rate": 4.5166280754223676e-05, |
|
"loss": 0.0554, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.4342342342342342, |
|
"grad_norm": 0.11716673038318011, |
|
"learning_rate": 4.4639829173051554e-05, |
|
"loss": 0.0487, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.4378378378378378, |
|
"grad_norm": 0.14740947528399398, |
|
"learning_rate": 4.411558070623907e-05, |
|
"loss": 0.0705, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.4414414414414414, |
|
"grad_norm": 0.10905802122912368, |
|
"learning_rate": 4.359355621678764e-05, |
|
"loss": 0.0669, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.445045045045045, |
|
"grad_norm": 0.1372304462996132, |
|
"learning_rate": 4.307377647919343e-05, |
|
"loss": 0.0754, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.4486486486486487, |
|
"grad_norm": 0.1220267603792114, |
|
"learning_rate": 4.255626217862013e-05, |
|
"loss": 0.061, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.4522522522522523, |
|
"grad_norm": 0.10857496168656595, |
|
"learning_rate": 4.204103391007623e-05, |
|
"loss": 0.0666, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.4558558558558559, |
|
"grad_norm": 0.12862098370827468, |
|
"learning_rate": 4.152811217759529e-05, |
|
"loss": 0.0702, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.4594594594594594, |
|
"grad_norm": 0.12121566089700932, |
|
"learning_rate": 4.1017517393419826e-05, |
|
"loss": 0.0701, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.463063063063063, |
|
"grad_norm": 0.12880561806718493, |
|
"learning_rate": 4.0509269877189106e-05, |
|
"loss": 0.0855, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.4666666666666668, |
|
"grad_norm": 0.1103245174300563, |
|
"learning_rate": 4.000338985513046e-05, |
|
"loss": 0.0645, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.4702702702702704, |
|
"grad_norm": 0.11471559496567096, |
|
"learning_rate": 3.9499897459254375e-05, |
|
"loss": 0.069, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.473873873873874, |
|
"grad_norm": 0.1068585326163203, |
|
"learning_rate": 3.899881272655342e-05, |
|
"loss": 0.0584, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.4774774774774775, |
|
"grad_norm": 0.12180507516800447, |
|
"learning_rate": 3.8500155598204644e-05, |
|
"loss": 0.0767, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.481081081081081, |
|
"grad_norm": 0.10787694069789268, |
|
"learning_rate": 3.8003945918776143e-05, |
|
"loss": 0.0652, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.4846846846846846, |
|
"grad_norm": 0.11726705088810202, |
|
"learning_rate": 3.75102034354373e-05, |
|
"loss": 0.0629, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.4882882882882882, |
|
"grad_norm": 0.10762739211432558, |
|
"learning_rate": 3.701894779717286e-05, |
|
"loss": 0.0555, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.491891891891892, |
|
"grad_norm": 0.09308134521067353, |
|
"learning_rate": 3.653019855400123e-05, |
|
"loss": 0.0564, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.4954954954954955, |
|
"grad_norm": 0.1524049565519016, |
|
"learning_rate": 3.6043975156195987e-05, |
|
"loss": 0.0809, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.499099099099099, |
|
"grad_norm": 0.13279511071977756, |
|
"learning_rate": 3.5560296953512295e-05, |
|
"loss": 0.069, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.5027027027027027, |
|
"grad_norm": 0.13286192000673344, |
|
"learning_rate": 3.507918319441672e-05, |
|
"loss": 0.0748, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.5063063063063065, |
|
"grad_norm": 0.17493415887546465, |
|
"learning_rate": 3.460065302532108e-05, |
|
"loss": 0.0828, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.50990990990991, |
|
"grad_norm": 0.11057059837297634, |
|
"learning_rate": 3.4124725489820645e-05, |
|
"loss": 0.063, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.5135135135135136, |
|
"grad_norm": 0.12735355112497881, |
|
"learning_rate": 3.365141952793622e-05, |
|
"loss": 0.0732, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.5171171171171172, |
|
"grad_norm": 0.1632337270590702, |
|
"learning_rate": 3.3180753975360415e-05, |
|
"loss": 0.0775, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.5207207207207207, |
|
"grad_norm": 0.14697481590979153, |
|
"learning_rate": 3.2712747562708115e-05, |
|
"loss": 0.0863, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.5243243243243243, |
|
"grad_norm": 0.1660001616377742, |
|
"learning_rate": 3.224741891477095e-05, |
|
"loss": 0.082, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.5279279279279279, |
|
"grad_norm": 0.13048897912831442, |
|
"learning_rate": 3.178478654977624e-05, |
|
"loss": 0.0794, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.5315315315315314, |
|
"grad_norm": 0.11489435507746414, |
|
"learning_rate": 3.132486887864992e-05, |
|
"loss": 0.0694, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.535135135135135, |
|
"grad_norm": 0.07639223925814809, |
|
"learning_rate": 3.086768420428392e-05, |
|
"loss": 0.0413, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.5387387387387388, |
|
"grad_norm": 0.11453408038112112, |
|
"learning_rate": 3.0413250720807883e-05, |
|
"loss": 0.0658, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.5423423423423424, |
|
"grad_norm": 0.1667771876584119, |
|
"learning_rate": 2.9961586512864947e-05, |
|
"loss": 0.0798, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.545945945945946, |
|
"grad_norm": 0.11714414917895159, |
|
"learning_rate": 2.9512709554892003e-05, |
|
"loss": 0.0693, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.5495495495495497, |
|
"grad_norm": 0.1229110888950789, |
|
"learning_rate": 2.9066637710404675e-05, |
|
"loss": 0.0747, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.5531531531531533, |
|
"grad_norm": 0.1759221433787884, |
|
"learning_rate": 2.8623388731286093e-05, |
|
"loss": 0.0795, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.5567567567567568, |
|
"grad_norm": 0.09594983270629338, |
|
"learning_rate": 2.818298025708075e-05, |
|
"loss": 0.059, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.5603603603603604, |
|
"grad_norm": 0.090367685537466, |
|
"learning_rate": 2.7745429814292145e-05, |
|
"loss": 0.0531, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.563963963963964, |
|
"grad_norm": 0.13195671170326462, |
|
"learning_rate": 2.7310754815685624e-05, |
|
"loss": 0.075, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.5675675675675675, |
|
"grad_norm": 0.09101802024714302, |
|
"learning_rate": 2.687897255959536e-05, |
|
"loss": 0.0541, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.571171171171171, |
|
"grad_norm": 0.12030212380166792, |
|
"learning_rate": 2.6450100229235795e-05, |
|
"loss": 0.0739, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.5747747747747747, |
|
"grad_norm": 0.09282633668248956, |
|
"learning_rate": 2.6024154892017937e-05, |
|
"loss": 0.0585, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.5783783783783782, |
|
"grad_norm": 0.1437367048181832, |
|
"learning_rate": 2.5601153498870134e-05, |
|
"loss": 0.0744, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.581981981981982, |
|
"grad_norm": 0.12980503831327814, |
|
"learning_rate": 2.518111288356345e-05, |
|
"loss": 0.0741, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.5855855855855856, |
|
"grad_norm": 0.12335416962202174, |
|
"learning_rate": 2.4764049762041874e-05, |
|
"loss": 0.072, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.5891891891891892, |
|
"grad_norm": 0.10492372446642, |
|
"learning_rate": 2.4349980731756894e-05, |
|
"loss": 0.0716, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.592792792792793, |
|
"grad_norm": 0.14899587708147966, |
|
"learning_rate": 2.3938922271007147e-05, |
|
"loss": 0.0925, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.5963963963963965, |
|
"grad_norm": 0.13084935137363646, |
|
"learning_rate": 2.353089073828255e-05, |
|
"loss": 0.0823, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.13030296264089844, |
|
"learning_rate": 2.312590237161335e-05, |
|
"loss": 0.0725, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.6036036036036037, |
|
"grad_norm": 0.09700200874554347, |
|
"learning_rate": 2.2723973287923962e-05, |
|
"loss": 0.0664, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.6072072072072072, |
|
"grad_norm": 0.10633986810502474, |
|
"learning_rate": 2.2325119482391467e-05, |
|
"loss": 0.0679, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.6108108108108108, |
|
"grad_norm": 0.11142544471141212, |
|
"learning_rate": 2.1929356827809057e-05, |
|
"loss": 0.0614, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.6144144144144144, |
|
"grad_norm": 0.09250950825140586, |
|
"learning_rate": 2.1536701073954558e-05, |
|
"loss": 0.0588, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.618018018018018, |
|
"grad_norm": 0.1332480809778604, |
|
"learning_rate": 2.1147167846963422e-05, |
|
"loss": 0.0803, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.6216216216216215, |
|
"grad_norm": 0.14121311299673173, |
|
"learning_rate": 2.0760772648707016e-05, |
|
"loss": 0.0947, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.6252252252252253, |
|
"grad_norm": 0.10826628920408898, |
|
"learning_rate": 2.037753085617563e-05, |
|
"loss": 0.0704, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.6288288288288288, |
|
"grad_norm": 0.08498951848877008, |
|
"learning_rate": 1.999745772086655e-05, |
|
"loss": 0.0416, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.6324324324324324, |
|
"grad_norm": 0.15866638731251254, |
|
"learning_rate": 1.9620568368177184e-05, |
|
"loss": 0.0836, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.6360360360360362, |
|
"grad_norm": 0.11879058663162903, |
|
"learning_rate": 1.924687779680302e-05, |
|
"loss": 0.0721, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.6396396396396398, |
|
"grad_norm": 0.16212624210699175, |
|
"learning_rate": 1.8876400878140775e-05, |
|
"loss": 0.0788, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.6432432432432433, |
|
"grad_norm": 0.1205599470750737, |
|
"learning_rate": 1.8509152355696623e-05, |
|
"loss": 0.0891, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.646846846846847, |
|
"grad_norm": 0.10723522883909493, |
|
"learning_rate": 1.8145146844499383e-05, |
|
"loss": 0.0702, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.6504504504504505, |
|
"grad_norm": 0.10243191710751155, |
|
"learning_rate": 1.7784398830519e-05, |
|
"loss": 0.0558, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.654054054054054, |
|
"grad_norm": 0.08896982101996669, |
|
"learning_rate": 1.742692267008996e-05, |
|
"loss": 0.0603, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.6576576576576576, |
|
"grad_norm": 0.13618088732046185, |
|
"learning_rate": 1.7072732589339955e-05, |
|
"loss": 0.0744, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.6612612612612612, |
|
"grad_norm": 0.14798363521421304, |
|
"learning_rate": 1.672184268362391e-05, |
|
"loss": 0.0875, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.6648648648648647, |
|
"grad_norm": 0.10230547180028401, |
|
"learning_rate": 1.6374266916962832e-05, |
|
"loss": 0.0497, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.6684684684684683, |
|
"grad_norm": 0.10092039325506573, |
|
"learning_rate": 1.6030019121488227e-05, |
|
"loss": 0.0611, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.672072072072072, |
|
"grad_norm": 0.10528958039623297, |
|
"learning_rate": 1.5689112996891576e-05, |
|
"loss": 0.0719, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.6756756756756757, |
|
"grad_norm": 0.1357872146845403, |
|
"learning_rate": 1.535156210987917e-05, |
|
"loss": 0.0792, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.6792792792792792, |
|
"grad_norm": 0.16092347024456755, |
|
"learning_rate": 1.5017379893632255e-05, |
|
"loss": 0.0984, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.682882882882883, |
|
"grad_norm": 0.152681018425233, |
|
"learning_rate": 1.4686579647272336e-05, |
|
"loss": 0.0665, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.6864864864864866, |
|
"grad_norm": 0.10940364333158531, |
|
"learning_rate": 1.4359174535331999e-05, |
|
"loss": 0.0678, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.6900900900900901, |
|
"grad_norm": 0.10224571124448578, |
|
"learning_rate": 1.4035177587230996e-05, |
|
"loss": 0.0681, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.6936936936936937, |
|
"grad_norm": 0.1117508851307017, |
|
"learning_rate": 1.3714601696757712e-05, |
|
"loss": 0.0705, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.6972972972972973, |
|
"grad_norm": 0.11380708386619733, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 0.0787, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.7009009009009008, |
|
"grad_norm": 0.13014691697383945, |
|
"learning_rate": 1.3083763982618025e-05, |
|
"loss": 0.0746, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.7045045045045044, |
|
"grad_norm": 0.14178781462677711, |
|
"learning_rate": 1.2773527263780626e-05, |
|
"loss": 0.0802, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.708108108108108, |
|
"grad_norm": 0.13903277880623008, |
|
"learning_rate": 1.2466761811230098e-05, |
|
"loss": 0.0727, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.7117117117117115, |
|
"grad_norm": 0.14354542440047877, |
|
"learning_rate": 1.2163479833009894e-05, |
|
"loss": 0.0698, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.7153153153153153, |
|
"grad_norm": 0.12299158086851347, |
|
"learning_rate": 1.1863693398535114e-05, |
|
"loss": 0.0659, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.718918918918919, |
|
"grad_norm": 0.09882489616646957, |
|
"learning_rate": 1.1567414438112156e-05, |
|
"loss": 0.0626, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.7225225225225225, |
|
"grad_norm": 0.11570669617801674, |
|
"learning_rate": 1.1274654742463841e-05, |
|
"loss": 0.0646, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.7261261261261263, |
|
"grad_norm": 0.09504741769642676, |
|
"learning_rate": 1.0985425962260343e-05, |
|
"loss": 0.0587, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.7297297297297298, |
|
"grad_norm": 0.12981804877027894, |
|
"learning_rate": 1.0699739607655435e-05, |
|
"loss": 0.0646, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.7333333333333334, |
|
"grad_norm": 0.10868480851483209, |
|
"learning_rate": 1.0417607047828426e-05, |
|
"loss": 0.0671, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.736936936936937, |
|
"grad_norm": 0.13696903097667856, |
|
"learning_rate": 1.01390395105318e-05, |
|
"loss": 0.0786, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.7405405405405405, |
|
"grad_norm": 0.1157828731078967, |
|
"learning_rate": 9.864048081644261e-06, |
|
"loss": 0.0714, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.744144144144144, |
|
"grad_norm": 0.08900480095300936, |
|
"learning_rate": 9.592643704729753e-06, |
|
"loss": 0.0544, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.7477477477477477, |
|
"grad_norm": 0.10882498606926169, |
|
"learning_rate": 9.324837180601741e-06, |
|
"loss": 0.0645, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.7513513513513512, |
|
"grad_norm": 0.1879024889958637, |
|
"learning_rate": 9.060639166893493e-06, |
|
"loss": 0.0682, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.7549549549549548, |
|
"grad_norm": 0.1072698067292599, |
|
"learning_rate": 8.80006017763395e-06, |
|
"loss": 0.0558, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.7585585585585586, |
|
"grad_norm": 0.1177990807822665, |
|
"learning_rate": 8.543110582829272e-06, |
|
"loss": 0.0592, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.7621621621621621, |
|
"grad_norm": 0.1144052781708329, |
|
"learning_rate": 8.289800608050202e-06, |
|
"loss": 0.0685, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.7657657657657657, |
|
"grad_norm": 0.13290723111377578, |
|
"learning_rate": 8.040140334025082e-06, |
|
"loss": 0.0787, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.7693693693693695, |
|
"grad_norm": 0.14325581569648474, |
|
"learning_rate": 7.794139696238645e-06, |
|
"loss": 0.0767, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.772972972972973, |
|
"grad_norm": 0.13940373462189817, |
|
"learning_rate": 7.551808484536782e-06, |
|
"loss": 0.0713, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.7765765765765766, |
|
"grad_norm": 0.13465478912453277, |
|
"learning_rate": 7.313156342736738e-06, |
|
"loss": 0.0838, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.7801801801801802, |
|
"grad_norm": 0.10122791673864097, |
|
"learning_rate": 7.078192768243486e-06, |
|
"loss": 0.0577, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.7837837837837838, |
|
"grad_norm": 0.1571652297573745, |
|
"learning_rate": 6.846927111671686e-06, |
|
"loss": 0.0905, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.7873873873873873, |
|
"grad_norm": 0.12718467808306833, |
|
"learning_rate": 6.61936857647355e-06, |
|
"loss": 0.0566, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.790990990990991, |
|
"grad_norm": 0.15255418113431185, |
|
"learning_rate": 6.395526218572723e-06, |
|
"loss": 0.0646, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.7945945945945945, |
|
"grad_norm": 0.13721542773233897, |
|
"learning_rate": 6.175408946003703e-06, |
|
"loss": 0.0752, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.798198198198198, |
|
"grad_norm": 0.11503986327868332, |
|
"learning_rate": 5.959025518557437e-06, |
|
"loss": 0.0753, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.8018018018018018, |
|
"grad_norm": 0.09966839333280018, |
|
"learning_rate": 5.746384547432737e-06, |
|
"loss": 0.0658, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.8054054054054054, |
|
"grad_norm": 0.13097929394232063, |
|
"learning_rate": 5.5374944948935135e-06, |
|
"loss": 0.0647, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.809009009009009, |
|
"grad_norm": 0.11916565098885482, |
|
"learning_rate": 5.332363673932106e-06, |
|
"loss": 0.0683, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.8126126126126128, |
|
"grad_norm": 0.14740354567299807, |
|
"learning_rate": 5.131000247938367e-06, |
|
"loss": 0.0855, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.8162162162162163, |
|
"grad_norm": 0.12344512852482942, |
|
"learning_rate": 4.933412230374812e-06, |
|
"loss": 0.0689, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.8198198198198199, |
|
"grad_norm": 0.15079351397270196, |
|
"learning_rate": 4.7396074844577975e-06, |
|
"loss": 0.0865, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.8234234234234235, |
|
"grad_norm": 0.13387613481208308, |
|
"learning_rate": 4.549593722844492e-06, |
|
"loss": 0.0761, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.827027027027027, |
|
"grad_norm": 0.07611010028345803, |
|
"learning_rate": 4.363378507325955e-06, |
|
"loss": 0.0431, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.8306306306306306, |
|
"grad_norm": 0.13798397287470085, |
|
"learning_rate": 4.180969248526334e-06, |
|
"loss": 0.0806, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.8342342342342342, |
|
"grad_norm": 0.17673529491591075, |
|
"learning_rate": 4.002373205607723e-06, |
|
"loss": 0.0974, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.8378378378378377, |
|
"grad_norm": 0.09469787111672756, |
|
"learning_rate": 3.827597485981527e-06, |
|
"loss": 0.0592, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.8414414414414413, |
|
"grad_norm": 0.10244301409759372, |
|
"learning_rate": 3.6566490450254286e-06, |
|
"loss": 0.0629, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.845045045045045, |
|
"grad_norm": 0.09239653678543874, |
|
"learning_rate": 3.4895346858066724e-06, |
|
"loss": 0.0637, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.8486486486486486, |
|
"grad_norm": 0.136056185401784, |
|
"learning_rate": 3.3262610588113307e-06, |
|
"loss": 0.0784, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.8522522522522522, |
|
"grad_norm": 0.12238600483310333, |
|
"learning_rate": 3.1668346616795963e-06, |
|
"loss": 0.0703, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.855855855855856, |
|
"grad_norm": 0.15777844339693373, |
|
"learning_rate": 3.011261838947277e-06, |
|
"loss": 0.0861, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.8594594594594596, |
|
"grad_norm": 0.149256801647933, |
|
"learning_rate": 2.859548781793242e-06, |
|
"loss": 0.0816, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.8630630630630631, |
|
"grad_norm": 0.1158625228168308, |
|
"learning_rate": 2.711701527793031e-06, |
|
"loss": 0.0757, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.8666666666666667, |
|
"grad_norm": 0.127467190710186, |
|
"learning_rate": 2.5677259606786684e-06, |
|
"loss": 0.0718, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.8702702702702703, |
|
"grad_norm": 0.11178985379767997, |
|
"learning_rate": 2.4276278101044046e-06, |
|
"loss": 0.0729, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.8738738738738738, |
|
"grad_norm": 0.13318873038147186, |
|
"learning_rate": 2.291412651418778e-06, |
|
"loss": 0.0874, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.8774774774774774, |
|
"grad_norm": 0.11095794613520046, |
|
"learning_rate": 2.159085905442737e-06, |
|
"loss": 0.0632, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.881081081081081, |
|
"grad_norm": 0.13466988936312552, |
|
"learning_rate": 2.03065283825381e-06, |
|
"loss": 0.0696, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.8846846846846845, |
|
"grad_norm": 0.1501784745699571, |
|
"learning_rate": 1.9061185609766995e-06, |
|
"loss": 0.0781, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.8882882882882883, |
|
"grad_norm": 0.12439813775960844, |
|
"learning_rate": 1.7854880295797405e-06, |
|
"loss": 0.0778, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.8918918918918919, |
|
"grad_norm": 0.10543026463333441, |
|
"learning_rate": 1.6687660446777277e-06, |
|
"loss": 0.0658, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.8954954954954955, |
|
"grad_norm": 0.1493387910647338, |
|
"learning_rate": 1.5559572513409338e-06, |
|
"loss": 0.0733, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.8990990990990992, |
|
"grad_norm": 0.11530728589499968, |
|
"learning_rate": 1.4470661389100804e-06, |
|
"loss": 0.066, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.9027027027027028, |
|
"grad_norm": 0.0937287880770091, |
|
"learning_rate": 1.3420970408178913e-06, |
|
"loss": 0.0508, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.9063063063063064, |
|
"grad_norm": 0.1140049075150384, |
|
"learning_rate": 1.241054134416464e-06, |
|
"loss": 0.0722, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.90990990990991, |
|
"grad_norm": 0.10812439212587106, |
|
"learning_rate": 1.143941440811147e-06, |
|
"loss": 0.0607, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.9135135135135135, |
|
"grad_norm": 0.10082812889823659, |
|
"learning_rate": 1.0507628247004465e-06, |
|
"loss": 0.0611, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.917117117117117, |
|
"grad_norm": 0.1646769037018901, |
|
"learning_rate": 9.615219942222474e-07, |
|
"loss": 0.0907, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.9207207207207206, |
|
"grad_norm": 0.13342387053057897, |
|
"learning_rate": 8.762225008062674e-07, |
|
"loss": 0.0705, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.9243243243243242, |
|
"grad_norm": 0.10099025782756708, |
|
"learning_rate": 7.948677390326786e-07, |
|
"loss": 0.0594, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.9279279279279278, |
|
"grad_norm": 0.10984097417427825, |
|
"learning_rate": 7.174609464970505e-07, |
|
"loss": 0.064, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.9315315315315316, |
|
"grad_norm": 0.09250058769796181, |
|
"learning_rate": 6.440052036815081e-07, |
|
"loss": 0.0488, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.9351351351351351, |
|
"grad_norm": 0.11346138285376553, |
|
"learning_rate": 5.745034338321187e-07, |
|
"loss": 0.0709, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.9387387387387387, |
|
"grad_norm": 0.12145882785931153, |
|
"learning_rate": 5.089584028425743e-07, |
|
"loss": 0.0628, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.9423423423423425, |
|
"grad_norm": 0.09764192386128882, |
|
"learning_rate": 4.4737271914411236e-07, |
|
"loss": 0.0581, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.945945945945946, |
|
"grad_norm": 0.11336911485782475, |
|
"learning_rate": 3.8974883360169966e-07, |
|
"loss": 0.0652, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.9495495495495496, |
|
"grad_norm": 0.1197688852133187, |
|
"learning_rate": 3.360890394165539e-07, |
|
"loss": 0.0797, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.9531531531531532, |
|
"grad_norm": 0.12068524258623581, |
|
"learning_rate": 2.86395472034795e-07, |
|
"loss": 0.0796, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.9567567567567568, |
|
"grad_norm": 0.10022614951335301, |
|
"learning_rate": 2.4067010906254626e-07, |
|
"loss": 0.0651, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.9603603603603603, |
|
"grad_norm": 0.1030646452744324, |
|
"learning_rate": 1.989147701871641e-07, |
|
"loss": 0.0536, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.9639639639639639, |
|
"grad_norm": 0.07714530191454869, |
|
"learning_rate": 1.611311171048735e-07, |
|
"loss": 0.0494, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.9675675675675675, |
|
"grad_norm": 0.1157905922358179, |
|
"learning_rate": 1.2732065345462118e-07, |
|
"loss": 0.075, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.971171171171171, |
|
"grad_norm": 0.07486976400470267, |
|
"learning_rate": 9.748472475823444e-08, |
|
"loss": 0.0454, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.9747747747747748, |
|
"grad_norm": 0.09262953733370366, |
|
"learning_rate": 7.162451836685291e-08, |
|
"loss": 0.0556, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.9783783783783784, |
|
"grad_norm": 0.11616787129919331, |
|
"learning_rate": 4.974106341374407e-08, |
|
"loss": 0.0767, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.981981981981982, |
|
"grad_norm": 0.11058833058189024, |
|
"learning_rate": 3.183523077324724e-08, |
|
"loss": 0.0618, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.9855855855855857, |
|
"grad_norm": 0.10561626473162405, |
|
"learning_rate": 1.7907733026223394e-08, |
|
"loss": 0.0719, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.9891891891891893, |
|
"grad_norm": 0.1403058364172833, |
|
"learning_rate": 7.959124431622389e-09, |
|
"loss": 0.092, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.9927927927927929, |
|
"grad_norm": 0.1326365804299446, |
|
"learning_rate": 1.989800904445005e-09, |
|
"loss": 0.0678, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.9963963963963964, |
|
"grad_norm": 0.14065192361832085, |
|
"learning_rate": 0.0, |
|
"loss": 0.0822, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.9963963963963964, |
|
"eval_loss": 0.13327383995056152, |
|
"eval_runtime": 50.2794, |
|
"eval_samples_per_second": 4.654, |
|
"eval_steps_per_second": 0.597, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.9321739130434783, |
|
"grad_norm": 0.2707502844732912, |
|
"learning_rate": 6.683334509453465e-07, |
|
"loss": 0.1107, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.9356521739130435, |
|
"grad_norm": 0.3664545335936956, |
|
"learning_rate": 5.999024760054095e-07, |
|
"loss": 0.1484, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.9391304347826086, |
|
"grad_norm": 0.2879184291998937, |
|
"learning_rate": 5.351560625760254e-07, |
|
"loss": 0.1073, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.942608695652174, |
|
"grad_norm": 0.32527132334089365, |
|
"learning_rate": 4.7409661067642217e-07, |
|
"loss": 0.1341, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.9460869565217391, |
|
"grad_norm": 0.27961707371818556, |
|
"learning_rate": 4.167263836575286e-07, |
|
"loss": 0.0999, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.9495652173913043, |
|
"grad_norm": 0.2705336974198295, |
|
"learning_rate": 3.630475081181861e-07, |
|
"loss": 0.0858, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.9530434782608697, |
|
"grad_norm": 0.2630558680757462, |
|
"learning_rate": 3.1306197382624526e-07, |
|
"loss": 0.1232, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.9565217391304348, |
|
"grad_norm": 0.2875514596758492, |
|
"learning_rate": 2.667716336448356e-07, |
|
"loss": 0.1385, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.3682978167407908, |
|
"learning_rate": 2.2417820346367635e-07, |
|
"loss": 0.1103, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.9634782608695653, |
|
"grad_norm": 0.26822875711798694, |
|
"learning_rate": 1.8528326213548274e-07, |
|
"loss": 0.1138, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.9669565217391303, |
|
"grad_norm": 0.2418187498704634, |
|
"learning_rate": 1.50088251417424e-07, |
|
"loss": 0.1105, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.9704347826086956, |
|
"grad_norm": 0.3401484224136178, |
|
"learning_rate": 1.1859447591769934e-07, |
|
"loss": 0.1377, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.973913043478261, |
|
"grad_norm": 0.23339563722483295, |
|
"learning_rate": 9.080310304716567e-08, |
|
"loss": 0.1081, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.977391304347826, |
|
"grad_norm": 0.33523600802230064, |
|
"learning_rate": 6.671516297606095e-08, |
|
"loss": 0.1395, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.9808695652173913, |
|
"grad_norm": 0.22230937288937844, |
|
"learning_rate": 4.6331548595845984e-08, |
|
"loss": 0.0859, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.9843478260869565, |
|
"grad_norm": 0.37789798217710224, |
|
"learning_rate": 2.965301548606414e-08, |
|
"loss": 0.1846, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.9878260869565216, |
|
"grad_norm": 0.29502175807159936, |
|
"learning_rate": 1.6680181886352676e-08, |
|
"loss": 0.1208, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.991304347826087, |
|
"grad_norm": 0.3456726449106421, |
|
"learning_rate": 7.413528673549941e-09, |
|
"loss": 0.1142, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.9947826086956522, |
|
"grad_norm": 0.34376689538870997, |
|
"learning_rate": 1.8533993438318852e-09, |
|
"loss": 0.1442, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.9982608695652173, |
|
"grad_norm": 0.3072940536392916, |
|
"learning_rate": 0.0, |
|
"loss": 0.1067, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.9982608695652173, |
|
"step": 574, |
|
"total_flos": 466200922914816.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 1.1199, |
|
"train_samples_per_second": 8208.056, |
|
"train_steps_per_second": 512.557 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 574, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 466200922914816.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |