|
{
  "best_metric": 1.0187755823135376,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.027062683941679915,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00027062683941679914,
      "grad_norm": 0.5000702738761902,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.8662,
      "step": 1
    },
    {
      "epoch": 0.00027062683941679914,
      "eval_loss": 1.2486605644226074,
      "eval_runtime": 576.5633,
      "eval_samples_per_second": 10.795,
      "eval_steps_per_second": 2.699,
      "step": 1
    },
    {
      "epoch": 0.0005412536788335983,
      "grad_norm": 0.7091050744056702,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.8844,
      "step": 2
    },
    {
      "epoch": 0.0008118805182503975,
      "grad_norm": 0.7198283076286316,
      "learning_rate": 1.5e-06,
      "loss": 0.8736,
      "step": 3
    },
    {
      "epoch": 0.0010825073576671966,
      "grad_norm": 0.7402541041374207,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.9226,
      "step": 4
    },
    {
      "epoch": 0.0013531341970839958,
      "grad_norm": 0.6890178918838501,
      "learning_rate": 2.5e-06,
      "loss": 0.9618,
      "step": 5
    },
    {
      "epoch": 0.001623761036500795,
      "grad_norm": 0.7613320350646973,
      "learning_rate": 3e-06,
      "loss": 0.9509,
      "step": 6
    },
    {
      "epoch": 0.001894387875917594,
      "grad_norm": 0.8056195378303528,
      "learning_rate": 3.5e-06,
      "loss": 0.9464,
      "step": 7
    },
    {
      "epoch": 0.002165014715334393,
      "grad_norm": 0.8153071403503418,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.9766,
      "step": 8
    },
    {
      "epoch": 0.0024356415547511926,
      "grad_norm": 0.8731986284255981,
      "learning_rate": 4.5e-06,
      "loss": 0.9854,
      "step": 9
    },
    {
      "epoch": 0.0027062683941679916,
      "grad_norm": 0.8255438804626465,
      "learning_rate": 5e-06,
      "loss": 1.0802,
      "step": 10
    },
    {
      "epoch": 0.0029768952335847906,
      "grad_norm": 0.8778969645500183,
      "learning_rate": 4.99847706754774e-06,
      "loss": 0.9281,
      "step": 11
    },
    {
      "epoch": 0.00324752207300159,
      "grad_norm": 0.7399764657020569,
      "learning_rate": 4.993910125649561e-06,
      "loss": 1.0072,
      "step": 12
    },
    {
      "epoch": 0.003518148912418389,
      "grad_norm": 0.9628943800926208,
      "learning_rate": 4.986304738420684e-06,
      "loss": 0.9862,
      "step": 13
    },
    {
      "epoch": 0.003788775751835188,
      "grad_norm": 0.8942387104034424,
      "learning_rate": 4.975670171853926e-06,
      "loss": 1.0352,
      "step": 14
    },
    {
      "epoch": 0.004059402591251988,
      "grad_norm": 0.9490991234779358,
      "learning_rate": 4.962019382530521e-06,
      "loss": 1.0584,
      "step": 15
    },
    {
      "epoch": 0.004330029430668786,
      "grad_norm": 0.9122024178504944,
      "learning_rate": 4.9453690018345144e-06,
      "loss": 1.0072,
      "step": 16
    },
    {
      "epoch": 0.004600656270085586,
      "grad_norm": 0.7996208667755127,
      "learning_rate": 4.925739315689991e-06,
      "loss": 1.1048,
      "step": 17
    },
    {
      "epoch": 0.004871283109502385,
      "grad_norm": 0.9378483891487122,
      "learning_rate": 4.903154239845798e-06,
      "loss": 1.0365,
      "step": 18
    },
    {
      "epoch": 0.005141909948919184,
      "grad_norm": 0.8539978861808777,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 0.9979,
      "step": 19
    },
    {
      "epoch": 0.005412536788335983,
      "grad_norm": 0.9930011630058289,
      "learning_rate": 4.849231551964771e-06,
      "loss": 1.0151,
      "step": 20
    },
    {
      "epoch": 0.005683163627752783,
      "grad_norm": 0.9113985300064087,
      "learning_rate": 4.817959636416969e-06,
      "loss": 1.0266,
      "step": 21
    },
    {
      "epoch": 0.005953790467169581,
      "grad_norm": 0.9310271739959717,
      "learning_rate": 4.783863644106502e-06,
      "loss": 0.998,
      "step": 22
    },
    {
      "epoch": 0.006224417306586381,
      "grad_norm": 1.0234174728393555,
      "learning_rate": 4.746985115747918e-06,
      "loss": 1.0837,
      "step": 23
    },
    {
      "epoch": 0.00649504414600318,
      "grad_norm": 0.890159010887146,
      "learning_rate": 4.707368982147318e-06,
      "loss": 1.0044,
      "step": 24
    },
    {
      "epoch": 0.006765670985419979,
      "grad_norm": 1.1107178926467896,
      "learning_rate": 4.665063509461098e-06,
      "loss": 1.1168,
      "step": 25
    },
    {
      "epoch": 0.007036297824836778,
      "grad_norm": 1.1049308776855469,
      "learning_rate": 4.620120240391065e-06,
      "loss": 1.1942,
      "step": 26
    },
    {
      "epoch": 0.007306924664253578,
      "grad_norm": 1.1453604698181152,
      "learning_rate": 4.572593931387604e-06,
      "loss": 1.0531,
      "step": 27
    },
    {
      "epoch": 0.007577551503670376,
      "grad_norm": 1.0684839487075806,
      "learning_rate": 4.522542485937369e-06,
      "loss": 1.0973,
      "step": 28
    },
    {
      "epoch": 0.007848178343087176,
      "grad_norm": 1.1629831790924072,
      "learning_rate": 4.470026884016805e-06,
      "loss": 1.0673,
      "step": 29
    },
    {
      "epoch": 0.008118805182503975,
      "grad_norm": 1.0914651155471802,
      "learning_rate": 4.415111107797445e-06,
      "loss": 1.1575,
      "step": 30
    },
    {
      "epoch": 0.008389432021920775,
      "grad_norm": 1.3187044858932495,
      "learning_rate": 4.357862063693486e-06,
      "loss": 1.223,
      "step": 31
    },
    {
      "epoch": 0.008660058861337572,
      "grad_norm": 1.2369117736816406,
      "learning_rate": 4.2983495008466285e-06,
      "loss": 1.1662,
      "step": 32
    },
    {
      "epoch": 0.008930685700754372,
      "grad_norm": 1.3803397417068481,
      "learning_rate": 4.236645926147493e-06,
      "loss": 1.1548,
      "step": 33
    },
    {
      "epoch": 0.009201312540171171,
      "grad_norm": 1.3240615129470825,
      "learning_rate": 4.172826515897146e-06,
      "loss": 1.1982,
      "step": 34
    },
    {
      "epoch": 0.00947193937958797,
      "grad_norm": 1.5324469804763794,
      "learning_rate": 4.106969024216348e-06,
      "loss": 1.1681,
      "step": 35
    },
    {
      "epoch": 0.00974256621900477,
      "grad_norm": 1.323723554611206,
      "learning_rate": 4.039153688314146e-06,
      "loss": 1.1543,
      "step": 36
    },
    {
      "epoch": 0.01001319305842157,
      "grad_norm": 1.5759074687957764,
      "learning_rate": 3.969463130731183e-06,
      "loss": 1.1227,
      "step": 37
    },
    {
      "epoch": 0.010283819897838367,
      "grad_norm": 1.6370131969451904,
      "learning_rate": 3.897982258676867e-06,
      "loss": 1.1549,
      "step": 38
    },
    {
      "epoch": 0.010554446737255167,
      "grad_norm": 1.5213303565979004,
      "learning_rate": 3.824798160583012e-06,
      "loss": 1.1308,
      "step": 39
    },
    {
      "epoch": 0.010825073576671966,
      "grad_norm": 1.753510594367981,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.2172,
      "step": 40
    },
    {
      "epoch": 0.011095700416088766,
      "grad_norm": 1.6697436571121216,
      "learning_rate": 3.6736789069647273e-06,
      "loss": 1.1446,
      "step": 41
    },
    {
      "epoch": 0.011366327255505565,
      "grad_norm": 1.6956145763397217,
      "learning_rate": 3.595927866972694e-06,
      "loss": 1.0812,
      "step": 42
    },
    {
      "epoch": 0.011636954094922363,
      "grad_norm": 1.6840723752975464,
      "learning_rate": 3.516841607689501e-06,
      "loss": 1.1333,
      "step": 43
    },
    {
      "epoch": 0.011907580934339163,
      "grad_norm": 1.8532711267471313,
      "learning_rate": 3.436516483539781e-06,
      "loss": 1.2552,
      "step": 44
    },
    {
      "epoch": 0.012178207773755962,
      "grad_norm": 2.4561421871185303,
      "learning_rate": 3.3550503583141726e-06,
      "loss": 1.3493,
      "step": 45
    },
    {
      "epoch": 0.012448834613172761,
      "grad_norm": 2.4903533458709717,
      "learning_rate": 3.272542485937369e-06,
      "loss": 1.2094,
      "step": 46
    },
    {
      "epoch": 0.012719461452589561,
      "grad_norm": 3.978550910949707,
      "learning_rate": 3.189093389542498e-06,
      "loss": 1.508,
      "step": 47
    },
    {
      "epoch": 0.01299008829200636,
      "grad_norm": 6.7383856773376465,
      "learning_rate": 3.1048047389991693e-06,
      "loss": 1.5481,
      "step": 48
    },
    {
      "epoch": 0.013260715131423158,
      "grad_norm": 7.621368885040283,
      "learning_rate": 3.019779227044398e-06,
      "loss": 1.7839,
      "step": 49
    },
    {
      "epoch": 0.013531341970839958,
      "grad_norm": 7.821316242218018,
      "learning_rate": 2.9341204441673267e-06,
      "loss": 1.8119,
      "step": 50
    },
    {
      "epoch": 0.013531341970839958,
      "eval_loss": 1.0674021244049072,
      "eval_runtime": 581.3896,
      "eval_samples_per_second": 10.705,
      "eval_steps_per_second": 2.676,
      "step": 50
    },
    {
      "epoch": 0.013801968810256757,
      "grad_norm": 0.5559863448143005,
      "learning_rate": 2.847932752400164e-06,
      "loss": 0.7655,
      "step": 51
    },
    {
      "epoch": 0.014072595649673557,
      "grad_norm": 0.678560197353363,
      "learning_rate": 2.761321158169134e-06,
      "loss": 0.803,
      "step": 52
    },
    {
      "epoch": 0.014343222489090356,
      "grad_norm": 0.6894142627716064,
      "learning_rate": 2.6743911843603134e-06,
      "loss": 0.8397,
      "step": 53
    },
    {
      "epoch": 0.014613849328507155,
      "grad_norm": 0.7735072374343872,
      "learning_rate": 2.587248741756253e-06,
      "loss": 0.8653,
      "step": 54
    },
    {
      "epoch": 0.014884476167923953,
      "grad_norm": 0.7143723964691162,
      "learning_rate": 2.5e-06,
      "loss": 0.8341,
      "step": 55
    },
    {
      "epoch": 0.015155103007340753,
      "grad_norm": 0.7415910959243774,
      "learning_rate": 2.4127512582437486e-06,
      "loss": 0.8152,
      "step": 56
    },
    {
      "epoch": 0.015425729846757552,
      "grad_norm": 0.8374602794647217,
      "learning_rate": 2.325608815639687e-06,
      "loss": 0.8897,
      "step": 57
    },
    {
      "epoch": 0.01569635668617435,
      "grad_norm": 0.8707734942436218,
      "learning_rate": 2.238678841830867e-06,
      "loss": 0.9219,
      "step": 58
    },
    {
      "epoch": 0.01596698352559115,
      "grad_norm": 0.815484881401062,
      "learning_rate": 2.1520672475998374e-06,
      "loss": 0.8614,
      "step": 59
    },
    {
      "epoch": 0.01623761036500795,
      "grad_norm": 0.7766671776771545,
      "learning_rate": 2.0658795558326745e-06,
      "loss": 0.8877,
      "step": 60
    },
    {
      "epoch": 0.01650823720442475,
      "grad_norm": 0.8103740215301514,
      "learning_rate": 1.9802207729556023e-06,
      "loss": 0.9241,
      "step": 61
    },
    {
      "epoch": 0.01677886404384155,
      "grad_norm": 0.9171229004859924,
      "learning_rate": 1.895195261000831e-06,
      "loss": 0.9715,
      "step": 62
    },
    {
      "epoch": 0.017049490883258345,
      "grad_norm": 0.823610246181488,
      "learning_rate": 1.8109066104575023e-06,
      "loss": 0.9178,
      "step": 63
    },
    {
      "epoch": 0.017320117722675145,
      "grad_norm": 0.8297237157821655,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 0.9384,
      "step": 64
    },
    {
      "epoch": 0.017590744562091944,
      "grad_norm": 0.8308224678039551,
      "learning_rate": 1.6449496416858285e-06,
      "loss": 0.91,
      "step": 65
    },
    {
      "epoch": 0.017861371401508744,
      "grad_norm": 0.8265281319618225,
      "learning_rate": 1.56348351646022e-06,
      "loss": 0.8696,
      "step": 66
    },
    {
      "epoch": 0.018131998240925543,
      "grad_norm": 0.8544564843177795,
      "learning_rate": 1.4831583923105e-06,
      "loss": 0.9598,
      "step": 67
    },
    {
      "epoch": 0.018402625080342343,
      "grad_norm": 0.9564810991287231,
      "learning_rate": 1.4040721330273063e-06,
      "loss": 0.9123,
      "step": 68
    },
    {
      "epoch": 0.018673251919759142,
      "grad_norm": 0.9041321873664856,
      "learning_rate": 1.3263210930352737e-06,
      "loss": 0.9157,
      "step": 69
    },
    {
      "epoch": 0.01894387875917594,
      "grad_norm": 0.9885373711585999,
      "learning_rate": 1.2500000000000007e-06,
      "loss": 0.9612,
      "step": 70
    },
    {
      "epoch": 0.01921450559859274,
      "grad_norm": 0.9514999389648438,
      "learning_rate": 1.1752018394169882e-06,
      "loss": 0.969,
      "step": 71
    },
    {
      "epoch": 0.01948513243800954,
      "grad_norm": 0.9459763169288635,
      "learning_rate": 1.1020177413231334e-06,
      "loss": 0.9962,
      "step": 72
    },
    {
      "epoch": 0.01975575927742634,
      "grad_norm": 0.9458332657814026,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 0.9358,
      "step": 73
    },
    {
      "epoch": 0.02002638611684314,
      "grad_norm": 0.9821689128875732,
      "learning_rate": 9.608463116858544e-07,
      "loss": 1.0361,
      "step": 74
    },
    {
      "epoch": 0.020297012956259936,
      "grad_norm": 0.951698362827301,
      "learning_rate": 8.930309757836517e-07,
      "loss": 1.0104,
      "step": 75
    },
    {
      "epoch": 0.020567639795676735,
      "grad_norm": 1.0073827505111694,
      "learning_rate": 8.271734841028553e-07,
      "loss": 0.9367,
      "step": 76
    },
    {
      "epoch": 0.020838266635093534,
      "grad_norm": 1.1683809757232666,
      "learning_rate": 7.633540738525066e-07,
      "loss": 1.0249,
      "step": 77
    },
    {
      "epoch": 0.021108893474510334,
      "grad_norm": 1.0686205625534058,
      "learning_rate": 7.016504991533727e-07,
      "loss": 1.0685,
      "step": 78
    },
    {
      "epoch": 0.021379520313927133,
      "grad_norm": 0.9999198317527771,
      "learning_rate": 6.421379363065142e-07,
      "loss": 0.9795,
      "step": 79
    },
    {
      "epoch": 0.021650147153343933,
      "grad_norm": 1.199865460395813,
      "learning_rate": 5.848888922025553e-07,
      "loss": 1.1232,
      "step": 80
    },
    {
      "epoch": 0.021920773992760732,
      "grad_norm": 1.196691870689392,
      "learning_rate": 5.299731159831953e-07,
      "loss": 1.0305,
      "step": 81
    },
    {
      "epoch": 0.022191400832177532,
      "grad_norm": 1.3286807537078857,
      "learning_rate": 4.774575140626317e-07,
      "loss": 1.0715,
      "step": 82
    },
    {
      "epoch": 0.02246202767159433,
      "grad_norm": 1.3111743927001953,
      "learning_rate": 4.27406068612396e-07,
      "loss": 1.053,
      "step": 83
    },
    {
      "epoch": 0.02273265451101113,
      "grad_norm": 1.197509527206421,
      "learning_rate": 3.798797596089351e-07,
      "loss": 1.1335,
      "step": 84
    },
    {
      "epoch": 0.02300328135042793,
      "grad_norm": 1.2589415311813354,
      "learning_rate": 3.3493649053890325e-07,
      "loss": 1.0684,
      "step": 85
    },
    {
      "epoch": 0.023273908189844726,
      "grad_norm": 1.5249485969543457,
      "learning_rate": 2.9263101785268253e-07,
      "loss": 1.1123,
      "step": 86
    },
    {
      "epoch": 0.023544535029261526,
      "grad_norm": 1.3184010982513428,
      "learning_rate": 2.53014884252083e-07,
      "loss": 1.0367,
      "step": 87
    },
    {
      "epoch": 0.023815161868678325,
      "grad_norm": 1.3574773073196411,
      "learning_rate": 2.1613635589349756e-07,
      "loss": 1.1233,
      "step": 88
    },
    {
      "epoch": 0.024085788708095125,
      "grad_norm": 1.364808440208435,
      "learning_rate": 1.8204036358303173e-07,
      "loss": 1.0077,
      "step": 89
    },
    {
      "epoch": 0.024356415547511924,
      "grad_norm": 1.426081657409668,
      "learning_rate": 1.507684480352292e-07,
      "loss": 1.0295,
      "step": 90
    },
    {
      "epoch": 0.024627042386928723,
      "grad_norm": 1.6618572473526,
      "learning_rate": 1.223587092621162e-07,
      "loss": 0.9982,
      "step": 91
    },
    {
      "epoch": 0.024897669226345523,
      "grad_norm": 1.6236708164215088,
      "learning_rate": 9.684576015420277e-08,
      "loss": 1.058,
      "step": 92
    },
    {
      "epoch": 0.025168296065762322,
      "grad_norm": 1.7425477504730225,
      "learning_rate": 7.426068431000883e-08,
      "loss": 1.0255,
      "step": 93
    },
    {
      "epoch": 0.025438922905179122,
      "grad_norm": 1.9277325868606567,
      "learning_rate": 5.463099816548578e-08,
      "loss": 1.2349,
      "step": 94
    },
    {
      "epoch": 0.02570954974459592,
      "grad_norm": 1.8921107053756714,
      "learning_rate": 3.798061746947995e-08,
      "loss": 1.1955,
      "step": 95
    },
    {
      "epoch": 0.02598017658401272,
      "grad_norm": 2.8468291759490967,
      "learning_rate": 2.4329828146074096e-08,
      "loss": 1.4125,
      "step": 96
    },
    {
      "epoch": 0.02625080342342952,
      "grad_norm": 3.475755453109741,
      "learning_rate": 1.3695261579316776e-08,
      "loss": 1.3095,
      "step": 97
    },
    {
      "epoch": 0.026521430262846316,
      "grad_norm": 4.424863815307617,
      "learning_rate": 6.089874350439507e-09,
      "loss": 1.3732,
      "step": 98
    },
    {
      "epoch": 0.026792057102263116,
      "grad_norm": 5.109591960906982,
      "learning_rate": 1.5229324522605949e-09,
      "loss": 1.2857,
      "step": 99
    },
    {
      "epoch": 0.027062683941679915,
      "grad_norm": 6.888213157653809,
      "learning_rate": 0.0,
      "loss": 1.4212,
      "step": 100
    },
    {
      "epoch": 0.027062683941679915,
      "eval_loss": 1.0187755823135376,
      "eval_runtime": 580.4087,
      "eval_samples_per_second": 10.723,
      "eval_steps_per_second": 2.681,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.539540430796882e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|