|
{ |
|
"best_metric": 0.34765490889549255, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 0.0020115563914689895, |
|
"eval_steps": 25, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.0115563914689893e-05, |
|
"grad_norm": 0.29655277729034424, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4437, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0115563914689893e-05, |
|
"eval_loss": 1.1714186668395996, |
|
"eval_runtime": 7328.1947, |
|
"eval_samples_per_second": 2.856, |
|
"eval_steps_per_second": 1.428, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 4.023112782937979e-05, |
|
"grad_norm": 0.5731308460235596, |
|
"learning_rate": 8e-05, |
|
"loss": 0.3697, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 6.034669174406968e-05, |
|
"grad_norm": 0.6497276425361633, |
|
"learning_rate": 0.00012, |
|
"loss": 0.4203, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 8.046225565875957e-05, |
|
"grad_norm": 0.9947681427001953, |
|
"learning_rate": 0.00016, |
|
"loss": 0.5204, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00010057781957344947, |
|
"grad_norm": 0.7415378093719482, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4131, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00012069338348813936, |
|
"grad_norm": 0.8263693451881409, |
|
"learning_rate": 0.00019994532573409262, |
|
"loss": 0.5349, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00014080894740282925, |
|
"grad_norm": 0.8251563310623169, |
|
"learning_rate": 0.00019978136272187747, |
|
"loss": 0.1855, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00016092451131751915, |
|
"grad_norm": 0.7272641658782959, |
|
"learning_rate": 0.00019950829025450114, |
|
"loss": 0.1285, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00018104007523220904, |
|
"grad_norm": 0.6202177405357361, |
|
"learning_rate": 0.00019912640693269752, |
|
"loss": 0.1598, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00020115563914689893, |
|
"grad_norm": 1.1333729028701782, |
|
"learning_rate": 0.00019863613034027224, |
|
"loss": 0.3572, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00022127120306158883, |
|
"grad_norm": 0.8228061199188232, |
|
"learning_rate": 0.00019803799658748094, |
|
"loss": 0.2095, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00024138676697627872, |
|
"grad_norm": 2.149914026260376, |
|
"learning_rate": 0.0001973326597248006, |
|
"loss": 0.4364, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00026150233089096864, |
|
"grad_norm": 0.8230195045471191, |
|
"learning_rate": 0.00019652089102773488, |
|
"loss": 0.233, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0002816178948056585, |
|
"grad_norm": 1.5093271732330322, |
|
"learning_rate": 0.00019560357815343577, |
|
"loss": 0.4418, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00030173345872034843, |
|
"grad_norm": 0.5346623063087463, |
|
"learning_rate": 0.00019458172417006347, |
|
"loss": 0.1451, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0003218490226350383, |
|
"grad_norm": 0.546474039554596, |
|
"learning_rate": 0.0001934564464599461, |
|
"loss": 0.1511, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0003419645865497282, |
|
"grad_norm": 0.7137768268585205, |
|
"learning_rate": 0.00019222897549773848, |
|
"loss": 0.1932, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0003620801504644181, |
|
"grad_norm": 1.1197693347930908, |
|
"learning_rate": 0.00019090065350491626, |
|
"loss": 0.34, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.000382195714379108, |
|
"grad_norm": 1.2347760200500488, |
|
"learning_rate": 0.00018947293298207635, |
|
"loss": 0.2232, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.00040231127829379787, |
|
"grad_norm": 0.9798358082771301, |
|
"learning_rate": 0.0001879473751206489, |
|
"loss": 0.2199, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0004224268422084878, |
|
"grad_norm": 1.1938731670379639, |
|
"learning_rate": 0.00018632564809575742, |
|
"loss": 0.3553, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.00044254240612317766, |
|
"grad_norm": 1.013114094734192, |
|
"learning_rate": 0.00018460952524209355, |
|
"loss": 0.2323, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0004626579700378676, |
|
"grad_norm": 0.8954797387123108, |
|
"learning_rate": 0.00018280088311480201, |
|
"loss": 0.1924, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.00048277353395255744, |
|
"grad_norm": 1.2280372381210327, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.3561, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0005028890978672474, |
|
"grad_norm": 0.9228379726409912, |
|
"learning_rate": 0.00017891405093963938, |
|
"loss": 0.331, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0005028890978672474, |
|
"eval_loss": 0.38219088315963745, |
|
"eval_runtime": 7333.2365, |
|
"eval_samples_per_second": 2.854, |
|
"eval_steps_per_second": 1.427, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0005230046617819373, |
|
"grad_norm": 1.3667868375778198, |
|
"learning_rate": 0.00017684011108568592, |
|
"loss": 0.3585, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0005431202256966271, |
|
"grad_norm": 0.8309668898582458, |
|
"learning_rate": 0.0001746821476984154, |
|
"loss": 0.2539, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.000563235789611317, |
|
"grad_norm": 0.8600310683250427, |
|
"learning_rate": 0.00017244252047910892, |
|
"loss": 0.4283, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0005833513535260069, |
|
"grad_norm": 1.5213987827301025, |
|
"learning_rate": 0.00017012367842724887, |
|
"loss": 0.5058, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0006034669174406969, |
|
"grad_norm": 1.0365674495697021, |
|
"learning_rate": 0.00016772815716257412, |
|
"loss": 0.439, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0006235824813553867, |
|
"grad_norm": 1.1248812675476074, |
|
"learning_rate": 0.00016525857615241687, |
|
"loss": 0.1794, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0006436980452700766, |
|
"grad_norm": 2.210256576538086, |
|
"learning_rate": 0.0001627176358473537, |
|
"loss": 0.7656, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0006638136091847665, |
|
"grad_norm": 1.926735758781433, |
|
"learning_rate": 0.00016010811472830252, |
|
"loss": 0.5577, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0006839291730994564, |
|
"grad_norm": 2.0872886180877686, |
|
"learning_rate": 0.00015743286626829437, |
|
"loss": 0.2975, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0007040447370141462, |
|
"grad_norm": 1.9924815893173218, |
|
"learning_rate": 0.00015469481581224272, |
|
"loss": 0.5404, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0007241603009288362, |
|
"grad_norm": 0.763292670249939, |
|
"learning_rate": 0.00015189695737812152, |
|
"loss": 0.2239, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0007442758648435261, |
|
"grad_norm": 1.0943641662597656, |
|
"learning_rate": 0.00014904235038305083, |
|
"loss": 0.1645, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.000764391428758216, |
|
"grad_norm": 1.2765237092971802, |
|
"learning_rate": 0.0001461341162978688, |
|
"loss": 0.2827, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0007845069926729058, |
|
"grad_norm": 0.7692890763282776, |
|
"learning_rate": 0.00014317543523384928, |
|
"loss": 0.1748, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0008046225565875957, |
|
"grad_norm": 0.8414583206176758, |
|
"learning_rate": 0.00014016954246529696, |
|
"loss": 0.2124, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0008247381205022857, |
|
"grad_norm": 1.9153568744659424, |
|
"learning_rate": 0.00013711972489182208, |
|
"loss": 0.8024, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0008448536844169756, |
|
"grad_norm": 1.7804728746414185, |
|
"learning_rate": 0.00013402931744416433, |
|
"loss": 0.3997, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0008649692483316654, |
|
"grad_norm": 1.816100835800171, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.6297, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0008850848122463553, |
|
"grad_norm": 0.5582104921340942, |
|
"learning_rate": 0.00012774029087618446, |
|
"loss": 0.1037, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0009052003761610452, |
|
"grad_norm": 1.3087366819381714, |
|
"learning_rate": 0.00012454854871407994, |
|
"loss": 0.3838, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0009253159400757352, |
|
"grad_norm": 1.9638283252716064, |
|
"learning_rate": 0.0001213299630743747, |
|
"loss": 0.5667, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.000945431503990425, |
|
"grad_norm": 1.8329553604125977, |
|
"learning_rate": 0.000118088053433211, |
|
"loss": 0.5189, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0009655470679051149, |
|
"grad_norm": 1.815300464630127, |
|
"learning_rate": 0.0001148263647711842, |
|
"loss": 0.5543, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0009856626318198048, |
|
"grad_norm": 2.2588958740234375, |
|
"learning_rate": 0.00011154846369695863, |
|
"loss": 0.3947, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0010057781957344947, |
|
"grad_norm": 2.2676048278808594, |
|
"learning_rate": 0.00010825793454723325, |
|
"loss": 0.6438, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0010057781957344947, |
|
"eval_loss": 0.3769894242286682, |
|
"eval_runtime": 7329.2477, |
|
"eval_samples_per_second": 2.856, |
|
"eval_steps_per_second": 1.428, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0010258937596491846, |
|
"grad_norm": 0.3635750710964203, |
|
"learning_rate": 0.00010495837546732224, |
|
"loss": 0.2263, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0010460093235638746, |
|
"grad_norm": 0.3851054906845093, |
|
"learning_rate": 0.00010165339447663587, |
|
"loss": 0.1225, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0010661248874785643, |
|
"grad_norm": 1.072131872177124, |
|
"learning_rate": 9.834660552336415e-05, |
|
"loss": 0.4456, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0010862404513932542, |
|
"grad_norm": 0.7393021583557129, |
|
"learning_rate": 9.504162453267777e-05, |
|
"loss": 0.3782, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0011063560153079441, |
|
"grad_norm": 0.5794671177864075, |
|
"learning_rate": 9.174206545276677e-05, |
|
"loss": 0.2947, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.001126471579222634, |
|
"grad_norm": 0.6361809372901917, |
|
"learning_rate": 8.845153630304139e-05, |
|
"loss": 0.1859, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.001146587143137324, |
|
"grad_norm": 0.8540146946907043, |
|
"learning_rate": 8.517363522881579e-05, |
|
"loss": 0.3623, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0011667027070520139, |
|
"grad_norm": 0.6807648539543152, |
|
"learning_rate": 8.191194656678904e-05, |
|
"loss": 0.2334, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0011868182709667038, |
|
"grad_norm": 0.8112823963165283, |
|
"learning_rate": 7.867003692562534e-05, |
|
"loss": 0.4587, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0012069338348813937, |
|
"grad_norm": 0.7327591776847839, |
|
"learning_rate": 7.54514512859201e-05, |
|
"loss": 0.3364, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0012270493987960834, |
|
"grad_norm": 0.8124757409095764, |
|
"learning_rate": 7.225970912381556e-05, |
|
"loss": 0.3287, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0012471649627107733, |
|
"grad_norm": 0.8952487707138062, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 0.3561, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0012672805266254633, |
|
"grad_norm": 0.7453446388244629, |
|
"learning_rate": 6.59706825558357e-05, |
|
"loss": 0.2724, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0012873960905401532, |
|
"grad_norm": 0.7678841352462769, |
|
"learning_rate": 6.28802751081779e-05, |
|
"loss": 0.2843, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.001307511654454843, |
|
"grad_norm": 0.6977100968360901, |
|
"learning_rate": 5.983045753470308e-05, |
|
"loss": 0.188, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.001327627218369533, |
|
"grad_norm": 0.7905102372169495, |
|
"learning_rate": 5.6824564766150726e-05, |
|
"loss": 0.4339, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.001347742782284223, |
|
"grad_norm": 0.5195531249046326, |
|
"learning_rate": 5.386588370213124e-05, |
|
"loss": 0.1489, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0013678583461989129, |
|
"grad_norm": 2.699000597000122, |
|
"learning_rate": 5.095764961694922e-05, |
|
"loss": 0.6969, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0013879739101136026, |
|
"grad_norm": 0.8617449402809143, |
|
"learning_rate": 4.810304262187852e-05, |
|
"loss": 0.3883, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0014080894740282925, |
|
"grad_norm": 0.8697649240493774, |
|
"learning_rate": 4.530518418775733e-05, |
|
"loss": 0.2275, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0014282050379429824, |
|
"grad_norm": 0.7477262020111084, |
|
"learning_rate": 4.256713373170564e-05, |
|
"loss": 0.3324, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0014483206018576723, |
|
"grad_norm": 0.6884785294532776, |
|
"learning_rate": 3.9891885271697496e-05, |
|
"loss": 0.1723, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0014684361657723622, |
|
"grad_norm": 0.5269164443016052, |
|
"learning_rate": 3.7282364152646297e-05, |
|
"loss": 0.1525, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0014885517296870522, |
|
"grad_norm": 1.167458415031433, |
|
"learning_rate": 3.4741423847583134e-05, |
|
"loss": 0.479, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.001508667293601742, |
|
"grad_norm": 0.5435552597045898, |
|
"learning_rate": 3.227184283742591e-05, |
|
"loss": 0.2228, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.001508667293601742, |
|
"eval_loss": 0.34874510765075684, |
|
"eval_runtime": 7329.6366, |
|
"eval_samples_per_second": 2.856, |
|
"eval_steps_per_second": 1.428, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.001528782857516432, |
|
"grad_norm": 0.7186751365661621, |
|
"learning_rate": 2.9876321572751144e-05, |
|
"loss": 0.1929, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0015488984214311217, |
|
"grad_norm": 0.8120232224464417, |
|
"learning_rate": 2.7557479520891104e-05, |
|
"loss": 0.1906, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0015690139853458116, |
|
"grad_norm": 0.5622069835662842, |
|
"learning_rate": 2.5317852301584643e-05, |
|
"loss": 0.158, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0015891295492605016, |
|
"grad_norm": 0.6373627185821533, |
|
"learning_rate": 2.315988891431412e-05, |
|
"loss": 0.158, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0016092451131751915, |
|
"grad_norm": 0.3394858241081238, |
|
"learning_rate": 2.1085949060360654e-05, |
|
"loss": 0.0549, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0016293606770898814, |
|
"grad_norm": 0.9142265915870667, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 0.1498, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0016494762410045713, |
|
"grad_norm": 0.5488215088844299, |
|
"learning_rate": 1.7199116885197995e-05, |
|
"loss": 0.086, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0016695918049192612, |
|
"grad_norm": 0.6231745481491089, |
|
"learning_rate": 1.5390474757906446e-05, |
|
"loss": 0.1239, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0016897073688339512, |
|
"grad_norm": 0.7093625664710999, |
|
"learning_rate": 1.3674351904242611e-05, |
|
"loss": 0.1512, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0017098229327486409, |
|
"grad_norm": 1.516791820526123, |
|
"learning_rate": 1.2052624879351104e-05, |
|
"loss": 0.3605, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0017299384966633308, |
|
"grad_norm": 0.8480135202407837, |
|
"learning_rate": 1.0527067017923654e-05, |
|
"loss": 0.2031, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0017500540605780207, |
|
"grad_norm": 0.6130940318107605, |
|
"learning_rate": 9.09934649508375e-06, |
|
"loss": 0.1745, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0017701696244927106, |
|
"grad_norm": 1.8014602661132812, |
|
"learning_rate": 7.771024502261526e-06, |
|
"loss": 0.4315, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0017902851884074005, |
|
"grad_norm": 0.6862621307373047, |
|
"learning_rate": 6.543553540053926e-06, |
|
"loss": 0.1008, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0018104007523220905, |
|
"grad_norm": 0.6760240197181702, |
|
"learning_rate": 5.418275829936537e-06, |
|
"loss": 0.1387, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0018305163162367804, |
|
"grad_norm": 2.04972767829895, |
|
"learning_rate": 4.3964218465642355e-06, |
|
"loss": 0.4774, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0018506318801514703, |
|
"grad_norm": 3.159485101699829, |
|
"learning_rate": 3.4791089722651436e-06, |
|
"loss": 0.4218, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.00187074744406616, |
|
"grad_norm": 2.143138885498047, |
|
"learning_rate": 2.667340275199426e-06, |
|
"loss": 0.556, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.00189086300798085, |
|
"grad_norm": 1.6069244146347046, |
|
"learning_rate": 1.9620034125190644e-06, |
|
"loss": 0.4657, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0019109785718955398, |
|
"grad_norm": 1.5652406215667725, |
|
"learning_rate": 1.3638696597277679e-06, |
|
"loss": 0.4702, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0019310941358102298, |
|
"grad_norm": 2.650838613510132, |
|
"learning_rate": 8.735930673024806e-07, |
|
"loss": 0.6213, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0019512096997249197, |
|
"grad_norm": 0.9253861904144287, |
|
"learning_rate": 4.917097454988584e-07, |
|
"loss": 0.2009, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0019713252636396096, |
|
"grad_norm": 2.9420182704925537, |
|
"learning_rate": 2.1863727812254653e-07, |
|
"loss": 0.6738, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0019914408275542995, |
|
"grad_norm": 3.0107245445251465, |
|
"learning_rate": 5.467426590739511e-08, |
|
"loss": 0.8191, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0020115563914689895, |
|
"grad_norm": 1.9796432256698608, |
|
"learning_rate": 0.0, |
|
"loss": 0.4038, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0020115563914689895, |
|
"eval_loss": 0.34765490889549255, |
|
"eval_runtime": 7329.2018, |
|
"eval_samples_per_second": 2.856, |
|
"eval_steps_per_second": 1.428, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4874840035386982e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|