|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 200, |
|
"global_step": 1004, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00199203187250996, |
|
"grad_norm": 5.302925716458761, |
|
"learning_rate": 9.99997552222299e-06, |
|
"loss": 0.4962, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00398406374501992, |
|
"grad_norm": 4.0068584615292755, |
|
"learning_rate": 9.999902089131626e-06, |
|
"loss": 0.5131, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00597609561752988, |
|
"grad_norm": 3.2685057084225533, |
|
"learning_rate": 9.999779701444897e-06, |
|
"loss": 0.3642, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00796812749003984, |
|
"grad_norm": 2.969388810564799, |
|
"learning_rate": 9.999608360361114e-06, |
|
"loss": 0.4366, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0099601593625498, |
|
"grad_norm": 2.3257277817224877, |
|
"learning_rate": 9.9993880675579e-06, |
|
"loss": 0.2805, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01195219123505976, |
|
"grad_norm": 2.316100445863212, |
|
"learning_rate": 9.999118825192162e-06, |
|
"loss": 0.3663, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.013944223107569721, |
|
"grad_norm": 2.0546227682793825, |
|
"learning_rate": 9.998800635900085e-06, |
|
"loss": 0.2761, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01593625498007968, |
|
"grad_norm": 2.6542621051493294, |
|
"learning_rate": 9.998433502797097e-06, |
|
"loss": 0.3528, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.017928286852589643, |
|
"grad_norm": 2.184358255514989, |
|
"learning_rate": 9.998017429477834e-06, |
|
"loss": 0.2891, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0199203187250996, |
|
"grad_norm": 2.210497121507057, |
|
"learning_rate": 9.99755242001612e-06, |
|
"loss": 0.3052, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021912350597609563, |
|
"grad_norm": 2.391291694670009, |
|
"learning_rate": 9.99703847896491e-06, |
|
"loss": 0.3088, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02390438247011952, |
|
"grad_norm": 1.8867370891248774, |
|
"learning_rate": 9.996475611356265e-06, |
|
"loss": 0.2799, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.025896414342629483, |
|
"grad_norm": 1.472434411377998, |
|
"learning_rate": 9.995863822701278e-06, |
|
"loss": 0.1877, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.027888446215139442, |
|
"grad_norm": 1.550344203694589, |
|
"learning_rate": 9.99520311899004e-06, |
|
"loss": 0.1703, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.029880478087649404, |
|
"grad_norm": 2.6379971141320264, |
|
"learning_rate": 9.994493506691577e-06, |
|
"loss": 0.2986, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03187250996015936, |
|
"grad_norm": 2.1294276467465734, |
|
"learning_rate": 9.993734992753777e-06, |
|
"loss": 0.2666, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03386454183266932, |
|
"grad_norm": 1.9769257581715476, |
|
"learning_rate": 9.992927584603339e-06, |
|
"loss": 0.2376, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.035856573705179286, |
|
"grad_norm": 2.262018155817254, |
|
"learning_rate": 9.992071290145684e-06, |
|
"loss": 0.2436, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.037848605577689244, |
|
"grad_norm": 2.1357744572584605, |
|
"learning_rate": 9.991166117764885e-06, |
|
"loss": 0.2337, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0398406374501992, |
|
"grad_norm": 2.065609976313799, |
|
"learning_rate": 9.990212076323587e-06, |
|
"loss": 0.2614, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04183266932270916, |
|
"grad_norm": 2.0110212882357894, |
|
"learning_rate": 9.989209175162912e-06, |
|
"loss": 0.2282, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.043824701195219126, |
|
"grad_norm": 2.0333703620396433, |
|
"learning_rate": 9.988157424102381e-06, |
|
"loss": 0.2166, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.045816733067729085, |
|
"grad_norm": 1.7747686699151117, |
|
"learning_rate": 9.9870568334398e-06, |
|
"loss": 0.1755, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.04780876494023904, |
|
"grad_norm": 1.610758994875989, |
|
"learning_rate": 9.98590741395118e-06, |
|
"loss": 0.1979, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.049800796812749, |
|
"grad_norm": 1.8977306151271889, |
|
"learning_rate": 9.98470917689061e-06, |
|
"loss": 0.2244, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05179282868525897, |
|
"grad_norm": 1.7649852344717443, |
|
"learning_rate": 9.983462133990163e-06, |
|
"loss": 0.1947, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.053784860557768925, |
|
"grad_norm": 2.1142885695917877, |
|
"learning_rate": 9.982166297459775e-06, |
|
"loss": 0.2156, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.055776892430278883, |
|
"grad_norm": 2.278915052614827, |
|
"learning_rate": 9.980821679987125e-06, |
|
"loss": 0.1901, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.05776892430278884, |
|
"grad_norm": 2.038139109307458, |
|
"learning_rate": 9.979428294737509e-06, |
|
"loss": 0.2118, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05976095617529881, |
|
"grad_norm": 2.133948012877493, |
|
"learning_rate": 9.97798615535372e-06, |
|
"loss": 0.2146, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.061752988047808766, |
|
"grad_norm": 2.176892858037584, |
|
"learning_rate": 9.976495275955904e-06, |
|
"loss": 0.2097, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06374501992031872, |
|
"grad_norm": 1.9943827766271132, |
|
"learning_rate": 9.974955671141425e-06, |
|
"loss": 0.1968, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06573705179282868, |
|
"grad_norm": 1.9835967650496324, |
|
"learning_rate": 9.973367355984724e-06, |
|
"loss": 0.2116, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.06772908366533864, |
|
"grad_norm": 2.0204571026763434, |
|
"learning_rate": 9.971730346037172e-06, |
|
"loss": 0.2026, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0697211155378486, |
|
"grad_norm": 1.9322301469201182, |
|
"learning_rate": 9.970044657326913e-06, |
|
"loss": 0.2053, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07171314741035857, |
|
"grad_norm": 1.947308039962251, |
|
"learning_rate": 9.968310306358715e-06, |
|
"loss": 0.1917, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07370517928286853, |
|
"grad_norm": 2.046915884396975, |
|
"learning_rate": 9.966527310113798e-06, |
|
"loss": 0.2365, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07569721115537849, |
|
"grad_norm": 2.09697417316823, |
|
"learning_rate": 9.964695686049676e-06, |
|
"loss": 0.2343, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07768924302788845, |
|
"grad_norm": 2.6849483307841076, |
|
"learning_rate": 9.962815452099985e-06, |
|
"loss": 0.2169, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0796812749003984, |
|
"grad_norm": 1.8234840582625904, |
|
"learning_rate": 9.960886626674302e-06, |
|
"loss": 0.2119, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08167330677290836, |
|
"grad_norm": 1.7256360371957784, |
|
"learning_rate": 9.95890922865797e-06, |
|
"loss": 0.1769, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.08366533864541832, |
|
"grad_norm": 1.682991846433481, |
|
"learning_rate": 9.956883277411914e-06, |
|
"loss": 0.1724, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.08565737051792828, |
|
"grad_norm": 2.167281430513665, |
|
"learning_rate": 9.954808792772447e-06, |
|
"loss": 0.2185, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08764940239043825, |
|
"grad_norm": 1.7035143668249393, |
|
"learning_rate": 9.952685795051078e-06, |
|
"loss": 0.1701, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.08964143426294821, |
|
"grad_norm": 1.9522204858955863, |
|
"learning_rate": 9.95051430503431e-06, |
|
"loss": 0.1977, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09163346613545817, |
|
"grad_norm": 1.9293284789264147, |
|
"learning_rate": 9.948294343983446e-06, |
|
"loss": 0.1995, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09362549800796813, |
|
"grad_norm": 2.221989822189503, |
|
"learning_rate": 9.94602593363437e-06, |
|
"loss": 0.2135, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.09561752988047809, |
|
"grad_norm": 2.5630931672539123, |
|
"learning_rate": 9.943709096197334e-06, |
|
"loss": 0.1935, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.09760956175298804, |
|
"grad_norm": 2.2472057427928256, |
|
"learning_rate": 9.941343854356757e-06, |
|
"loss": 0.1932, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.099601593625498, |
|
"grad_norm": 2.2907511035454076, |
|
"learning_rate": 9.938930231270982e-06, |
|
"loss": 0.2136, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10159362549800798, |
|
"grad_norm": 2.1818748427349854, |
|
"learning_rate": 9.93646825057206e-06, |
|
"loss": 0.2028, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.10358565737051793, |
|
"grad_norm": 1.9532878719288238, |
|
"learning_rate": 9.933957936365515e-06, |
|
"loss": 0.1867, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.10557768924302789, |
|
"grad_norm": 2.2638694550211667, |
|
"learning_rate": 9.931399313230112e-06, |
|
"loss": 0.2124, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.10756972111553785, |
|
"grad_norm": 2.0453202854269597, |
|
"learning_rate": 9.928792406217615e-06, |
|
"loss": 0.2013, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.10956175298804781, |
|
"grad_norm": 2.092666084016267, |
|
"learning_rate": 9.926137240852539e-06, |
|
"loss": 0.1815, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11155378486055777, |
|
"grad_norm": 1.7643433151250012, |
|
"learning_rate": 9.9234338431319e-06, |
|
"loss": 0.1758, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.11354581673306773, |
|
"grad_norm": 1.7711425457667331, |
|
"learning_rate": 9.920682239524968e-06, |
|
"loss": 0.1732, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.11553784860557768, |
|
"grad_norm": 1.97738896833117, |
|
"learning_rate": 9.917882456972999e-06, |
|
"loss": 0.1995, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.11752988047808766, |
|
"grad_norm": 1.4375108861286383, |
|
"learning_rate": 9.915034522888972e-06, |
|
"loss": 0.1485, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.11952191235059761, |
|
"grad_norm": 2.0375847131853293, |
|
"learning_rate": 9.912138465157325e-06, |
|
"loss": 0.2021, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12151394422310757, |
|
"grad_norm": 2.168757999506768, |
|
"learning_rate": 9.909194312133681e-06, |
|
"loss": 0.172, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.12350597609561753, |
|
"grad_norm": 1.859742755561109, |
|
"learning_rate": 9.90620209264457e-06, |
|
"loss": 0.1538, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1254980079681275, |
|
"grad_norm": 1.800175258878161, |
|
"learning_rate": 9.90316183598714e-06, |
|
"loss": 0.1643, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.12749003984063745, |
|
"grad_norm": 1.780289250472305, |
|
"learning_rate": 9.900073571928887e-06, |
|
"loss": 0.1448, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1294820717131474, |
|
"grad_norm": 1.8989241084218624, |
|
"learning_rate": 9.896937330707341e-06, |
|
"loss": 0.1661, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13147410358565736, |
|
"grad_norm": 1.7382723265927287, |
|
"learning_rate": 9.893753143029792e-06, |
|
"loss": 0.1794, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.13346613545816732, |
|
"grad_norm": 1.6940222793391266, |
|
"learning_rate": 9.89052104007297e-06, |
|
"loss": 0.154, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.13545816733067728, |
|
"grad_norm": 2.091556995024388, |
|
"learning_rate": 9.887241053482756e-06, |
|
"loss": 0.1795, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.13745019920318724, |
|
"grad_norm": 2.378952761035815, |
|
"learning_rate": 9.883913215373862e-06, |
|
"loss": 0.172, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1394422310756972, |
|
"grad_norm": 2.0726149359464143, |
|
"learning_rate": 9.880537558329518e-06, |
|
"loss": 0.16, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14143426294820718, |
|
"grad_norm": 1.8509199481149878, |
|
"learning_rate": 9.877114115401159e-06, |
|
"loss": 0.151, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.14342629482071714, |
|
"grad_norm": 1.9383364231106874, |
|
"learning_rate": 9.87364292010809e-06, |
|
"loss": 0.161, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1454183266932271, |
|
"grad_norm": 1.9835120874739343, |
|
"learning_rate": 9.870124006437172e-06, |
|
"loss": 0.1885, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.14741035856573706, |
|
"grad_norm": 1.7057694570966788, |
|
"learning_rate": 9.866557408842479e-06, |
|
"loss": 0.1619, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.14940239043824702, |
|
"grad_norm": 1.7963768846634989, |
|
"learning_rate": 9.86294316224496e-06, |
|
"loss": 0.1438, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15139442231075698, |
|
"grad_norm": 1.8886735096348262, |
|
"learning_rate": 9.859281302032107e-06, |
|
"loss": 0.1753, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.15338645418326693, |
|
"grad_norm": 2.099235241932376, |
|
"learning_rate": 9.855571864057598e-06, |
|
"loss": 0.1837, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1553784860557769, |
|
"grad_norm": 2.1695503922620683, |
|
"learning_rate": 9.85181488464095e-06, |
|
"loss": 0.1776, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.15737051792828685, |
|
"grad_norm": 2.2241373204323884, |
|
"learning_rate": 9.848010400567167e-06, |
|
"loss": 0.1762, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1593625498007968, |
|
"grad_norm": 2.0073012916773556, |
|
"learning_rate": 9.844158449086372e-06, |
|
"loss": 0.1643, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16135458167330677, |
|
"grad_norm": 1.900677106194213, |
|
"learning_rate": 9.84025906791345e-06, |
|
"loss": 0.1647, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.16334661354581673, |
|
"grad_norm": 2.2420248448336646, |
|
"learning_rate": 9.836312295227674e-06, |
|
"loss": 0.1904, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.16533864541832669, |
|
"grad_norm": 2.0352049260820966, |
|
"learning_rate": 9.832318169672334e-06, |
|
"loss": 0.1652, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.16733067729083664, |
|
"grad_norm": 2.1040895543260834, |
|
"learning_rate": 9.828276730354353e-06, |
|
"loss": 0.1681, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1693227091633466, |
|
"grad_norm": 2.0323467551141543, |
|
"learning_rate": 9.824188016843915e-06, |
|
"loss": 0.1646, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.17131474103585656, |
|
"grad_norm": 1.7648807229029235, |
|
"learning_rate": 9.820052069174062e-06, |
|
"loss": 0.1581, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.17330677290836655, |
|
"grad_norm": 2.256460995644407, |
|
"learning_rate": 9.81586892784032e-06, |
|
"loss": 0.1937, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.1752988047808765, |
|
"grad_norm": 2.1166732483303745, |
|
"learning_rate": 9.811638633800287e-06, |
|
"loss": 0.189, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.17729083665338646, |
|
"grad_norm": 2.1883580991082527, |
|
"learning_rate": 9.807361228473241e-06, |
|
"loss": 0.2019, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.17928286852589642, |
|
"grad_norm": 1.8188619314670527, |
|
"learning_rate": 9.803036753739733e-06, |
|
"loss": 0.1633, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18127490039840638, |
|
"grad_norm": 2.130272928076839, |
|
"learning_rate": 9.798665251941172e-06, |
|
"loss": 0.1858, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.18326693227091634, |
|
"grad_norm": 1.9280374177349535, |
|
"learning_rate": 9.794246765879421e-06, |
|
"loss": 0.1367, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1852589641434263, |
|
"grad_norm": 2.0068283142732564, |
|
"learning_rate": 9.789781338816362e-06, |
|
"loss": 0.1552, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.18725099601593626, |
|
"grad_norm": 2.1363237967962387, |
|
"learning_rate": 9.785269014473487e-06, |
|
"loss": 0.1635, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1892430278884462, |
|
"grad_norm": 2.2218212298896978, |
|
"learning_rate": 9.780709837031464e-06, |
|
"loss": 0.1625, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19123505976095617, |
|
"grad_norm": 1.9767577152960487, |
|
"learning_rate": 9.776103851129706e-06, |
|
"loss": 0.1632, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.19322709163346613, |
|
"grad_norm": 2.3121968917141094, |
|
"learning_rate": 9.77145110186593e-06, |
|
"loss": 0.1621, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1952191235059761, |
|
"grad_norm": 1.7755655818003055, |
|
"learning_rate": 9.766751634795719e-06, |
|
"loss": 0.1669, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.19721115537848605, |
|
"grad_norm": 2.240571790906682, |
|
"learning_rate": 9.762005495932076e-06, |
|
"loss": 0.1637, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.199203187250996, |
|
"grad_norm": 1.6446940187832602, |
|
"learning_rate": 9.757212731744973e-06, |
|
"loss": 0.1302, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20119521912350596, |
|
"grad_norm": 2.0617960985029504, |
|
"learning_rate": 9.752373389160896e-06, |
|
"loss": 0.1707, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.20318725099601595, |
|
"grad_norm": 2.1835252538751413, |
|
"learning_rate": 9.747487515562384e-06, |
|
"loss": 0.1649, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2051792828685259, |
|
"grad_norm": 2.0129569799572407, |
|
"learning_rate": 9.742555158787567e-06, |
|
"loss": 0.1637, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.20717131474103587, |
|
"grad_norm": 2.524863713870793, |
|
"learning_rate": 9.737576367129694e-06, |
|
"loss": 0.1816, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.20916334661354583, |
|
"grad_norm": 1.9649026368356415, |
|
"learning_rate": 9.73255118933667e-06, |
|
"loss": 0.1216, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.21115537848605578, |
|
"grad_norm": 1.6210946660994265, |
|
"learning_rate": 9.727479674610565e-06, |
|
"loss": 0.1424, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.21314741035856574, |
|
"grad_norm": 2.371276035809818, |
|
"learning_rate": 9.722361872607142e-06, |
|
"loss": 0.1943, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2151394422310757, |
|
"grad_norm": 1.760306649155103, |
|
"learning_rate": 9.717197833435367e-06, |
|
"loss": 0.1588, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.21713147410358566, |
|
"grad_norm": 1.850921085250863, |
|
"learning_rate": 9.71198760765692e-06, |
|
"loss": 0.135, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.21912350597609562, |
|
"grad_norm": 1.91926012170198, |
|
"learning_rate": 9.706731246285701e-06, |
|
"loss": 0.146, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22111553784860558, |
|
"grad_norm": 1.9074743120890272, |
|
"learning_rate": 9.701428800787325e-06, |
|
"loss": 0.1546, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.22310756972111553, |
|
"grad_norm": 1.8574536892339737, |
|
"learning_rate": 9.696080323078621e-06, |
|
"loss": 0.15, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2250996015936255, |
|
"grad_norm": 1.9674896794532148, |
|
"learning_rate": 9.690685865527132e-06, |
|
"loss": 0.1605, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.22709163346613545, |
|
"grad_norm": 1.683408319737727, |
|
"learning_rate": 9.685245480950584e-06, |
|
"loss": 0.1483, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2290836653386454, |
|
"grad_norm": 1.8357325701673104, |
|
"learning_rate": 9.679759222616389e-06, |
|
"loss": 0.1342, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23107569721115537, |
|
"grad_norm": 1.7645234238909926, |
|
"learning_rate": 9.67422714424111e-06, |
|
"loss": 0.1496, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.23306772908366533, |
|
"grad_norm": 2.3580150329396097, |
|
"learning_rate": 9.668649299989939e-06, |
|
"loss": 0.1602, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2350597609561753, |
|
"grad_norm": 1.9878477683225815, |
|
"learning_rate": 9.663025744476167e-06, |
|
"loss": 0.1592, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.23705179282868527, |
|
"grad_norm": 2.1066728943604685, |
|
"learning_rate": 9.657356532760647e-06, |
|
"loss": 0.1586, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.23904382470119523, |
|
"grad_norm": 2.4857773499345814, |
|
"learning_rate": 9.651641720351262e-06, |
|
"loss": 0.1706, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2410358565737052, |
|
"grad_norm": 1.9669140495950204, |
|
"learning_rate": 9.645881363202371e-06, |
|
"loss": 0.135, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.24302788844621515, |
|
"grad_norm": 2.129283733459999, |
|
"learning_rate": 9.640075517714272e-06, |
|
"loss": 0.1785, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2450199203187251, |
|
"grad_norm": 1.9937349989457298, |
|
"learning_rate": 9.634224240732641e-06, |
|
"loss": 0.1453, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.24701195219123506, |
|
"grad_norm": 2.1385569770971298, |
|
"learning_rate": 9.628327589547977e-06, |
|
"loss": 0.1475, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.24900398406374502, |
|
"grad_norm": 2.28108046376594, |
|
"learning_rate": 9.622385621895046e-06, |
|
"loss": 0.1813, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.250996015936255, |
|
"grad_norm": 2.148801989847443, |
|
"learning_rate": 9.616398395952313e-06, |
|
"loss": 0.1683, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.25298804780876494, |
|
"grad_norm": 1.8652803065183108, |
|
"learning_rate": 9.610365970341369e-06, |
|
"loss": 0.1494, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2549800796812749, |
|
"grad_norm": 1.8989160712067141, |
|
"learning_rate": 9.604288404126362e-06, |
|
"loss": 0.146, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.25697211155378485, |
|
"grad_norm": 1.7384157916220273, |
|
"learning_rate": 9.598165756813418e-06, |
|
"loss": 0.1359, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2589641434262948, |
|
"grad_norm": 2.2085278004769324, |
|
"learning_rate": 9.591998088350055e-06, |
|
"loss": 0.1784, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.26095617529880477, |
|
"grad_norm": 1.6343923035740249, |
|
"learning_rate": 9.585785459124595e-06, |
|
"loss": 0.1376, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.26294820717131473, |
|
"grad_norm": 2.1234396006125396, |
|
"learning_rate": 9.579527929965581e-06, |
|
"loss": 0.1702, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2649402390438247, |
|
"grad_norm": 1.7801773475854494, |
|
"learning_rate": 9.573225562141174e-06, |
|
"loss": 0.1474, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.26693227091633465, |
|
"grad_norm": 1.4560862756744588, |
|
"learning_rate": 9.566878417358559e-06, |
|
"loss": 0.1246, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2689243027888446, |
|
"grad_norm": 2.025058996733023, |
|
"learning_rate": 9.56048655776333e-06, |
|
"loss": 0.1639, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.27091633466135456, |
|
"grad_norm": 1.8323049535503093, |
|
"learning_rate": 9.554050045938893e-06, |
|
"loss": 0.1452, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.2729083665338645, |
|
"grad_norm": 1.8874907449560725, |
|
"learning_rate": 9.54756894490585e-06, |
|
"loss": 0.1502, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2749003984063745, |
|
"grad_norm": 2.5711934437137316, |
|
"learning_rate": 9.541043318121379e-06, |
|
"loss": 0.1767, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.27689243027888444, |
|
"grad_norm": 2.6684694094439223, |
|
"learning_rate": 9.534473229478613e-06, |
|
"loss": 0.1809, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.2788844621513944, |
|
"grad_norm": 1.8024037893437692, |
|
"learning_rate": 9.52785874330602e-06, |
|
"loss": 0.139, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.28087649402390436, |
|
"grad_norm": 2.0746711127055155, |
|
"learning_rate": 9.521199924366766e-06, |
|
"loss": 0.1645, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.28286852589641437, |
|
"grad_norm": 1.8375760908575898, |
|
"learning_rate": 9.514496837858085e-06, |
|
"loss": 0.1409, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.2848605577689243, |
|
"grad_norm": 2.155790335928457, |
|
"learning_rate": 9.507749549410641e-06, |
|
"loss": 0.1527, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.2868525896414343, |
|
"grad_norm": 1.9735656021269132, |
|
"learning_rate": 9.500958125087882e-06, |
|
"loss": 0.1454, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.28884462151394424, |
|
"grad_norm": 1.9447279614136819, |
|
"learning_rate": 9.494122631385397e-06, |
|
"loss": 0.147, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2908366533864542, |
|
"grad_norm": 1.8420179664244611, |
|
"learning_rate": 9.487243135230259e-06, |
|
"loss": 0.1266, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.29282868525896416, |
|
"grad_norm": 2.0895817258939613, |
|
"learning_rate": 9.480319703980382e-06, |
|
"loss": 0.1752, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.2948207171314741, |
|
"grad_norm": 2.368984572890111, |
|
"learning_rate": 9.473352405423845e-06, |
|
"loss": 0.1716, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.2968127490039841, |
|
"grad_norm": 2.3614919475438336, |
|
"learning_rate": 9.466341307778239e-06, |
|
"loss": 0.1853, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.29880478087649404, |
|
"grad_norm": 1.7497898226508322, |
|
"learning_rate": 9.459286479690002e-06, |
|
"loss": 0.1487, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.300796812749004, |
|
"grad_norm": 2.319602310333304, |
|
"learning_rate": 9.452187990233737e-06, |
|
"loss": 0.1564, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.30278884462151395, |
|
"grad_norm": 1.9314144164174814, |
|
"learning_rate": 9.445045908911536e-06, |
|
"loss": 0.1574, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3047808764940239, |
|
"grad_norm": 2.0498544875744726, |
|
"learning_rate": 9.437860305652314e-06, |
|
"loss": 0.1609, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.30677290836653387, |
|
"grad_norm": 2.1271214704509895, |
|
"learning_rate": 9.430631250811107e-06, |
|
"loss": 0.1453, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.30876494023904383, |
|
"grad_norm": 1.5971982417533808, |
|
"learning_rate": 9.42335881516839e-06, |
|
"loss": 0.1293, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3107569721115538, |
|
"grad_norm": 1.7060046369161204, |
|
"learning_rate": 9.416043069929389e-06, |
|
"loss": 0.1308, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.31274900398406374, |
|
"grad_norm": 2.2558369441074744, |
|
"learning_rate": 9.408684086723375e-06, |
|
"loss": 0.1894, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3147410358565737, |
|
"grad_norm": 2.040622936675236, |
|
"learning_rate": 9.401281937602966e-06, |
|
"loss": 0.1536, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.31673306772908366, |
|
"grad_norm": 2.021178408176851, |
|
"learning_rate": 9.393836695043429e-06, |
|
"loss": 0.1316, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.3187250996015936, |
|
"grad_norm": 1.9053653174415077, |
|
"learning_rate": 9.386348431941953e-06, |
|
"loss": 0.1445, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3207171314741036, |
|
"grad_norm": 2.3821069319286576, |
|
"learning_rate": 9.378817221616955e-06, |
|
"loss": 0.1498, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.32270916334661354, |
|
"grad_norm": 1.987196571819016, |
|
"learning_rate": 9.371243137807353e-06, |
|
"loss": 0.1351, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3247011952191235, |
|
"grad_norm": 2.012726967199412, |
|
"learning_rate": 9.363626254671835e-06, |
|
"loss": 0.1263, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.32669322709163345, |
|
"grad_norm": 1.8276010793351352, |
|
"learning_rate": 9.355966646788152e-06, |
|
"loss": 0.1357, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.3286852589641434, |
|
"grad_norm": 1.8020618013754388, |
|
"learning_rate": 9.34826438915237e-06, |
|
"loss": 0.1235, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.33067729083665337, |
|
"grad_norm": 1.854178246940745, |
|
"learning_rate": 9.340519557178149e-06, |
|
"loss": 0.1481, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.33266932270916333, |
|
"grad_norm": 1.9660957079874257, |
|
"learning_rate": 9.332732226695997e-06, |
|
"loss": 0.1354, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3346613545816733, |
|
"grad_norm": 1.903678628134176, |
|
"learning_rate": 9.324902473952529e-06, |
|
"loss": 0.154, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.33665338645418325, |
|
"grad_norm": 1.8793502875666162, |
|
"learning_rate": 9.317030375609721e-06, |
|
"loss": 0.1415, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3386454183266932, |
|
"grad_norm": 2.2655417937058355, |
|
"learning_rate": 9.309116008744164e-06, |
|
"loss": 0.1805, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.34063745019920316, |
|
"grad_norm": 2.224518864361946, |
|
"learning_rate": 9.301159450846296e-06, |
|
"loss": 0.1555, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.3426294820717131, |
|
"grad_norm": 2.594561985506128, |
|
"learning_rate": 9.293160779819658e-06, |
|
"loss": 0.1695, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.34462151394422313, |
|
"grad_norm": 2.0293944680745257, |
|
"learning_rate": 9.285120073980127e-06, |
|
"loss": 0.147, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3466135458167331, |
|
"grad_norm": 1.8842194158472625, |
|
"learning_rate": 9.277037412055143e-06, |
|
"loss": 0.1397, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.34860557768924305, |
|
"grad_norm": 2.1876611599257294, |
|
"learning_rate": 9.268912873182945e-06, |
|
"loss": 0.1646, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.350597609561753, |
|
"grad_norm": 2.0439657178756083, |
|
"learning_rate": 9.260746536911792e-06, |
|
"loss": 0.1583, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.35258964143426297, |
|
"grad_norm": 2.3673755824284095, |
|
"learning_rate": 9.25253848319919e-06, |
|
"loss": 0.1779, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.3545816733067729, |
|
"grad_norm": 1.849959819313811, |
|
"learning_rate": 9.244288792411099e-06, |
|
"loss": 0.145, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3565737051792829, |
|
"grad_norm": 1.8494333009850685, |
|
"learning_rate": 9.235997545321156e-06, |
|
"loss": 0.1402, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.35856573705179284, |
|
"grad_norm": 1.8409139333031814, |
|
"learning_rate": 9.227664823109884e-06, |
|
"loss": 0.1472, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3605577689243028, |
|
"grad_norm": 2.198400559444892, |
|
"learning_rate": 9.219290707363885e-06, |
|
"loss": 0.1683, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.36254980079681276, |
|
"grad_norm": 1.847947075455182, |
|
"learning_rate": 9.210875280075056e-06, |
|
"loss": 0.1472, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.3645418326693227, |
|
"grad_norm": 1.9180035039553798, |
|
"learning_rate": 9.202418623639779e-06, |
|
"loss": 0.1473, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.3665338645418327, |
|
"grad_norm": 2.1866401096601358, |
|
"learning_rate": 9.193920820858113e-06, |
|
"loss": 0.1744, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.36852589641434264, |
|
"grad_norm": 1.8712622384696984, |
|
"learning_rate": 9.185381954932984e-06, |
|
"loss": 0.1566, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3705179282868526, |
|
"grad_norm": 1.7798834216487947, |
|
"learning_rate": 9.17680210946938e-06, |
|
"loss": 0.1236, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.37250996015936255, |
|
"grad_norm": 1.82049432105803, |
|
"learning_rate": 9.168181368473514e-06, |
|
"loss": 0.146, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3745019920318725, |
|
"grad_norm": 2.0877726702358923, |
|
"learning_rate": 9.159519816352021e-06, |
|
"loss": 0.1389, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.37649402390438247, |
|
"grad_norm": 2.051030898440553, |
|
"learning_rate": 9.150817537911111e-06, |
|
"loss": 0.1448, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.3784860557768924, |
|
"grad_norm": 1.7874327218426942, |
|
"learning_rate": 9.142074618355763e-06, |
|
"loss": 0.1214, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3804780876494024, |
|
"grad_norm": 2.347061822643037, |
|
"learning_rate": 9.133291143288865e-06, |
|
"loss": 0.1751, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.38247011952191234, |
|
"grad_norm": 1.8742517441369677, |
|
"learning_rate": 9.124467198710401e-06, |
|
"loss": 0.1326, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3844621513944223, |
|
"grad_norm": 1.8857929156382558, |
|
"learning_rate": 9.115602871016585e-06, |
|
"loss": 0.1261, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.38645418326693226, |
|
"grad_norm": 1.9824443306668558, |
|
"learning_rate": 9.106698246999036e-06, |
|
"loss": 0.1461, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3884462151394422, |
|
"grad_norm": 2.0467435985376348, |
|
"learning_rate": 9.097753413843909e-06, |
|
"loss": 0.1384, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3904382470119522, |
|
"grad_norm": 2.141133625722717, |
|
"learning_rate": 9.08876845913106e-06, |
|
"loss": 0.1497, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.39243027888446214, |
|
"grad_norm": 2.234122705708421, |
|
"learning_rate": 9.079743470833177e-06, |
|
"loss": 0.145, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.3944223107569721, |
|
"grad_norm": 2.682514937204839, |
|
"learning_rate": 9.070678537314919e-06, |
|
"loss": 0.1762, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.39641434262948205, |
|
"grad_norm": 2.2720011535768885, |
|
"learning_rate": 9.061573747332053e-06, |
|
"loss": 0.154, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.398406374501992, |
|
"grad_norm": 1.9823400380634495, |
|
"learning_rate": 9.052429190030589e-06, |
|
"loss": 0.1573, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.398406374501992, |
|
"eval_loss": 0.14404042065143585, |
|
"eval_runtime": 3.1342, |
|
"eval_samples_per_second": 13.082, |
|
"eval_steps_per_second": 3.51, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.40039840637450197, |
|
"grad_norm": 2.4201576423705538, |
|
"learning_rate": 9.0432449549459e-06, |
|
"loss": 0.1734, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.40239043824701193, |
|
"grad_norm": 1.8987324574568356, |
|
"learning_rate": 9.03402113200185e-06, |
|
"loss": 0.1357, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.4043824701195219, |
|
"grad_norm": 1.8455215459039969, |
|
"learning_rate": 9.02475781150991e-06, |
|
"loss": 0.1412, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.4063745019920319, |
|
"grad_norm": 2.1617514944433367, |
|
"learning_rate": 9.015455084168279e-06, |
|
"loss": 0.1658, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.40836653386454186, |
|
"grad_norm": 2.1331463500913665, |
|
"learning_rate": 9.00611304106099e-06, |
|
"loss": 0.1647, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4103585657370518, |
|
"grad_norm": 1.8588463567752136, |
|
"learning_rate": 8.996731773657022e-06, |
|
"loss": 0.1615, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4123505976095618, |
|
"grad_norm": 1.5786356112405115, |
|
"learning_rate": 8.987311373809405e-06, |
|
"loss": 0.1381, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.41434262948207173, |
|
"grad_norm": 2.069308545418217, |
|
"learning_rate": 8.977851933754317e-06, |
|
"loss": 0.1485, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4163346613545817, |
|
"grad_norm": 1.8545162202812966, |
|
"learning_rate": 8.968353546110181e-06, |
|
"loss": 0.1311, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.41832669322709165, |
|
"grad_norm": 1.7202119799614204, |
|
"learning_rate": 8.95881630387677e-06, |
|
"loss": 0.1265, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4203187250996016, |
|
"grad_norm": 2.1311229993111276, |
|
"learning_rate": 8.949240300434272e-06, |
|
"loss": 0.1544, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.42231075697211157, |
|
"grad_norm": 1.932887530350482, |
|
"learning_rate": 8.939625629542401e-06, |
|
"loss": 0.1358, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.4243027888446215, |
|
"grad_norm": 1.996095976241167, |
|
"learning_rate": 8.929972385339466e-06, |
|
"loss": 0.1395, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.4262948207171315, |
|
"grad_norm": 1.7960261511185895, |
|
"learning_rate": 8.92028066234145e-06, |
|
"loss": 0.1436, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.42828685258964144, |
|
"grad_norm": 1.6930133554882265, |
|
"learning_rate": 8.910550555441085e-06, |
|
"loss": 0.1246, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4302788844621514, |
|
"grad_norm": 2.225395808052067, |
|
"learning_rate": 8.900782159906927e-06, |
|
"loss": 0.165, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.43227091633466136, |
|
"grad_norm": 1.942615677966355, |
|
"learning_rate": 8.890975571382419e-06, |
|
"loss": 0.1418, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4342629482071713, |
|
"grad_norm": 2.151401945627201, |
|
"learning_rate": 8.881130885884955e-06, |
|
"loss": 0.1405, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.4362549800796813, |
|
"grad_norm": 2.101742983984682, |
|
"learning_rate": 8.871248199804944e-06, |
|
"loss": 0.1415, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.43824701195219123, |
|
"grad_norm": 2.0803368131274973, |
|
"learning_rate": 8.861327609904859e-06, |
|
"loss": 0.1401, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4402390438247012, |
|
"grad_norm": 1.9987975497294423, |
|
"learning_rate": 8.851369213318293e-06, |
|
"loss": 0.1582, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.44223107569721115, |
|
"grad_norm": 1.882395487923144, |
|
"learning_rate": 8.841373107549014e-06, |
|
"loss": 0.1363, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.4442231075697211, |
|
"grad_norm": 1.7141338375852764, |
|
"learning_rate": 8.831339390469998e-06, |
|
"loss": 0.1304, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.44621513944223107, |
|
"grad_norm": 1.8845992727826126, |
|
"learning_rate": 8.821268160322482e-06, |
|
"loss": 0.1251, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.448207171314741, |
|
"grad_norm": 1.807320122447534, |
|
"learning_rate": 8.811159515714998e-06, |
|
"loss": 0.1202, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.450199203187251, |
|
"grad_norm": 1.993372692147205, |
|
"learning_rate": 8.801013555622403e-06, |
|
"loss": 0.1624, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.45219123505976094, |
|
"grad_norm": 1.8931180135396861, |
|
"learning_rate": 8.790830379384918e-06, |
|
"loss": 0.1408, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.4541832669322709, |
|
"grad_norm": 1.9138251654922296, |
|
"learning_rate": 8.780610086707149e-06, |
|
"loss": 0.1365, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.45617529880478086, |
|
"grad_norm": 1.9534031291438365, |
|
"learning_rate": 8.770352777657112e-06, |
|
"loss": 0.1363, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4581673306772908, |
|
"grad_norm": 1.7954406324400658, |
|
"learning_rate": 8.760058552665262e-06, |
|
"loss": 0.1201, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4601593625498008, |
|
"grad_norm": 2.0677561411750163, |
|
"learning_rate": 8.749727512523491e-06, |
|
"loss": 0.1479, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.46215139442231074, |
|
"grad_norm": 2.27753550624284, |
|
"learning_rate": 8.739359758384162e-06, |
|
"loss": 0.1417, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.4641434262948207, |
|
"grad_norm": 2.2517352339516523, |
|
"learning_rate": 8.728955391759102e-06, |
|
"loss": 0.1535, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.46613545816733065, |
|
"grad_norm": 2.070124167431417, |
|
"learning_rate": 8.718514514518617e-06, |
|
"loss": 0.1553, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.4681274900398406, |
|
"grad_norm": 2.064421588096535, |
|
"learning_rate": 8.708037228890494e-06, |
|
"loss": 0.1459, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.4701195219123506, |
|
"grad_norm": 1.8120445831055432, |
|
"learning_rate": 8.697523637458997e-06, |
|
"loss": 0.1391, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.4721115537848606, |
|
"grad_norm": 1.8340032254229388, |
|
"learning_rate": 8.686973843163868e-06, |
|
"loss": 0.1445, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.47410358565737054, |
|
"grad_norm": 2.139642376885461, |
|
"learning_rate": 8.676387949299307e-06, |
|
"loss": 0.1343, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.4760956175298805, |
|
"grad_norm": 2.0856211998803973, |
|
"learning_rate": 8.665766059512977e-06, |
|
"loss": 0.1422, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.47808764940239046, |
|
"grad_norm": 2.0329687486696906, |
|
"learning_rate": 8.655108277804975e-06, |
|
"loss": 0.124, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4800796812749004, |
|
"grad_norm": 1.8725092328941733, |
|
"learning_rate": 8.644414708526824e-06, |
|
"loss": 0.1319, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4820717131474104, |
|
"grad_norm": 2.2032287252466585, |
|
"learning_rate": 8.63368545638045e-06, |
|
"loss": 0.1499, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.48406374501992033, |
|
"grad_norm": 2.04185058829803, |
|
"learning_rate": 8.622920626417141e-06, |
|
"loss": 0.1501, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.4860557768924303, |
|
"grad_norm": 2.003978218913384, |
|
"learning_rate": 8.612120324036548e-06, |
|
"loss": 0.1382, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.48804780876494025, |
|
"grad_norm": 1.8579294263600015, |
|
"learning_rate": 8.601284654985623e-06, |
|
"loss": 0.1342, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4900398406374502, |
|
"grad_norm": 2.072837174686408, |
|
"learning_rate": 8.590413725357605e-06, |
|
"loss": 0.142, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.49203187250996017, |
|
"grad_norm": 1.9780709854474574, |
|
"learning_rate": 8.57950764159097e-06, |
|
"loss": 0.1395, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.4940239043824701, |
|
"grad_norm": 2.5022268847929054, |
|
"learning_rate": 8.568566510468392e-06, |
|
"loss": 0.1649, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.4960159362549801, |
|
"grad_norm": 1.7767948533765374, |
|
"learning_rate": 8.557590439115697e-06, |
|
"loss": 0.129, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.49800796812749004, |
|
"grad_norm": 1.8237813486682513, |
|
"learning_rate": 8.546579535000819e-06, |
|
"loss": 0.1429, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.108585214242547, |
|
"learning_rate": 8.535533905932739e-06, |
|
"loss": 0.151, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.50199203187251, |
|
"grad_norm": 1.8936980275001982, |
|
"learning_rate": 8.524453660060434e-06, |
|
"loss": 0.135, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5039840637450199, |
|
"grad_norm": 1.953069313184316, |
|
"learning_rate": 8.513338905871819e-06, |
|
"loss": 0.1377, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5059760956175299, |
|
"grad_norm": 1.6215779669447803, |
|
"learning_rate": 8.502189752192685e-06, |
|
"loss": 0.1269, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5079681274900398, |
|
"grad_norm": 1.9115865609618454, |
|
"learning_rate": 8.491006308185632e-06, |
|
"loss": 0.1329, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5099601593625498, |
|
"grad_norm": 1.5921580761982146, |
|
"learning_rate": 8.479788683348996e-06, |
|
"loss": 0.1022, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5119521912350598, |
|
"grad_norm": 2.0973700556988786, |
|
"learning_rate": 8.468536987515788e-06, |
|
"loss": 0.134, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5139442231075697, |
|
"grad_norm": 2.045549212493722, |
|
"learning_rate": 8.457251330852608e-06, |
|
"loss": 0.1385, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5159362549800797, |
|
"grad_norm": 1.9007334882981535, |
|
"learning_rate": 8.445931823858568e-06, |
|
"loss": 0.1566, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5179282868525896, |
|
"grad_norm": 1.5993440106427084, |
|
"learning_rate": 8.434578577364218e-06, |
|
"loss": 0.1066, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5199203187250996, |
|
"grad_norm": 2.2603377167523973, |
|
"learning_rate": 8.423191702530453e-06, |
|
"loss": 0.1367, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5219123505976095, |
|
"grad_norm": 2.096603538865956, |
|
"learning_rate": 8.411771310847426e-06, |
|
"loss": 0.1558, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5239043824701195, |
|
"grad_norm": 2.104670890811567, |
|
"learning_rate": 8.400317514133454e-06, |
|
"loss": 0.1245, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5258964143426295, |
|
"grad_norm": 2.1577997074465483, |
|
"learning_rate": 8.388830424533935e-06, |
|
"loss": 0.1587, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5278884462151394, |
|
"grad_norm": 1.9717314334953373, |
|
"learning_rate": 8.377310154520232e-06, |
|
"loss": 0.1445, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5298804780876494, |
|
"grad_norm": 2.18540940693717, |
|
"learning_rate": 8.365756816888586e-06, |
|
"loss": 0.1369, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5318725099601593, |
|
"grad_norm": 1.8217822120680458, |
|
"learning_rate": 8.354170524759008e-06, |
|
"loss": 0.1407, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5338645418326693, |
|
"grad_norm": 1.4966007977041007, |
|
"learning_rate": 8.342551391574165e-06, |
|
"loss": 0.1101, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5358565737051793, |
|
"grad_norm": 1.5671763413455242, |
|
"learning_rate": 8.33089953109828e-06, |
|
"loss": 0.1185, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.5378486055776892, |
|
"grad_norm": 1.8560918618066358, |
|
"learning_rate": 8.319215057416007e-06, |
|
"loss": 0.1246, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5398406374501992, |
|
"grad_norm": 1.8485098665344575, |
|
"learning_rate": 8.307498084931327e-06, |
|
"loss": 0.1324, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.5418326693227091, |
|
"grad_norm": 2.2839051894864237, |
|
"learning_rate": 8.295748728366414e-06, |
|
"loss": 0.1342, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.5438247011952191, |
|
"grad_norm": 1.6783837554851726, |
|
"learning_rate": 8.283967102760518e-06, |
|
"loss": 0.1192, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.545816733067729, |
|
"grad_norm": 2.0329474847075097, |
|
"learning_rate": 8.272153323468842e-06, |
|
"loss": 0.139, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.547808764940239, |
|
"grad_norm": 1.7759478728678548, |
|
"learning_rate": 8.260307506161407e-06, |
|
"loss": 0.1396, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.549800796812749, |
|
"grad_norm": 1.8489007641542803, |
|
"learning_rate": 8.248429766821925e-06, |
|
"loss": 0.1035, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5517928286852589, |
|
"grad_norm": 1.3042776152655502, |
|
"learning_rate": 8.236520221746657e-06, |
|
"loss": 0.0979, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5537848605577689, |
|
"grad_norm": 1.938450288353747, |
|
"learning_rate": 8.22457898754328e-06, |
|
"loss": 0.1355, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5557768924302788, |
|
"grad_norm": 1.9473763473782129, |
|
"learning_rate": 8.212606181129737e-06, |
|
"loss": 0.1354, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.5577689243027888, |
|
"grad_norm": 2.3529581735987684, |
|
"learning_rate": 8.200601919733106e-06, |
|
"loss": 0.1478, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5597609561752988, |
|
"grad_norm": 2.2045125976242277, |
|
"learning_rate": 8.18856632088844e-06, |
|
"loss": 0.1403, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.5617529880478087, |
|
"grad_norm": 1.8741947740872564, |
|
"learning_rate": 8.176499502437621e-06, |
|
"loss": 0.1224, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.5637450199203188, |
|
"grad_norm": 1.6785392877999905, |
|
"learning_rate": 8.164401582528202e-06, |
|
"loss": 0.1114, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.5657370517928287, |
|
"grad_norm": 1.805219996685288, |
|
"learning_rate": 8.15227267961226e-06, |
|
"loss": 0.1281, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5677290836653387, |
|
"grad_norm": 2.262380478016552, |
|
"learning_rate": 8.14011291244523e-06, |
|
"loss": 0.1495, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5697211155378487, |
|
"grad_norm": 1.6184059179144714, |
|
"learning_rate": 8.127922400084736e-06, |
|
"loss": 0.1077, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5717131474103586, |
|
"grad_norm": 1.7035754466739166, |
|
"learning_rate": 8.115701261889437e-06, |
|
"loss": 0.1081, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.5737051792828686, |
|
"grad_norm": 2.260155985593039, |
|
"learning_rate": 8.10344961751785e-06, |
|
"loss": 0.1367, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.5756972111553785, |
|
"grad_norm": 1.985868190263633, |
|
"learning_rate": 8.091167586927184e-06, |
|
"loss": 0.1184, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5776892430278885, |
|
"grad_norm": 2.2887926342626144, |
|
"learning_rate": 8.078855290372161e-06, |
|
"loss": 0.1721, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5796812749003984, |
|
"grad_norm": 2.027120648554562, |
|
"learning_rate": 8.066512848403837e-06, |
|
"loss": 0.1342, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.5816733067729084, |
|
"grad_norm": 1.9925359431101095, |
|
"learning_rate": 8.054140381868435e-06, |
|
"loss": 0.1353, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5836653386454184, |
|
"grad_norm": 2.0044583230657027, |
|
"learning_rate": 8.041738011906144e-06, |
|
"loss": 0.1321, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5856573705179283, |
|
"grad_norm": 2.1494533777158993, |
|
"learning_rate": 8.02930585994994e-06, |
|
"loss": 0.132, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.5876494023904383, |
|
"grad_norm": 2.1658323964218322, |
|
"learning_rate": 8.016844047724404e-06, |
|
"loss": 0.156, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5896414342629482, |
|
"grad_norm": 1.928648624837369, |
|
"learning_rate": 8.004352697244516e-06, |
|
"loss": 0.142, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.5916334661354582, |
|
"grad_norm": 1.612340205944657, |
|
"learning_rate": 7.991831930814475e-06, |
|
"loss": 0.1101, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.5936254980079682, |
|
"grad_norm": 1.6508868869854736, |
|
"learning_rate": 7.979281871026493e-06, |
|
"loss": 0.1281, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5956175298804781, |
|
"grad_norm": 1.8982891507623567, |
|
"learning_rate": 7.966702640759598e-06, |
|
"loss": 0.1321, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.5976095617529881, |
|
"grad_norm": 1.8987961958988515, |
|
"learning_rate": 7.954094363178421e-06, |
|
"loss": 0.1229, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.599601593625498, |
|
"grad_norm": 2.1488376153773725, |
|
"learning_rate": 7.941457161732011e-06, |
|
"loss": 0.1544, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.601593625498008, |
|
"grad_norm": 1.563913063819495, |
|
"learning_rate": 7.928791160152603e-06, |
|
"loss": 0.0999, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.603585657370518, |
|
"grad_norm": 2.113124277431984, |
|
"learning_rate": 7.916096482454425e-06, |
|
"loss": 0.1452, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6055776892430279, |
|
"grad_norm": 1.4930277954467297, |
|
"learning_rate": 7.903373252932474e-06, |
|
"loss": 0.1182, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6075697211155379, |
|
"grad_norm": 1.9567309939982196, |
|
"learning_rate": 7.890621596161295e-06, |
|
"loss": 0.1485, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6095617529880478, |
|
"grad_norm": 2.115801942619243, |
|
"learning_rate": 7.877841636993777e-06, |
|
"loss": 0.1343, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6115537848605578, |
|
"grad_norm": 1.6597909053499225, |
|
"learning_rate": 7.865033500559916e-06, |
|
"loss": 0.1221, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6135458167330677, |
|
"grad_norm": 2.268140954664932, |
|
"learning_rate": 7.852197312265592e-06, |
|
"loss": 0.1548, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6155378486055777, |
|
"grad_norm": 1.7438803215243461, |
|
"learning_rate": 7.83933319779135e-06, |
|
"loss": 0.1197, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6175298804780877, |
|
"grad_norm": 1.7475479034468657, |
|
"learning_rate": 7.826441283091158e-06, |
|
"loss": 0.1205, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6195219123505976, |
|
"grad_norm": 1.917559079622195, |
|
"learning_rate": 7.813521694391183e-06, |
|
"loss": 0.1466, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6215139442231076, |
|
"grad_norm": 1.9007500389944925, |
|
"learning_rate": 7.800574558188548e-06, |
|
"loss": 0.1297, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6235059760956175, |
|
"grad_norm": 2.3138697129523624, |
|
"learning_rate": 7.787600001250098e-06, |
|
"loss": 0.1435, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6254980079681275, |
|
"grad_norm": 1.803331449338951, |
|
"learning_rate": 7.77459815061116e-06, |
|
"loss": 0.1265, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.6274900398406374, |
|
"grad_norm": 1.729785988240638, |
|
"learning_rate": 7.761569133574291e-06, |
|
"loss": 0.1208, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6294820717131474, |
|
"grad_norm": 1.5580779702404448, |
|
"learning_rate": 7.748513077708044e-06, |
|
"loss": 0.1279, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.6314741035856574, |
|
"grad_norm": 1.5557075595562477, |
|
"learning_rate": 7.735430110845707e-06, |
|
"loss": 0.1218, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6334661354581673, |
|
"grad_norm": 2.190732094534515, |
|
"learning_rate": 7.722320361084057e-06, |
|
"loss": 0.1435, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.6354581673306773, |
|
"grad_norm": 2.066394679381035, |
|
"learning_rate": 7.70918395678211e-06, |
|
"loss": 0.1552, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.6374501992031872, |
|
"grad_norm": 2.386874848230383, |
|
"learning_rate": 7.69602102655985e-06, |
|
"loss": 0.1511, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6394422310756972, |
|
"grad_norm": 1.9135694055276182, |
|
"learning_rate": 7.682831699296991e-06, |
|
"loss": 0.1494, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.6414342629482072, |
|
"grad_norm": 1.9705232701476278, |
|
"learning_rate": 7.669616104131697e-06, |
|
"loss": 0.1452, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6434262948207171, |
|
"grad_norm": 1.853209913025916, |
|
"learning_rate": 7.656374370459321e-06, |
|
"loss": 0.1336, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6454183266932271, |
|
"grad_norm": 1.772429733282671, |
|
"learning_rate": 7.643106627931148e-06, |
|
"loss": 0.1131, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.647410358565737, |
|
"grad_norm": 2.331769433940343, |
|
"learning_rate": 7.629813006453114e-06, |
|
"loss": 0.1417, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.649402390438247, |
|
"grad_norm": 2.1257162988182037, |
|
"learning_rate": 7.616493636184538e-06, |
|
"loss": 0.1537, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.651394422310757, |
|
"grad_norm": 1.6032004847610595, |
|
"learning_rate": 7.603148647536853e-06, |
|
"loss": 0.1208, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.6533864541832669, |
|
"grad_norm": 1.9414518467604416, |
|
"learning_rate": 7.5897781711723215e-06, |
|
"loss": 0.1189, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.6553784860557769, |
|
"grad_norm": 1.8035099668212278, |
|
"learning_rate": 7.576382338002759e-06, |
|
"loss": 0.1289, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.6573705179282868, |
|
"grad_norm": 2.5935545240432347, |
|
"learning_rate": 7.56296127918825e-06, |
|
"loss": 0.1589, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6593625498007968, |
|
"grad_norm": 2.0001976484989887, |
|
"learning_rate": 7.549515126135871e-06, |
|
"loss": 0.1329, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.6613545816733067, |
|
"grad_norm": 2.2072498495488135, |
|
"learning_rate": 7.536044010498396e-06, |
|
"loss": 0.1562, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.6633466135458167, |
|
"grad_norm": 1.942689121900593, |
|
"learning_rate": 7.5225480641730084e-06, |
|
"loss": 0.138, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.6653386454183267, |
|
"grad_norm": 2.025836699652673, |
|
"learning_rate": 7.509027419300017e-06, |
|
"loss": 0.1442, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.6673306772908366, |
|
"grad_norm": 1.75497256540315, |
|
"learning_rate": 7.495482208261554e-06, |
|
"loss": 0.1324, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6693227091633466, |
|
"grad_norm": 1.730691502621294, |
|
"learning_rate": 7.48191256368028e-06, |
|
"loss": 0.1037, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.6713147410358565, |
|
"grad_norm": 2.1818943000426825, |
|
"learning_rate": 7.468318618418089e-06, |
|
"loss": 0.1197, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6733067729083665, |
|
"grad_norm": 2.282870874976595, |
|
"learning_rate": 7.454700505574805e-06, |
|
"loss": 0.1651, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.6752988047808764, |
|
"grad_norm": 1.9598715085355076, |
|
"learning_rate": 7.44105835848688e-06, |
|
"loss": 0.13, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.6772908366533864, |
|
"grad_norm": 1.32121304959289, |
|
"learning_rate": 7.427392310726088e-06, |
|
"loss": 0.088, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6792828685258964, |
|
"grad_norm": 2.0337415726445296, |
|
"learning_rate": 7.413702496098218e-06, |
|
"loss": 0.1425, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.6812749003984063, |
|
"grad_norm": 2.1827633175836008, |
|
"learning_rate": 7.39998904864176e-06, |
|
"loss": 0.1507, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.6832669322709163, |
|
"grad_norm": 2.10204580287355, |
|
"learning_rate": 7.3862521026265986e-06, |
|
"loss": 0.1411, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.6852589641434262, |
|
"grad_norm": 2.1566650379770675, |
|
"learning_rate": 7.372491792552694e-06, |
|
"loss": 0.1427, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.6872509960159362, |
|
"grad_norm": 1.9617078978276397, |
|
"learning_rate": 7.3587082531487675e-06, |
|
"loss": 0.1167, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6892430278884463, |
|
"grad_norm": 1.912009857403253, |
|
"learning_rate": 7.344901619370977e-06, |
|
"loss": 0.1234, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.6912350597609562, |
|
"grad_norm": 2.127034247138793, |
|
"learning_rate": 7.331072026401611e-06, |
|
"loss": 0.1176, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.6932270916334662, |
|
"grad_norm": 2.2559426016327078, |
|
"learning_rate": 7.31721960964774e-06, |
|
"loss": 0.1343, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.6952191235059761, |
|
"grad_norm": 1.9327539929872006, |
|
"learning_rate": 7.303344504739914e-06, |
|
"loss": 0.1277, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.6972111553784861, |
|
"grad_norm": 1.6413171465166834, |
|
"learning_rate": 7.289446847530822e-06, |
|
"loss": 0.0954, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6992031872509961, |
|
"grad_norm": 1.7965552697782228, |
|
"learning_rate": 7.2755267740939664e-06, |
|
"loss": 0.1182, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.701195219123506, |
|
"grad_norm": 2.264078406151102, |
|
"learning_rate": 7.261584420722328e-06, |
|
"loss": 0.1239, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.703187250996016, |
|
"grad_norm": 2.1024709135826423, |
|
"learning_rate": 7.2476199239270354e-06, |
|
"loss": 0.1252, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.7051792828685259, |
|
"grad_norm": 1.850775717751215, |
|
"learning_rate": 7.2336334204360206e-06, |
|
"loss": 0.096, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.7071713147410359, |
|
"grad_norm": 1.9294667982978444, |
|
"learning_rate": 7.21962504719269e-06, |
|
"loss": 0.1288, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7091633466135459, |
|
"grad_norm": 2.0263901644198645, |
|
"learning_rate": 7.20559494135458e-06, |
|
"loss": 0.1273, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.7111553784860558, |
|
"grad_norm": 2.519289066543413, |
|
"learning_rate": 7.19154324029201e-06, |
|
"loss": 0.1442, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.7131474103585658, |
|
"grad_norm": 2.3400623940562886, |
|
"learning_rate": 7.177470081586743e-06, |
|
"loss": 0.1531, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7151394422310757, |
|
"grad_norm": 1.913600852089038, |
|
"learning_rate": 7.163375603030634e-06, |
|
"loss": 0.1142, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.7171314741035857, |
|
"grad_norm": 2.4654502175383657, |
|
"learning_rate": 7.149259942624287e-06, |
|
"loss": 0.1498, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7191235059760956, |
|
"grad_norm": 1.7809542538273848, |
|
"learning_rate": 7.135123238575693e-06, |
|
"loss": 0.0992, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.7211155378486056, |
|
"grad_norm": 2.236498970023622, |
|
"learning_rate": 7.120965629298891e-06, |
|
"loss": 0.1231, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7231075697211156, |
|
"grad_norm": 1.5579483159438388, |
|
"learning_rate": 7.1067872534126004e-06, |
|
"loss": 0.1208, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7250996015936255, |
|
"grad_norm": 2.623828745113357, |
|
"learning_rate": 7.092588249738871e-06, |
|
"loss": 0.1127, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7270916334661355, |
|
"grad_norm": 1.926947918523208, |
|
"learning_rate": 7.0783687573017215e-06, |
|
"loss": 0.1114, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7290836653386454, |
|
"grad_norm": 1.5604394877235856, |
|
"learning_rate": 7.064128915325777e-06, |
|
"loss": 0.1116, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.7310756972111554, |
|
"grad_norm": 1.9819642302802614, |
|
"learning_rate": 7.049868863234911e-06, |
|
"loss": 0.1231, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7330677290836654, |
|
"grad_norm": 2.04013986237716, |
|
"learning_rate": 7.03558874065087e-06, |
|
"loss": 0.1304, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.7350597609561753, |
|
"grad_norm": 1.8528858096630048, |
|
"learning_rate": 7.021288687391917e-06, |
|
"loss": 0.1232, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7370517928286853, |
|
"grad_norm": 1.931351631379841, |
|
"learning_rate": 7.00696884347146e-06, |
|
"loss": 0.1202, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7390438247011952, |
|
"grad_norm": 2.3674440717388068, |
|
"learning_rate": 6.9926293490966755e-06, |
|
"loss": 0.1578, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7410358565737052, |
|
"grad_norm": 1.6376485083203747, |
|
"learning_rate": 6.978270344667143e-06, |
|
"loss": 0.0929, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7430278884462151, |
|
"grad_norm": 1.705119215989148, |
|
"learning_rate": 6.963891970773465e-06, |
|
"loss": 0.1146, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7450199203187251, |
|
"grad_norm": 1.7904536633328327, |
|
"learning_rate": 6.949494368195896e-06, |
|
"loss": 0.0929, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7470119521912351, |
|
"grad_norm": 1.7463821286816135, |
|
"learning_rate": 6.935077677902955e-06, |
|
"loss": 0.1146, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.749003984063745, |
|
"grad_norm": 1.8343889129423383, |
|
"learning_rate": 6.920642041050055e-06, |
|
"loss": 0.1231, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.750996015936255, |
|
"grad_norm": 2.112401907355922, |
|
"learning_rate": 6.9061875989781165e-06, |
|
"loss": 0.1323, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7529880478087649, |
|
"grad_norm": 2.3269817857970456, |
|
"learning_rate": 6.891714493212183e-06, |
|
"loss": 0.1461, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.7549800796812749, |
|
"grad_norm": 2.164617790401808, |
|
"learning_rate": 6.877222865460037e-06, |
|
"loss": 0.1465, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7569721115537849, |
|
"grad_norm": 1.8394467625888513, |
|
"learning_rate": 6.862712857610812e-06, |
|
"loss": 0.1078, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7589641434262948, |
|
"grad_norm": 1.7772473532916493, |
|
"learning_rate": 6.848184611733602e-06, |
|
"loss": 0.1173, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7609561752988048, |
|
"grad_norm": 1.90451509004974, |
|
"learning_rate": 6.833638270076071e-06, |
|
"loss": 0.1164, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7629482071713147, |
|
"grad_norm": 1.3933704850807993, |
|
"learning_rate": 6.819073975063064e-06, |
|
"loss": 0.0916, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.7649402390438247, |
|
"grad_norm": 2.1083722503136073, |
|
"learning_rate": 6.804491869295207e-06, |
|
"loss": 0.1482, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7669322709163346, |
|
"grad_norm": 2.1302855766956466, |
|
"learning_rate": 6.789892095547511e-06, |
|
"loss": 0.1255, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7689243027888446, |
|
"grad_norm": 2.0218204203878747, |
|
"learning_rate": 6.7752747967679825e-06, |
|
"loss": 0.1414, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7709163346613546, |
|
"grad_norm": 1.6484036328649272, |
|
"learning_rate": 6.7606401160762105e-06, |
|
"loss": 0.1006, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7729083665338645, |
|
"grad_norm": 1.9790346630572022, |
|
"learning_rate": 6.745988196761976e-06, |
|
"loss": 0.1253, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.7749003984063745, |
|
"grad_norm": 2.4027541617777923, |
|
"learning_rate": 6.731319182283844e-06, |
|
"loss": 0.1516, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7768924302788844, |
|
"grad_norm": 2.076146090352596, |
|
"learning_rate": 6.71663321626776e-06, |
|
"loss": 0.1324, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7788844621513944, |
|
"grad_norm": 2.176712178015417, |
|
"learning_rate": 6.7019304425056484e-06, |
|
"loss": 0.1133, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7808764940239044, |
|
"grad_norm": 2.270343647007193, |
|
"learning_rate": 6.687211004953992e-06, |
|
"loss": 0.1543, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.7828685258964143, |
|
"grad_norm": 2.2157431300656114, |
|
"learning_rate": 6.672475047732436e-06, |
|
"loss": 0.1419, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.7848605577689243, |
|
"grad_norm": 1.8472598515997243, |
|
"learning_rate": 6.657722715122372e-06, |
|
"loss": 0.1171, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.7868525896414342, |
|
"grad_norm": 1.9746467863711765, |
|
"learning_rate": 6.6429541515655215e-06, |
|
"loss": 0.1214, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7888446215139442, |
|
"grad_norm": 1.838974412736928, |
|
"learning_rate": 6.628169501662527e-06, |
|
"loss": 0.1288, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.7908366533864541, |
|
"grad_norm": 2.315243867151159, |
|
"learning_rate": 6.613368910171533e-06, |
|
"loss": 0.1328, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.7928286852589641, |
|
"grad_norm": 2.081415971304626, |
|
"learning_rate": 6.598552522006772e-06, |
|
"loss": 0.1259, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.7948207171314741, |
|
"grad_norm": 2.3461895895379414, |
|
"learning_rate": 6.583720482237143e-06, |
|
"loss": 0.1449, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.796812749003984, |
|
"grad_norm": 2.2278972284384233, |
|
"learning_rate": 6.568872936084789e-06, |
|
"loss": 0.153, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.796812749003984, |
|
"eval_loss": 0.12241560965776443, |
|
"eval_runtime": 3.1403, |
|
"eval_samples_per_second": 13.056, |
|
"eval_steps_per_second": 3.503, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.798804780876494, |
|
"grad_norm": 1.5055565131366404, |
|
"learning_rate": 6.554010028923682e-06, |
|
"loss": 0.0904, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8007968127490039, |
|
"grad_norm": 1.9134714393025904, |
|
"learning_rate": 6.539131906278189e-06, |
|
"loss": 0.1221, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8027888446215139, |
|
"grad_norm": 1.676345510524098, |
|
"learning_rate": 6.524238713821661e-06, |
|
"loss": 0.1049, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8047808764940239, |
|
"grad_norm": 1.8483113242007039, |
|
"learning_rate": 6.509330597374993e-06, |
|
"loss": 0.1209, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.8067729083665338, |
|
"grad_norm": 2.1209942031252096, |
|
"learning_rate": 6.494407702905207e-06, |
|
"loss": 0.1274, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8087649402390438, |
|
"grad_norm": 2.090229556416619, |
|
"learning_rate": 6.479470176524015e-06, |
|
"loss": 0.1378, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8107569721115537, |
|
"grad_norm": 1.5536560670439392, |
|
"learning_rate": 6.464518164486395e-06, |
|
"loss": 0.1042, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8127490039840638, |
|
"grad_norm": 2.136864457352028, |
|
"learning_rate": 6.44955181318915e-06, |
|
"loss": 0.1356, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8147410358565738, |
|
"grad_norm": 1.6345271519484204, |
|
"learning_rate": 6.434571269169487e-06, |
|
"loss": 0.1103, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8167330677290837, |
|
"grad_norm": 2.2602024940481877, |
|
"learning_rate": 6.419576679103571e-06, |
|
"loss": 0.1412, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8187250996015937, |
|
"grad_norm": 1.950297407773567, |
|
"learning_rate": 6.404568189805095e-06, |
|
"loss": 0.1177, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8207171314741036, |
|
"grad_norm": 1.7261505783927313, |
|
"learning_rate": 6.389545948223841e-06, |
|
"loss": 0.1061, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8227091633466136, |
|
"grad_norm": 1.7921074892509365, |
|
"learning_rate": 6.374510101444242e-06, |
|
"loss": 0.1149, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8247011952191236, |
|
"grad_norm": 1.693484859639379, |
|
"learning_rate": 6.359460796683937e-06, |
|
"loss": 0.1013, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8266932270916335, |
|
"grad_norm": 1.8929838681555156, |
|
"learning_rate": 6.344398181292338e-06, |
|
"loss": 0.1117, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8286852589641435, |
|
"grad_norm": 1.9178716343933822, |
|
"learning_rate": 6.329322402749181e-06, |
|
"loss": 0.1133, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8306772908366534, |
|
"grad_norm": 2.06968861690023, |
|
"learning_rate": 6.314233608663085e-06, |
|
"loss": 0.1347, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8326693227091634, |
|
"grad_norm": 2.150136253809127, |
|
"learning_rate": 6.299131946770104e-06, |
|
"loss": 0.1102, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8346613545816733, |
|
"grad_norm": 2.0515375853571105, |
|
"learning_rate": 6.284017564932284e-06, |
|
"loss": 0.1321, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.8366533864541833, |
|
"grad_norm": 1.972608566259217, |
|
"learning_rate": 6.2688906111362115e-06, |
|
"loss": 0.1317, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8386454183266933, |
|
"grad_norm": 2.1835844876053176, |
|
"learning_rate": 6.253751233491565e-06, |
|
"loss": 0.1368, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8406374501992032, |
|
"grad_norm": 1.9358655395330886, |
|
"learning_rate": 6.238599580229673e-06, |
|
"loss": 0.1242, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8426294820717132, |
|
"grad_norm": 1.9756294879268275, |
|
"learning_rate": 6.2234357997020475e-06, |
|
"loss": 0.1228, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8446215139442231, |
|
"grad_norm": 2.0407229239665616, |
|
"learning_rate": 6.208260040378946e-06, |
|
"loss": 0.1069, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.8466135458167331, |
|
"grad_norm": 2.469118035586134, |
|
"learning_rate": 6.193072450847909e-06, |
|
"loss": 0.1353, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.848605577689243, |
|
"grad_norm": 1.3150969469878047, |
|
"learning_rate": 6.1778731798123105e-06, |
|
"loss": 0.0766, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.850597609561753, |
|
"grad_norm": 1.7880637008772997, |
|
"learning_rate": 6.162662376089894e-06, |
|
"loss": 0.1111, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.852589641434263, |
|
"grad_norm": 1.7519177689555412, |
|
"learning_rate": 6.147440188611324e-06, |
|
"loss": 0.1172, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.8545816733067729, |
|
"grad_norm": 1.6790695470314452, |
|
"learning_rate": 6.132206766418728e-06, |
|
"loss": 0.1307, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.8565737051792829, |
|
"grad_norm": 2.329181770786997, |
|
"learning_rate": 6.116962258664228e-06, |
|
"loss": 0.1398, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8585657370517928, |
|
"grad_norm": 2.1360997891269298, |
|
"learning_rate": 6.10170681460849e-06, |
|
"loss": 0.1348, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8605577689243028, |
|
"grad_norm": 2.0074048591988745, |
|
"learning_rate": 6.0864405836192575e-06, |
|
"loss": 0.1257, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.8625498007968128, |
|
"grad_norm": 1.9966192443343993, |
|
"learning_rate": 6.071163715169889e-06, |
|
"loss": 0.1233, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8645418326693227, |
|
"grad_norm": 1.6688986826934915, |
|
"learning_rate": 6.055876358837894e-06, |
|
"loss": 0.1034, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.8665338645418327, |
|
"grad_norm": 2.1720920201705574, |
|
"learning_rate": 6.040578664303476e-06, |
|
"loss": 0.122, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8685258964143426, |
|
"grad_norm": 2.2349377112258884, |
|
"learning_rate": 6.025270781348055e-06, |
|
"loss": 0.1305, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8705179282868526, |
|
"grad_norm": 1.9735977301953636, |
|
"learning_rate": 6.009952859852809e-06, |
|
"loss": 0.1209, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8725099601593626, |
|
"grad_norm": 2.1227111042570446, |
|
"learning_rate": 5.994625049797206e-06, |
|
"loss": 0.1313, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8745019920318725, |
|
"grad_norm": 1.8425960578334477, |
|
"learning_rate": 5.979287501257531e-06, |
|
"loss": 0.1045, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8764940239043825, |
|
"grad_norm": 1.9096751566518075, |
|
"learning_rate": 5.963940364405425e-06, |
|
"loss": 0.1094, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8784860557768924, |
|
"grad_norm": 2.0206284611558396, |
|
"learning_rate": 5.9485837895064e-06, |
|
"loss": 0.1147, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.8804780876494024, |
|
"grad_norm": 1.7978035110634043, |
|
"learning_rate": 5.933217926918386e-06, |
|
"loss": 0.1039, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.8824701195219123, |
|
"grad_norm": 1.3448754139603254, |
|
"learning_rate": 5.9178429270902445e-06, |
|
"loss": 0.0751, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.8844621513944223, |
|
"grad_norm": 1.8906404995180646, |
|
"learning_rate": 5.902458940560304e-06, |
|
"loss": 0.1118, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.8864541832669323, |
|
"grad_norm": 2.248607528732412, |
|
"learning_rate": 5.88706611795488e-06, |
|
"loss": 0.1344, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8884462151394422, |
|
"grad_norm": 2.0500336844672566, |
|
"learning_rate": 5.871664609986804e-06, |
|
"loss": 0.1172, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.8904382470119522, |
|
"grad_norm": 1.7749390516298367, |
|
"learning_rate": 5.85625456745395e-06, |
|
"loss": 0.096, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.8924302788844621, |
|
"grad_norm": 1.6650651187511873, |
|
"learning_rate": 5.8408361412377475e-06, |
|
"loss": 0.112, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.8944223107569721, |
|
"grad_norm": 1.8698848014452956, |
|
"learning_rate": 5.8254094823017195e-06, |
|
"loss": 0.1094, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.896414342629482, |
|
"grad_norm": 2.31893715626561, |
|
"learning_rate": 5.80997474168999e-06, |
|
"loss": 0.1524, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.898406374501992, |
|
"grad_norm": 1.7326523317597042, |
|
"learning_rate": 5.794532070525817e-06, |
|
"loss": 0.1145, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.900398406374502, |
|
"grad_norm": 1.8551714873826095, |
|
"learning_rate": 5.779081620010104e-06, |
|
"loss": 0.1064, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9023904382470119, |
|
"grad_norm": 2.000481014536217, |
|
"learning_rate": 5.763623541419925e-06, |
|
"loss": 0.1152, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9043824701195219, |
|
"grad_norm": 1.514470739576286, |
|
"learning_rate": 5.748157986107038e-06, |
|
"loss": 0.0937, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.9063745019920318, |
|
"grad_norm": 1.7848046901968333, |
|
"learning_rate": 5.73268510549641e-06, |
|
"loss": 0.1182, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9083665338645418, |
|
"grad_norm": 2.123090333270882, |
|
"learning_rate": 5.717205051084731e-06, |
|
"loss": 0.1207, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9103585657370518, |
|
"grad_norm": 1.9230718298635634, |
|
"learning_rate": 5.7017179744389276e-06, |
|
"loss": 0.1105, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9123505976095617, |
|
"grad_norm": 2.218085940556976, |
|
"learning_rate": 5.686224027194682e-06, |
|
"loss": 0.1313, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.9143426294820717, |
|
"grad_norm": 1.8057081110378266, |
|
"learning_rate": 5.6707233610549505e-06, |
|
"loss": 0.1248, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9163346613545816, |
|
"grad_norm": 2.3840484640685715, |
|
"learning_rate": 5.655216127788472e-06, |
|
"loss": 0.1432, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9183266932270916, |
|
"grad_norm": 2.020040126603478, |
|
"learning_rate": 5.639702479228286e-06, |
|
"loss": 0.1113, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9203187250996016, |
|
"grad_norm": 2.074741713662112, |
|
"learning_rate": 5.6241825672702444e-06, |
|
"loss": 0.1067, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9223107569721115, |
|
"grad_norm": 2.0160262890682854, |
|
"learning_rate": 5.608656543871524e-06, |
|
"loss": 0.1058, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9243027888446215, |
|
"grad_norm": 1.576835822191225, |
|
"learning_rate": 5.593124561049141e-06, |
|
"loss": 0.1148, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9262948207171314, |
|
"grad_norm": 1.7157626959132268, |
|
"learning_rate": 5.57758677087846e-06, |
|
"loss": 0.101, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9282868525896414, |
|
"grad_norm": 2.0879882110494465, |
|
"learning_rate": 5.5620433254917075e-06, |
|
"loss": 0.1223, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9302788844621513, |
|
"grad_norm": 1.6905070173375571, |
|
"learning_rate": 5.546494377076478e-06, |
|
"loss": 0.0903, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9322709163346613, |
|
"grad_norm": 1.8563993306347122, |
|
"learning_rate": 5.530940077874248e-06, |
|
"loss": 0.1065, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.9342629482071713, |
|
"grad_norm": 1.9938558563881068, |
|
"learning_rate": 5.515380580178887e-06, |
|
"loss": 0.1157, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9362549800796812, |
|
"grad_norm": 1.9502947540268583, |
|
"learning_rate": 5.499816036335157e-06, |
|
"loss": 0.1213, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9382470119521913, |
|
"grad_norm": 2.0454369850534997, |
|
"learning_rate": 5.484246598737234e-06, |
|
"loss": 0.1321, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.9402390438247012, |
|
"grad_norm": 2.215669709261003, |
|
"learning_rate": 5.468672419827208e-06, |
|
"loss": 0.1102, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.9422310756972112, |
|
"grad_norm": 1.9756062583329823, |
|
"learning_rate": 5.453093652093588e-06, |
|
"loss": 0.1232, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9442231075697212, |
|
"grad_norm": 1.799263076671508, |
|
"learning_rate": 5.437510448069815e-06, |
|
"loss": 0.1081, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9462151394422311, |
|
"grad_norm": 2.3780944126238475, |
|
"learning_rate": 5.421922960332767e-06, |
|
"loss": 0.1121, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9482071713147411, |
|
"grad_norm": 1.8280247291319718, |
|
"learning_rate": 5.406331341501264e-06, |
|
"loss": 0.1324, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.950199203187251, |
|
"grad_norm": 2.15631656729653, |
|
"learning_rate": 5.390735744234573e-06, |
|
"loss": 0.1287, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.952191235059761, |
|
"grad_norm": 1.7263537522584287, |
|
"learning_rate": 5.375136321230915e-06, |
|
"loss": 0.086, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.954183266932271, |
|
"grad_norm": 2.3095667041223753, |
|
"learning_rate": 5.359533225225971e-06, |
|
"loss": 0.1374, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.9561752988047809, |
|
"grad_norm": 1.7602753374015587, |
|
"learning_rate": 5.34392660899138e-06, |
|
"loss": 0.1078, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9581673306772909, |
|
"grad_norm": 2.1582500460632263, |
|
"learning_rate": 5.328316625333251e-06, |
|
"loss": 0.1415, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9601593625498008, |
|
"grad_norm": 1.8078245176783199, |
|
"learning_rate": 5.312703427090665e-06, |
|
"loss": 0.1202, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.9621513944223108, |
|
"grad_norm": 1.6747521348323762, |
|
"learning_rate": 5.297087167134176e-06, |
|
"loss": 0.1105, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9641434262948207, |
|
"grad_norm": 2.108324269500925, |
|
"learning_rate": 5.281467998364314e-06, |
|
"loss": 0.1166, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.9661354581673307, |
|
"grad_norm": 1.899301062782763, |
|
"learning_rate": 5.265846073710093e-06, |
|
"loss": 0.1032, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9681274900398407, |
|
"grad_norm": 1.7986533480735443, |
|
"learning_rate": 5.250221546127508e-06, |
|
"loss": 0.1023, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.9701195219123506, |
|
"grad_norm": 1.7143716184742455, |
|
"learning_rate": 5.2345945685980404e-06, |
|
"loss": 0.1056, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.9721115537848606, |
|
"grad_norm": 1.8989132351788962, |
|
"learning_rate": 5.218965294127155e-06, |
|
"loss": 0.1246, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.9741035856573705, |
|
"grad_norm": 1.692065620829879, |
|
"learning_rate": 5.203333875742814e-06, |
|
"loss": 0.1002, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.9760956175298805, |
|
"grad_norm": 2.1756954006047207, |
|
"learning_rate": 5.187700466493966e-06, |
|
"loss": 0.1316, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9780876494023905, |
|
"grad_norm": 1.5992575019291566, |
|
"learning_rate": 5.1720652194490504e-06, |
|
"loss": 0.0879, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.9800796812749004, |
|
"grad_norm": 1.8188244998440222, |
|
"learning_rate": 5.156428287694508e-06, |
|
"loss": 0.113, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.9820717131474104, |
|
"grad_norm": 1.8310353091664167, |
|
"learning_rate": 5.140789824333266e-06, |
|
"loss": 0.0925, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.9840637450199203, |
|
"grad_norm": 2.2645161516308674, |
|
"learning_rate": 5.125149982483255e-06, |
|
"loss": 0.1333, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.9860557768924303, |
|
"grad_norm": 2.38898722297588, |
|
"learning_rate": 5.109508915275898e-06, |
|
"loss": 0.1262, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9880478087649402, |
|
"grad_norm": 1.894617587966868, |
|
"learning_rate": 5.093866775854618e-06, |
|
"loss": 0.0988, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.9900398406374502, |
|
"grad_norm": 1.8643326954112833, |
|
"learning_rate": 5.078223717373334e-06, |
|
"loss": 0.1151, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.9920318725099602, |
|
"grad_norm": 2.381725171456074, |
|
"learning_rate": 5.062579892994966e-06, |
|
"loss": 0.1243, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.9940239043824701, |
|
"grad_norm": 1.7622591670635168, |
|
"learning_rate": 5.046935455889933e-06, |
|
"loss": 0.1052, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.9960159362549801, |
|
"grad_norm": 2.133367063969825, |
|
"learning_rate": 5.03129055923465e-06, |
|
"loss": 0.1098, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.99800796812749, |
|
"grad_norm": 1.800851217571952, |
|
"learning_rate": 5.0156453562100325e-06, |
|
"loss": 0.1187, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.515740716232249, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0727, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.00199203187251, |
|
"grad_norm": 1.454807758568271, |
|
"learning_rate": 4.984354643789968e-06, |
|
"loss": 0.0645, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.00398406374502, |
|
"grad_norm": 1.0906220710604444, |
|
"learning_rate": 4.968709440765352e-06, |
|
"loss": 0.0463, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.0059760956175299, |
|
"grad_norm": 1.5597035712618907, |
|
"learning_rate": 4.953064544110069e-06, |
|
"loss": 0.0638, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.0079681274900398, |
|
"grad_norm": 1.3271308187362474, |
|
"learning_rate": 4.9374201070050345e-06, |
|
"loss": 0.0564, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.0099601593625498, |
|
"grad_norm": 1.6371457735090003, |
|
"learning_rate": 4.9217762826266665e-06, |
|
"loss": 0.0652, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.0119521912350598, |
|
"grad_norm": 1.3010690622611256, |
|
"learning_rate": 4.906133224145384e-06, |
|
"loss": 0.0478, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.0139442231075697, |
|
"grad_norm": 1.6385417730692136, |
|
"learning_rate": 4.8904910847241025e-06, |
|
"loss": 0.0571, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.0159362549800797, |
|
"grad_norm": 1.460130563576597, |
|
"learning_rate": 4.874850017516746e-06, |
|
"loss": 0.0478, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0179282868525896, |
|
"grad_norm": 1.218172055244865, |
|
"learning_rate": 4.8592101756667345e-06, |
|
"loss": 0.0406, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.0199203187250996, |
|
"grad_norm": 1.380650876589631, |
|
"learning_rate": 4.843571712305493e-06, |
|
"loss": 0.0488, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.0219123505976095, |
|
"grad_norm": 1.4404478206417628, |
|
"learning_rate": 4.82793478055095e-06, |
|
"loss": 0.0423, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.0239043824701195, |
|
"grad_norm": 1.4907418600568456, |
|
"learning_rate": 4.8122995335060365e-06, |
|
"loss": 0.0524, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.0258964143426295, |
|
"grad_norm": 1.3967454054494113, |
|
"learning_rate": 4.796666124257187e-06, |
|
"loss": 0.0427, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.0278884462151394, |
|
"grad_norm": 1.3774126162481908, |
|
"learning_rate": 4.781034705872846e-06, |
|
"loss": 0.0424, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.0298804780876494, |
|
"grad_norm": 1.4995564243577384, |
|
"learning_rate": 4.765405431401961e-06, |
|
"loss": 0.0429, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.0318725099601593, |
|
"grad_norm": 1.4776829368859978, |
|
"learning_rate": 4.7497784538724925e-06, |
|
"loss": 0.0439, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.0338645418326693, |
|
"grad_norm": 1.5766123608089488, |
|
"learning_rate": 4.7341539262899075e-06, |
|
"loss": 0.047, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.0358565737051793, |
|
"grad_norm": 2.010013759539753, |
|
"learning_rate": 4.7185320016356865e-06, |
|
"loss": 0.0495, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.0378486055776892, |
|
"grad_norm": 1.7137673308746173, |
|
"learning_rate": 4.7029128328658255e-06, |
|
"loss": 0.0519, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.0398406374501992, |
|
"grad_norm": 1.8846039575121651, |
|
"learning_rate": 4.687296572909336e-06, |
|
"loss": 0.0518, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.0418326693227091, |
|
"grad_norm": 1.517157653051372, |
|
"learning_rate": 4.671683374666751e-06, |
|
"loss": 0.0376, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.043824701195219, |
|
"grad_norm": 1.5477788120437967, |
|
"learning_rate": 4.656073391008622e-06, |
|
"loss": 0.0509, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.045816733067729, |
|
"grad_norm": 1.5961149461761268, |
|
"learning_rate": 4.64046677477403e-06, |
|
"loss": 0.0406, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.047808764940239, |
|
"grad_norm": 1.7872243106969083, |
|
"learning_rate": 4.624863678769086e-06, |
|
"loss": 0.0484, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.049800796812749, |
|
"grad_norm": 2.0305402714348038, |
|
"learning_rate": 4.609264255765429e-06, |
|
"loss": 0.06, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.051792828685259, |
|
"grad_norm": 1.5196304776495169, |
|
"learning_rate": 4.593668658498737e-06, |
|
"loss": 0.0379, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0537848605577689, |
|
"grad_norm": 1.8563914910463752, |
|
"learning_rate": 4.578077039667235e-06, |
|
"loss": 0.0579, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.0557768924302788, |
|
"grad_norm": 2.0399962964080927, |
|
"learning_rate": 4.562489551930187e-06, |
|
"loss": 0.0554, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.0577689243027888, |
|
"grad_norm": 1.5558523714376704, |
|
"learning_rate": 4.546906347906414e-06, |
|
"loss": 0.0487, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.0597609561752988, |
|
"grad_norm": 2.067131962038375, |
|
"learning_rate": 4.531327580172794e-06, |
|
"loss": 0.0616, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.0617529880478087, |
|
"grad_norm": 1.8515752338818168, |
|
"learning_rate": 4.515753401262767e-06, |
|
"loss": 0.0476, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.0637450199203187, |
|
"grad_norm": 1.6512814928931692, |
|
"learning_rate": 4.5001839636648456e-06, |
|
"loss": 0.0457, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.0657370517928286, |
|
"grad_norm": 1.6309985963797304, |
|
"learning_rate": 4.484619419821116e-06, |
|
"loss": 0.0479, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.0677290836653386, |
|
"grad_norm": 1.3534892174684972, |
|
"learning_rate": 4.469059922125753e-06, |
|
"loss": 0.0351, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.0697211155378485, |
|
"grad_norm": 1.5618587225235019, |
|
"learning_rate": 4.453505622923524e-06, |
|
"loss": 0.0514, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.0717131474103585, |
|
"grad_norm": 1.8799695188430705, |
|
"learning_rate": 4.437956674508295e-06, |
|
"loss": 0.0564, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.0737051792828685, |
|
"grad_norm": 1.6371009327782438, |
|
"learning_rate": 4.422413229121541e-06, |
|
"loss": 0.05, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.0756972111553784, |
|
"grad_norm": 1.7149994790253227, |
|
"learning_rate": 4.4068754389508616e-06, |
|
"loss": 0.0526, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.0776892430278884, |
|
"grad_norm": 1.5057791531519986, |
|
"learning_rate": 4.391343456128479e-06, |
|
"loss": 0.0401, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.0796812749003983, |
|
"grad_norm": 1.6556428738570654, |
|
"learning_rate": 4.375817432729759e-06, |
|
"loss": 0.0501, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.0816733067729083, |
|
"grad_norm": 1.8035332314518027, |
|
"learning_rate": 4.360297520771716e-06, |
|
"loss": 0.0598, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.0836653386454183, |
|
"grad_norm": 1.9373725794619105, |
|
"learning_rate": 4.34478387221153e-06, |
|
"loss": 0.0493, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.0856573705179282, |
|
"grad_norm": 1.2034769948690562, |
|
"learning_rate": 4.329276638945051e-06, |
|
"loss": 0.0397, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.0876494023904382, |
|
"grad_norm": 1.329498987282073, |
|
"learning_rate": 4.3137759728053206e-06, |
|
"loss": 0.0489, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.0896414342629481, |
|
"grad_norm": 1.3953725126886292, |
|
"learning_rate": 4.298282025561076e-06, |
|
"loss": 0.0478, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.091633466135458, |
|
"grad_norm": 1.561219438568973, |
|
"learning_rate": 4.282794948915271e-06, |
|
"loss": 0.0476, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.093625498007968, |
|
"grad_norm": 1.7067688119812607, |
|
"learning_rate": 4.267314894503591e-06, |
|
"loss": 0.0413, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.095617529880478, |
|
"grad_norm": 1.4480366291730635, |
|
"learning_rate": 4.2518420138929645e-06, |
|
"loss": 0.0452, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.097609561752988, |
|
"grad_norm": 1.8458558711170783, |
|
"learning_rate": 4.2363764585800775e-06, |
|
"loss": 0.0598, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.099601593625498, |
|
"grad_norm": 1.60919784354473, |
|
"learning_rate": 4.220918379989898e-06, |
|
"loss": 0.0578, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.1015936254980079, |
|
"grad_norm": 1.5149797237579112, |
|
"learning_rate": 4.205467929474186e-06, |
|
"loss": 0.0414, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.1035856573705178, |
|
"grad_norm": 1.9355788387540067, |
|
"learning_rate": 4.190025258310013e-06, |
|
"loss": 0.0605, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.1055776892430278, |
|
"grad_norm": 1.2712828264468141, |
|
"learning_rate": 4.174590517698284e-06, |
|
"loss": 0.0476, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.1075697211155378, |
|
"grad_norm": 1.7052649700043325, |
|
"learning_rate": 4.159163858762255e-06, |
|
"loss": 0.0448, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.1095617529880477, |
|
"grad_norm": 1.4892279816307104, |
|
"learning_rate": 4.143745432546053e-06, |
|
"loss": 0.0454, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.1115537848605577, |
|
"grad_norm": 1.2192774756681568, |
|
"learning_rate": 4.1283353900131965e-06, |
|
"loss": 0.0389, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.1135458167330676, |
|
"grad_norm": 1.748484313536612, |
|
"learning_rate": 4.112933882045121e-06, |
|
"loss": 0.0527, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.1155378486055776, |
|
"grad_norm": 1.5189552368858088, |
|
"learning_rate": 4.097541059439698e-06, |
|
"loss": 0.0538, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.1175298804780875, |
|
"grad_norm": 1.4923045394934673, |
|
"learning_rate": 4.082157072909757e-06, |
|
"loss": 0.0494, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.1195219123505975, |
|
"grad_norm": 1.7886352200727111, |
|
"learning_rate": 4.066782073081616e-06, |
|
"loss": 0.0516, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.1215139442231075, |
|
"grad_norm": 1.6403729404088143, |
|
"learning_rate": 4.0514162104936025e-06, |
|
"loss": 0.0504, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.1235059760956174, |
|
"grad_norm": 1.794559862930424, |
|
"learning_rate": 4.036059635594578e-06, |
|
"loss": 0.0539, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.1254980079681274, |
|
"grad_norm": 1.27443198183184, |
|
"learning_rate": 4.020712498742469e-06, |
|
"loss": 0.033, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.1274900398406373, |
|
"grad_norm": 1.528559195651957, |
|
"learning_rate": 4.005374950202795e-06, |
|
"loss": 0.0485, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.1294820717131473, |
|
"grad_norm": 1.6682443502200057, |
|
"learning_rate": 3.990047140147192e-06, |
|
"loss": 0.0472, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.1314741035856573, |
|
"grad_norm": 1.5977930972583627, |
|
"learning_rate": 3.974729218651946e-06, |
|
"loss": 0.0435, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.1334661354581672, |
|
"grad_norm": 1.8138279047378771, |
|
"learning_rate": 3.959421335696524e-06, |
|
"loss": 0.0525, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.1354581673306772, |
|
"grad_norm": 1.861112915058331, |
|
"learning_rate": 3.944123641162106e-06, |
|
"loss": 0.0539, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.1374501992031871, |
|
"grad_norm": 1.5212525579613898, |
|
"learning_rate": 3.928836284830113e-06, |
|
"loss": 0.0493, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.139442231075697, |
|
"grad_norm": 1.3446527061200657, |
|
"learning_rate": 3.913559416380743e-06, |
|
"loss": 0.0499, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.1414342629482073, |
|
"grad_norm": 1.2939076783357535, |
|
"learning_rate": 3.898293185391509e-06, |
|
"loss": 0.0323, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.1434262948207172, |
|
"grad_norm": 1.5273046564926365, |
|
"learning_rate": 3.883037741335772e-06, |
|
"loss": 0.0437, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.1454183266932272, |
|
"grad_norm": 1.3539627956837923, |
|
"learning_rate": 3.867793233581272e-06, |
|
"loss": 0.047, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.1474103585657371, |
|
"grad_norm": 1.2884254132839787, |
|
"learning_rate": 3.852559811388676e-06, |
|
"loss": 0.0435, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.149402390438247, |
|
"grad_norm": 1.3084774623960684, |
|
"learning_rate": 3.8373376239101076e-06, |
|
"loss": 0.0483, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.151394422310757, |
|
"grad_norm": 1.242637903895707, |
|
"learning_rate": 3.822126820187691e-06, |
|
"loss": 0.0313, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.153386454183267, |
|
"grad_norm": 1.4593599661421381, |
|
"learning_rate": 3.806927549152091e-06, |
|
"loss": 0.0401, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.155378486055777, |
|
"grad_norm": 1.5535605047130172, |
|
"learning_rate": 3.791739959621054e-06, |
|
"loss": 0.0527, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.157370517928287, |
|
"grad_norm": 1.7540657914683675, |
|
"learning_rate": 3.776564200297953e-06, |
|
"loss": 0.0527, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.159362549800797, |
|
"grad_norm": 1.57101285496225, |
|
"learning_rate": 3.761400419770328e-06, |
|
"loss": 0.0395, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.1613545816733069, |
|
"grad_norm": 1.7555114294741245, |
|
"learning_rate": 3.746248766508435e-06, |
|
"loss": 0.0434, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.1633466135458168, |
|
"grad_norm": 2.1592609021335227, |
|
"learning_rate": 3.7311093888637906e-06, |
|
"loss": 0.0495, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.1653386454183268, |
|
"grad_norm": 1.2940156736695578, |
|
"learning_rate": 3.7159824350677177e-06, |
|
"loss": 0.0387, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.1673306772908367, |
|
"grad_norm": 1.5202353281570202, |
|
"learning_rate": 3.7008680532298962e-06, |
|
"loss": 0.0432, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.1693227091633467, |
|
"grad_norm": 1.4400071402140493, |
|
"learning_rate": 3.685766391336916e-06, |
|
"loss": 0.0463, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.1713147410358566, |
|
"grad_norm": 1.7100573375487627, |
|
"learning_rate": 3.670677597250819e-06, |
|
"loss": 0.0511, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.1733067729083666, |
|
"grad_norm": 1.68459514966065, |
|
"learning_rate": 3.6556018187076624e-06, |
|
"loss": 0.0478, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.1752988047808766, |
|
"grad_norm": 1.6034087639429946, |
|
"learning_rate": 3.6405392033160637e-06, |
|
"loss": 0.04, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.1772908366533865, |
|
"grad_norm": 1.7092195009028075, |
|
"learning_rate": 3.6254898985557598e-06, |
|
"loss": 0.0474, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.1792828685258965, |
|
"grad_norm": 1.5626916752981477, |
|
"learning_rate": 3.6104540517761594e-06, |
|
"loss": 0.0551, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.1812749003984064, |
|
"grad_norm": 1.3965756525646038, |
|
"learning_rate": 3.5954318101949047e-06, |
|
"loss": 0.0369, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.1832669322709164, |
|
"grad_norm": 1.8211192187451488, |
|
"learning_rate": 3.580423320896429e-06, |
|
"loss": 0.0583, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.1852589641434264, |
|
"grad_norm": 1.348709853436774, |
|
"learning_rate": 3.5654287308305137e-06, |
|
"loss": 0.0385, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.1872509960159363, |
|
"grad_norm": 1.5559379215487086, |
|
"learning_rate": 3.55044818681085e-06, |
|
"loss": 0.0456, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.1892430278884463, |
|
"grad_norm": 2.0539321664502976, |
|
"learning_rate": 3.5354818355136058e-06, |
|
"loss": 0.0677, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.1912350597609562, |
|
"grad_norm": 1.4411394411205194, |
|
"learning_rate": 3.5205298234759854e-06, |
|
"loss": 0.0451, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.1932270916334662, |
|
"grad_norm": 1.7774567393386194, |
|
"learning_rate": 3.5055922970947943e-06, |
|
"loss": 0.0492, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.1952191235059761, |
|
"grad_norm": 1.5120643301720722, |
|
"learning_rate": 3.4906694026250075e-06, |
|
"loss": 0.0457, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1952191235059761, |
|
"eval_loss": 0.11691030859947205, |
|
"eval_runtime": 3.1386, |
|
"eval_samples_per_second": 13.063, |
|
"eval_steps_per_second": 3.505, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.197211155378486, |
|
"grad_norm": 1.364354387569068, |
|
"learning_rate": 3.475761286178341e-06, |
|
"loss": 0.0498, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.199203187250996, |
|
"grad_norm": 1.3894575596183438, |
|
"learning_rate": 3.460868093721812e-06, |
|
"loss": 0.038, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.201195219123506, |
|
"grad_norm": 1.8217847559393374, |
|
"learning_rate": 3.44598997107632e-06, |
|
"loss": 0.0567, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.203187250996016, |
|
"grad_norm": 2.1685604089404857, |
|
"learning_rate": 3.431127063915213e-06, |
|
"loss": 0.071, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.205179282868526, |
|
"grad_norm": 1.7472889111167405, |
|
"learning_rate": 3.416279517762858e-06, |
|
"loss": 0.0504, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.207171314741036, |
|
"grad_norm": 1.789030781154645, |
|
"learning_rate": 3.4014474779932295e-06, |
|
"loss": 0.0535, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.2091633466135459, |
|
"grad_norm": 1.5379198221444872, |
|
"learning_rate": 3.386631089828468e-06, |
|
"loss": 0.05, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.2111553784860558, |
|
"grad_norm": 1.3874042612095352, |
|
"learning_rate": 3.371830498337475e-06, |
|
"loss": 0.0373, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.2131474103585658, |
|
"grad_norm": 1.5231435277419703, |
|
"learning_rate": 3.35704584843448e-06, |
|
"loss": 0.037, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.2151394422310757, |
|
"grad_norm": 1.6029887156514784, |
|
"learning_rate": 3.342277284877629e-06, |
|
"loss": 0.0461, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2171314741035857, |
|
"grad_norm": 1.5816632438715723, |
|
"learning_rate": 3.3275249522675656e-06, |
|
"loss": 0.046, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.2191235059760956, |
|
"grad_norm": 1.5428519312912603, |
|
"learning_rate": 3.3127889950460094e-06, |
|
"loss": 0.0472, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.2211155378486056, |
|
"grad_norm": 1.625742641074798, |
|
"learning_rate": 3.2980695574943532e-06, |
|
"loss": 0.0437, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.2231075697211156, |
|
"grad_norm": 1.3751070319547616, |
|
"learning_rate": 3.28336678373224e-06, |
|
"loss": 0.0375, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.2250996015936255, |
|
"grad_norm": 1.9737969684729024, |
|
"learning_rate": 3.268680817716158e-06, |
|
"loss": 0.0585, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.2270916334661355, |
|
"grad_norm": 1.9490150643596276, |
|
"learning_rate": 3.254011803238026e-06, |
|
"loss": 0.0504, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.2290836653386454, |
|
"grad_norm": 1.332216726824679, |
|
"learning_rate": 3.2393598839237903e-06, |
|
"loss": 0.0342, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.2310756972111554, |
|
"grad_norm": 2.1004912830561744, |
|
"learning_rate": 3.22472520323202e-06, |
|
"loss": 0.0488, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.2330677290836654, |
|
"grad_norm": 2.068005000040964, |
|
"learning_rate": 3.2101079044524895e-06, |
|
"loss": 0.065, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.2350597609561753, |
|
"grad_norm": 1.2584068242022144, |
|
"learning_rate": 3.195508130704795e-06, |
|
"loss": 0.0378, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.2370517928286853, |
|
"grad_norm": 1.3495383443316602, |
|
"learning_rate": 3.1809260249369373e-06, |
|
"loss": 0.0377, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.2390438247011952, |
|
"grad_norm": 1.74209098701691, |
|
"learning_rate": 3.1663617299239303e-06, |
|
"loss": 0.0582, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.2410358565737052, |
|
"grad_norm": 2.3541586375494767, |
|
"learning_rate": 3.1518153882663994e-06, |
|
"loss": 0.0658, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.2430278884462151, |
|
"grad_norm": 1.5107261868901234, |
|
"learning_rate": 3.1372871423891894e-06, |
|
"loss": 0.0413, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.245019920318725, |
|
"grad_norm": 2.007995750567458, |
|
"learning_rate": 3.1227771345399647e-06, |
|
"loss": 0.0477, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.247011952191235, |
|
"grad_norm": 1.3321234729078801, |
|
"learning_rate": 3.1082855067878182e-06, |
|
"loss": 0.0353, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.249003984063745, |
|
"grad_norm": 1.5773251846473662, |
|
"learning_rate": 3.093812401021885e-06, |
|
"loss": 0.0446, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.250996015936255, |
|
"grad_norm": 1.7010297581371503, |
|
"learning_rate": 3.079357958949946e-06, |
|
"loss": 0.0489, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.252988047808765, |
|
"grad_norm": 1.4948785976133703, |
|
"learning_rate": 3.0649223220970458e-06, |
|
"loss": 0.0346, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.254980079681275, |
|
"grad_norm": 1.580233346726061, |
|
"learning_rate": 3.050505631804105e-06, |
|
"loss": 0.0397, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.2569721115537849, |
|
"grad_norm": 1.4427856316940244, |
|
"learning_rate": 3.0361080292265354e-06, |
|
"loss": 0.0462, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.2589641434262948, |
|
"grad_norm": 1.5405358822480004, |
|
"learning_rate": 3.021729655332858e-06, |
|
"loss": 0.0446, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.2609561752988048, |
|
"grad_norm": 1.4069898266648972, |
|
"learning_rate": 3.0073706509033257e-06, |
|
"loss": 0.0481, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.2629482071713147, |
|
"grad_norm": 1.46434016713666, |
|
"learning_rate": 2.993031156528542e-06, |
|
"loss": 0.0405, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.2649402390438247, |
|
"grad_norm": 1.535938715676213, |
|
"learning_rate": 2.978711312608084e-06, |
|
"loss": 0.0404, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.2669322709163346, |
|
"grad_norm": 1.6527329484718531, |
|
"learning_rate": 2.9644112593491315e-06, |
|
"loss": 0.0378, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.2689243027888446, |
|
"grad_norm": 1.3342944497270683, |
|
"learning_rate": 2.9501311367650908e-06, |
|
"loss": 0.0458, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.2709163346613546, |
|
"grad_norm": 2.0538614960525874, |
|
"learning_rate": 2.9358710846742237e-06, |
|
"loss": 0.0536, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.2729083665338645, |
|
"grad_norm": 1.905491659479835, |
|
"learning_rate": 2.92163124269828e-06, |
|
"loss": 0.0531, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.2749003984063745, |
|
"grad_norm": 1.374868869463802, |
|
"learning_rate": 2.90741175026113e-06, |
|
"loss": 0.0371, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.2768924302788844, |
|
"grad_norm": 1.4720536952560632, |
|
"learning_rate": 2.8932127465874004e-06, |
|
"loss": 0.0444, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.2788844621513944, |
|
"grad_norm": 1.756270327258482, |
|
"learning_rate": 2.8790343707011114e-06, |
|
"loss": 0.0508, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.2808764940239044, |
|
"grad_norm": 1.1919142011447958, |
|
"learning_rate": 2.864876761424309e-06, |
|
"loss": 0.0361, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.2828685258964143, |
|
"grad_norm": 1.392440748957012, |
|
"learning_rate": 2.850740057375716e-06, |
|
"loss": 0.0379, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.2848605577689243, |
|
"grad_norm": 1.3950602927829037, |
|
"learning_rate": 2.8366243969693674e-06, |
|
"loss": 0.0338, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.2868525896414342, |
|
"grad_norm": 1.5201761557299762, |
|
"learning_rate": 2.822529918413259e-06, |
|
"loss": 0.0411, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.2888446215139442, |
|
"grad_norm": 1.3120599984550645, |
|
"learning_rate": 2.8084567597079915e-06, |
|
"loss": 0.0402, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.2908366533864541, |
|
"grad_norm": 1.4249084044129727, |
|
"learning_rate": 2.7944050586454215e-06, |
|
"loss": 0.0416, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.292828685258964, |
|
"grad_norm": 1.7782238234601455, |
|
"learning_rate": 2.7803749528073108e-06, |
|
"loss": 0.0488, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.294820717131474, |
|
"grad_norm": 1.6503484254463683, |
|
"learning_rate": 2.7663665795639815e-06, |
|
"loss": 0.0495, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.296812749003984, |
|
"grad_norm": 1.5251619493297126, |
|
"learning_rate": 2.752380076072967e-06, |
|
"loss": 0.0503, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.298804780876494, |
|
"grad_norm": 1.3125100617788958, |
|
"learning_rate": 2.7384155792776724e-06, |
|
"loss": 0.0356, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.300796812749004, |
|
"grad_norm": 1.5295843374836597, |
|
"learning_rate": 2.7244732259060335e-06, |
|
"loss": 0.044, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.302788844621514, |
|
"grad_norm": 1.5090295425620295, |
|
"learning_rate": 2.710553152469178e-06, |
|
"loss": 0.0426, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.3047808764940239, |
|
"grad_norm": 1.2597916390063753, |
|
"learning_rate": 2.6966554952600886e-06, |
|
"loss": 0.0359, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.3067729083665338, |
|
"grad_norm": 1.8988044363314096, |
|
"learning_rate": 2.682780390352262e-06, |
|
"loss": 0.0575, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.3087649402390438, |
|
"grad_norm": 1.6686947788798556, |
|
"learning_rate": 2.668927973598392e-06, |
|
"loss": 0.0424, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.3107569721115537, |
|
"grad_norm": 1.681750415123868, |
|
"learning_rate": 2.655098380629024e-06, |
|
"loss": 0.047, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.3127490039840637, |
|
"grad_norm": 1.5736992626946802, |
|
"learning_rate": 2.6412917468512354e-06, |
|
"loss": 0.034, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.3147410358565736, |
|
"grad_norm": 1.2994141414494456, |
|
"learning_rate": 2.627508207447308e-06, |
|
"loss": 0.0323, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.3167330677290836, |
|
"grad_norm": 1.5957841782185134, |
|
"learning_rate": 2.613747897373403e-06, |
|
"loss": 0.0537, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.3187250996015936, |
|
"grad_norm": 1.62545382658515, |
|
"learning_rate": 2.6000109513582417e-06, |
|
"loss": 0.0523, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.3207171314741035, |
|
"grad_norm": 2.230115579239022, |
|
"learning_rate": 2.5862975039017835e-06, |
|
"loss": 0.0607, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.3227091633466135, |
|
"grad_norm": 1.405368943781722, |
|
"learning_rate": 2.5726076892739127e-06, |
|
"loss": 0.0448, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.3247011952191234, |
|
"grad_norm": 1.7969792652573766, |
|
"learning_rate": 2.5589416415131215e-06, |
|
"loss": 0.0517, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.3266932270916334, |
|
"grad_norm": 1.530151249792159, |
|
"learning_rate": 2.5452994944251962e-06, |
|
"loss": 0.0331, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.3286852589641434, |
|
"grad_norm": 1.4807902414940988, |
|
"learning_rate": 2.531681381581913e-06, |
|
"loss": 0.0378, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.3306772908366533, |
|
"grad_norm": 1.7906460874526582, |
|
"learning_rate": 2.5180874363197217e-06, |
|
"loss": 0.0409, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.3326693227091633, |
|
"grad_norm": 1.6166848567176595, |
|
"learning_rate": 2.504517791738449e-06, |
|
"loss": 0.0464, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.3346613545816732, |
|
"grad_norm": 1.6676564078427882, |
|
"learning_rate": 2.4909725806999847e-06, |
|
"loss": 0.0466, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.3366533864541832, |
|
"grad_norm": 1.8873962166889124, |
|
"learning_rate": 2.4774519358269932e-06, |
|
"loss": 0.0472, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.3386454183266931, |
|
"grad_norm": 1.498576664515062, |
|
"learning_rate": 2.463955989501607e-06, |
|
"loss": 0.0464, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.340637450199203, |
|
"grad_norm": 1.4936995031717264, |
|
"learning_rate": 2.4504848738641313e-06, |
|
"loss": 0.0319, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.342629482071713, |
|
"grad_norm": 1.7166013235605342, |
|
"learning_rate": 2.437038720811752e-06, |
|
"loss": 0.0458, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.3446215139442232, |
|
"grad_norm": 1.9084481577153407, |
|
"learning_rate": 2.4236176619972436e-06, |
|
"loss": 0.0479, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.3466135458167332, |
|
"grad_norm": 1.4070919253542853, |
|
"learning_rate": 2.41022182882768e-06, |
|
"loss": 0.0322, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.3486055776892432, |
|
"grad_norm": 2.0522106017301285, |
|
"learning_rate": 2.3968513524631483e-06, |
|
"loss": 0.0533, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.3505976095617531, |
|
"grad_norm": 1.4028748923424417, |
|
"learning_rate": 2.3835063638154636e-06, |
|
"loss": 0.0371, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.352589641434263, |
|
"grad_norm": 1.7132235697547435, |
|
"learning_rate": 2.3701869935468893e-06, |
|
"loss": 0.045, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.354581673306773, |
|
"grad_norm": 1.3468986017248528, |
|
"learning_rate": 2.356893372068855e-06, |
|
"loss": 0.0374, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.356573705179283, |
|
"grad_norm": 1.7023203932118756, |
|
"learning_rate": 2.343625629540681e-06, |
|
"loss": 0.0431, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.358565737051793, |
|
"grad_norm": 1.7614144791766995, |
|
"learning_rate": 2.3303838958683077e-06, |
|
"loss": 0.0469, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.360557768924303, |
|
"grad_norm": 1.713466874008201, |
|
"learning_rate": 2.3171683007030117e-06, |
|
"loss": 0.0403, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.3625498007968129, |
|
"grad_norm": 1.8072313944179408, |
|
"learning_rate": 2.3039789734401524e-06, |
|
"loss": 0.0514, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.3645418326693228, |
|
"grad_norm": 1.8285876817770546, |
|
"learning_rate": 2.2908160432178937e-06, |
|
"loss": 0.047, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.3665338645418328, |
|
"grad_norm": 1.835704830271512, |
|
"learning_rate": 2.277679638915945e-06, |
|
"loss": 0.0491, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.3685258964143427, |
|
"grad_norm": 1.809690467670344, |
|
"learning_rate": 2.264569889154295e-06, |
|
"loss": 0.0527, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.3705179282868527, |
|
"grad_norm": 1.3476668235326381, |
|
"learning_rate": 2.251486922291957e-06, |
|
"loss": 0.0385, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.3725099601593627, |
|
"grad_norm": 2.0206149912840194, |
|
"learning_rate": 2.23843086642571e-06, |
|
"loss": 0.0537, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.3745019920318726, |
|
"grad_norm": 1.5720430933164902, |
|
"learning_rate": 2.225401849388842e-06, |
|
"loss": 0.048, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.3764940239043826, |
|
"grad_norm": 1.499247928447303, |
|
"learning_rate": 2.2123999987499015e-06, |
|
"loss": 0.0422, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.3784860557768925, |
|
"grad_norm": 1.276340108431395, |
|
"learning_rate": 2.1994254418114524e-06, |
|
"loss": 0.0423, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.3804780876494025, |
|
"grad_norm": 1.5216579445173712, |
|
"learning_rate": 2.186478305608819e-06, |
|
"loss": 0.0412, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.3824701195219125, |
|
"grad_norm": 1.9598772663646258, |
|
"learning_rate": 2.1735587169088435e-06, |
|
"loss": 0.0477, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.3844621513944224, |
|
"grad_norm": 1.6029428282921836, |
|
"learning_rate": 2.1606668022086517e-06, |
|
"loss": 0.0481, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.3864541832669324, |
|
"grad_norm": 1.0893162038986566, |
|
"learning_rate": 2.147802687734409e-06, |
|
"loss": 0.031, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.3884462151394423, |
|
"grad_norm": 1.48589149356358, |
|
"learning_rate": 2.1349664994400853e-06, |
|
"loss": 0.0382, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.3904382470119523, |
|
"grad_norm": 1.8673421468251405, |
|
"learning_rate": 2.122158363006223e-06, |
|
"loss": 0.0605, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.3924302788844622, |
|
"grad_norm": 1.4982653691530576, |
|
"learning_rate": 2.109378403838705e-06, |
|
"loss": 0.0441, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.3944223107569722, |
|
"grad_norm": 1.6272971890188588, |
|
"learning_rate": 2.0966267470675273e-06, |
|
"loss": 0.0387, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.3964143426294822, |
|
"grad_norm": 1.3009436508973116, |
|
"learning_rate": 2.0839035175455748e-06, |
|
"loss": 0.0364, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.3984063745019921, |
|
"grad_norm": 1.5070361743640346, |
|
"learning_rate": 2.071208839847397e-06, |
|
"loss": 0.0384, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.400398406374502, |
|
"grad_norm": 1.8403315806429654, |
|
"learning_rate": 2.0585428382679894e-06, |
|
"loss": 0.0566, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.402390438247012, |
|
"grad_norm": 1.9768359787018837, |
|
"learning_rate": 2.0459056368215786e-06, |
|
"loss": 0.049, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.404382470119522, |
|
"grad_norm": 1.1994848396934092, |
|
"learning_rate": 2.0332973592404027e-06, |
|
"loss": 0.0357, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.406374501992032, |
|
"grad_norm": 1.4693306467977794, |
|
"learning_rate": 2.0207181289735073e-06, |
|
"loss": 0.0359, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.408366533864542, |
|
"grad_norm": 1.5649069322977223, |
|
"learning_rate": 2.008168069185525e-06, |
|
"loss": 0.042, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.4103585657370519, |
|
"grad_norm": 1.3393789831970144, |
|
"learning_rate": 1.9956473027554846e-06, |
|
"loss": 0.0383, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.4123505976095618, |
|
"grad_norm": 1.7830258693931829, |
|
"learning_rate": 1.9831559522755976e-06, |
|
"loss": 0.0469, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.4143426294820718, |
|
"grad_norm": 1.6147368592510511, |
|
"learning_rate": 1.97069414005006e-06, |
|
"loss": 0.0439, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.4163346613545817, |
|
"grad_norm": 1.3999929614334758, |
|
"learning_rate": 1.9582619880938565e-06, |
|
"loss": 0.0463, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.4183266932270917, |
|
"grad_norm": 1.836089297310526, |
|
"learning_rate": 1.9458596181315643e-06, |
|
"loss": 0.0573, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.4203187250996017, |
|
"grad_norm": 1.5850159120530725, |
|
"learning_rate": 1.9334871515961616e-06, |
|
"loss": 0.0378, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.4223107569721116, |
|
"grad_norm": 1.3946476121954683, |
|
"learning_rate": 1.9211447096278403e-06, |
|
"loss": 0.0405, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.4243027888446216, |
|
"grad_norm": 1.3008967894406476, |
|
"learning_rate": 1.9088324130728164e-06, |
|
"loss": 0.0326, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.4262948207171315, |
|
"grad_norm": 1.3349769350489, |
|
"learning_rate": 1.8965503824821496e-06, |
|
"loss": 0.0429, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.4282868525896415, |
|
"grad_norm": 1.6914951673768632, |
|
"learning_rate": 1.8842987381105626e-06, |
|
"loss": 0.0367, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.4302788844621515, |
|
"grad_norm": 1.3515680548078475, |
|
"learning_rate": 1.872077599915263e-06, |
|
"loss": 0.0379, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.4322709163346614, |
|
"grad_norm": 1.3377996405235246, |
|
"learning_rate": 1.8598870875547691e-06, |
|
"loss": 0.0402, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.4342629482071714, |
|
"grad_norm": 1.264515354052566, |
|
"learning_rate": 1.84772732038774e-06, |
|
"loss": 0.0323, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.4362549800796813, |
|
"grad_norm": 1.555389355360336, |
|
"learning_rate": 1.8355984174717994e-06, |
|
"loss": 0.0382, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.4382470119521913, |
|
"grad_norm": 1.3182929471725358, |
|
"learning_rate": 1.8235004975623816e-06, |
|
"loss": 0.0307, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.4402390438247012, |
|
"grad_norm": 1.4783822554476882, |
|
"learning_rate": 1.811433679111561e-06, |
|
"loss": 0.0391, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.4422310756972112, |
|
"grad_norm": 1.7708760927848832, |
|
"learning_rate": 1.7993980802668947e-06, |
|
"loss": 0.0391, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.4442231075697212, |
|
"grad_norm": 1.6784698019995254, |
|
"learning_rate": 1.787393818870264e-06, |
|
"loss": 0.0487, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.4462151394422311, |
|
"grad_norm": 1.2911428050631453, |
|
"learning_rate": 1.7754210124567216e-06, |
|
"loss": 0.0325, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.448207171314741, |
|
"grad_norm": 1.6758556031327247, |
|
"learning_rate": 1.7634797782533436e-06, |
|
"loss": 0.0378, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.450199203187251, |
|
"grad_norm": 1.319567821261113, |
|
"learning_rate": 1.7515702331780753e-06, |
|
"loss": 0.0368, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.452191235059761, |
|
"grad_norm": 1.2921652740567358, |
|
"learning_rate": 1.7396924938385933e-06, |
|
"loss": 0.0331, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.454183266932271, |
|
"grad_norm": 1.1429340732722788, |
|
"learning_rate": 1.7278466765311597e-06, |
|
"loss": 0.0336, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.456175298804781, |
|
"grad_norm": 1.8196882924447233, |
|
"learning_rate": 1.7160328972394835e-06, |
|
"loss": 0.0418, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.4581673306772909, |
|
"grad_norm": 1.6752103021159248, |
|
"learning_rate": 1.7042512716335873e-06, |
|
"loss": 0.0397, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.4601593625498008, |
|
"grad_norm": 1.797041780271012, |
|
"learning_rate": 1.6925019150686744e-06, |
|
"loss": 0.055, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.4621513944223108, |
|
"grad_norm": 1.4651037298048166, |
|
"learning_rate": 1.6807849425839933e-06, |
|
"loss": 0.0361, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.4641434262948207, |
|
"grad_norm": 1.1462478664298237, |
|
"learning_rate": 1.669100468901722e-06, |
|
"loss": 0.0308, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.4661354581673307, |
|
"grad_norm": 1.7311028494874292, |
|
"learning_rate": 1.6574486084258369e-06, |
|
"loss": 0.0447, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.4681274900398407, |
|
"grad_norm": 1.7747156927182413, |
|
"learning_rate": 1.6458294752409943e-06, |
|
"loss": 0.0423, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.4701195219123506, |
|
"grad_norm": 1.765615529033668, |
|
"learning_rate": 1.6342431831114153e-06, |
|
"loss": 0.0419, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.4721115537848606, |
|
"grad_norm": 1.588016885038511, |
|
"learning_rate": 1.6226898454797697e-06, |
|
"loss": 0.0437, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.4741035856573705, |
|
"grad_norm": 2.3682981845889954, |
|
"learning_rate": 1.6111695754660667e-06, |
|
"loss": 0.0548, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.4760956175298805, |
|
"grad_norm": 2.196815477790404, |
|
"learning_rate": 1.599682485866546e-06, |
|
"loss": 0.0471, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.4780876494023905, |
|
"grad_norm": 1.3771418350242575, |
|
"learning_rate": 1.5882286891525755e-06, |
|
"loss": 0.0428, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.4800796812749004, |
|
"grad_norm": 1.4903254185233181, |
|
"learning_rate": 1.5768082974695476e-06, |
|
"loss": 0.0404, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.4820717131474104, |
|
"grad_norm": 1.4985158317383274, |
|
"learning_rate": 1.5654214226357822e-06, |
|
"loss": 0.0329, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.4840637450199203, |
|
"grad_norm": 1.7223869396300333, |
|
"learning_rate": 1.5540681761414327e-06, |
|
"loss": 0.047, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.4860557768924303, |
|
"grad_norm": 1.570813419108133, |
|
"learning_rate": 1.5427486691473942e-06, |
|
"loss": 0.0383, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.4880478087649402, |
|
"grad_norm": 1.69930983396748, |
|
"learning_rate": 1.5314630124842144e-06, |
|
"loss": 0.0399, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.4900398406374502, |
|
"grad_norm": 1.7421371319863044, |
|
"learning_rate": 1.5202113166510058e-06, |
|
"loss": 0.0481, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.4920318725099602, |
|
"grad_norm": 1.5181018716448114, |
|
"learning_rate": 1.5089936918143705e-06, |
|
"loss": 0.0349, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.4940239043824701, |
|
"grad_norm": 1.5458296783975771, |
|
"learning_rate": 1.4978102478073165e-06, |
|
"loss": 0.0469, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.49601593625498, |
|
"grad_norm": 1.5770259773948998, |
|
"learning_rate": 1.4866610941281823e-06, |
|
"loss": 0.0325, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.49800796812749, |
|
"grad_norm": 1.5341462678859672, |
|
"learning_rate": 1.475546339939568e-06, |
|
"loss": 0.0397, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.2027634510806997, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.028, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.50199203187251, |
|
"grad_norm": 1.5111370601251886, |
|
"learning_rate": 1.4534204649991817e-06, |
|
"loss": 0.0386, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.50398406374502, |
|
"grad_norm": 1.482657437759512, |
|
"learning_rate": 1.4424095608843036e-06, |
|
"loss": 0.036, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.5059760956175299, |
|
"grad_norm": 1.077070456287382, |
|
"learning_rate": 1.4314334895316095e-06, |
|
"loss": 0.0334, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.5079681274900398, |
|
"grad_norm": 1.4629517388254307, |
|
"learning_rate": 1.4204923584090314e-06, |
|
"loss": 0.036, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.5099601593625498, |
|
"grad_norm": 1.3805538263025956, |
|
"learning_rate": 1.4095862746423961e-06, |
|
"loss": 0.0356, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.5119521912350598, |
|
"grad_norm": 1.8358312305710611, |
|
"learning_rate": 1.3987153450143775e-06, |
|
"loss": 0.0465, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.5139442231075697, |
|
"grad_norm": 1.1684136646906567, |
|
"learning_rate": 1.3878796759634544e-06, |
|
"loss": 0.0332, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.5159362549800797, |
|
"grad_norm": 1.2688724525634016, |
|
"learning_rate": 1.3770793735828603e-06, |
|
"loss": 0.0346, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.5179282868525896, |
|
"grad_norm": 1.8410552958221407, |
|
"learning_rate": 1.366314543619553e-06, |
|
"loss": 0.0464, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.5199203187250996, |
|
"grad_norm": 1.6641830537225055, |
|
"learning_rate": 1.355585291473176e-06, |
|
"loss": 0.0402, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.5219123505976095, |
|
"grad_norm": 1.4605320398418256, |
|
"learning_rate": 1.3448917221950264e-06, |
|
"loss": 0.0386, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.5239043824701195, |
|
"grad_norm": 1.628381572331683, |
|
"learning_rate": 1.3342339404870253e-06, |
|
"loss": 0.0401, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.5258964143426295, |
|
"grad_norm": 2.3659907711059467, |
|
"learning_rate": 1.3236120507006945e-06, |
|
"loss": 0.0553, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.5278884462151394, |
|
"grad_norm": 2.019268389843473, |
|
"learning_rate": 1.3130261568361335e-06, |
|
"loss": 0.0456, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.5298804780876494, |
|
"grad_norm": 1.903479551165333, |
|
"learning_rate": 1.3024763625410025e-06, |
|
"loss": 0.0452, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.5318725099601593, |
|
"grad_norm": 1.6250609527356852, |
|
"learning_rate": 1.2919627711095068e-06, |
|
"loss": 0.0362, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.5338645418326693, |
|
"grad_norm": 1.2317333249828415, |
|
"learning_rate": 1.281485485481384e-06, |
|
"loss": 0.0304, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.5358565737051793, |
|
"grad_norm": 1.8649886760609837, |
|
"learning_rate": 1.2710446082408996e-06, |
|
"loss": 0.0464, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.5378486055776892, |
|
"grad_norm": 1.8171004964117743, |
|
"learning_rate": 1.2606402416158391e-06, |
|
"loss": 0.0465, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.5398406374501992, |
|
"grad_norm": 1.862322448579373, |
|
"learning_rate": 1.2502724874765087e-06, |
|
"loss": 0.0479, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.5418326693227091, |
|
"grad_norm": 1.8583353335497257, |
|
"learning_rate": 1.2399414473347405e-06, |
|
"loss": 0.0424, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.543824701195219, |
|
"grad_norm": 1.4852985609369138, |
|
"learning_rate": 1.229647222342889e-06, |
|
"loss": 0.037, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.545816733067729, |
|
"grad_norm": 1.701665909805954, |
|
"learning_rate": 1.2193899132928539e-06, |
|
"loss": 0.0435, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.547808764940239, |
|
"grad_norm": 1.5368339751180022, |
|
"learning_rate": 1.2091696206150843e-06, |
|
"loss": 0.0405, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.549800796812749, |
|
"grad_norm": 1.4211500123116818, |
|
"learning_rate": 1.1989864443775984e-06, |
|
"loss": 0.0355, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.551792828685259, |
|
"grad_norm": 2.0587268481909646, |
|
"learning_rate": 1.1888404842850031e-06, |
|
"loss": 0.0555, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.5537848605577689, |
|
"grad_norm": 1.2088357311119127, |
|
"learning_rate": 1.1787318396775188e-06, |
|
"loss": 0.0348, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.5557768924302788, |
|
"grad_norm": 1.4388643186761485, |
|
"learning_rate": 1.1686606095300034e-06, |
|
"loss": 0.0344, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.5577689243027888, |
|
"grad_norm": 1.2409791290088654, |
|
"learning_rate": 1.158626892450988e-06, |
|
"loss": 0.0344, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.5597609561752988, |
|
"grad_norm": 1.6545764811493544, |
|
"learning_rate": 1.1486307866817082e-06, |
|
"loss": 0.0455, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.5617529880478087, |
|
"grad_norm": 1.8070275154016138, |
|
"learning_rate": 1.138672390095143e-06, |
|
"loss": 0.033, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.5637450199203187, |
|
"grad_norm": 1.6600852203195762, |
|
"learning_rate": 1.128751800195057e-06, |
|
"loss": 0.0467, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.5657370517928286, |
|
"grad_norm": 1.4417059533847947, |
|
"learning_rate": 1.1188691141150455e-06, |
|
"loss": 0.0383, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.5677290836653386, |
|
"grad_norm": 1.2631911364466095, |
|
"learning_rate": 1.1090244286175834e-06, |
|
"loss": 0.0262, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.5697211155378485, |
|
"grad_norm": 1.7996066106500415, |
|
"learning_rate": 1.0992178400930753e-06, |
|
"loss": 0.0428, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.5717131474103585, |
|
"grad_norm": 1.661450747231149, |
|
"learning_rate": 1.0894494445589171e-06, |
|
"loss": 0.0475, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.5737051792828685, |
|
"grad_norm": 1.9431205504964344, |
|
"learning_rate": 1.0797193376585518e-06, |
|
"loss": 0.0451, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.5756972111553784, |
|
"grad_norm": 1.3821996487700146, |
|
"learning_rate": 1.0700276146605349e-06, |
|
"loss": 0.0385, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.5776892430278884, |
|
"grad_norm": 1.4713583961609482, |
|
"learning_rate": 1.0603743704575992e-06, |
|
"loss": 0.0402, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.5796812749003983, |
|
"grad_norm": 1.396562745448481, |
|
"learning_rate": 1.0507596995657288e-06, |
|
"loss": 0.0407, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.5816733067729083, |
|
"grad_norm": 1.584255914198357, |
|
"learning_rate": 1.0411836961232312e-06, |
|
"loss": 0.0362, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.5836653386454183, |
|
"grad_norm": 1.2525048404082542, |
|
"learning_rate": 1.031646453889818e-06, |
|
"loss": 0.0375, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.5856573705179282, |
|
"grad_norm": 1.692525265149603, |
|
"learning_rate": 1.0221480662456845e-06, |
|
"loss": 0.0451, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.5876494023904382, |
|
"grad_norm": 1.5918519161268234, |
|
"learning_rate": 1.012688626190596e-06, |
|
"loss": 0.0479, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.5896414342629481, |
|
"grad_norm": 1.4243746693979769, |
|
"learning_rate": 1.0032682263429788e-06, |
|
"loss": 0.0331, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.591633466135458, |
|
"grad_norm": 1.926086405446464, |
|
"learning_rate": 9.93886958939011e-07, |
|
"loss": 0.0427, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.593625498007968, |
|
"grad_norm": 1.4477256907812834, |
|
"learning_rate": 9.845449158317216e-07, |
|
"loss": 0.0359, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.593625498007968, |
|
"eval_loss": 0.10596468299627304, |
|
"eval_runtime": 3.1343, |
|
"eval_samples_per_second": 13.081, |
|
"eval_steps_per_second": 3.51, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.595617529880478, |
|
"grad_norm": 1.421779703061797, |
|
"learning_rate": 9.752421884900915e-07, |
|
"loss": 0.0305, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.597609561752988, |
|
"grad_norm": 1.7789603558111702, |
|
"learning_rate": 9.65978867998152e-07, |
|
"loss": 0.0345, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.599601593625498, |
|
"grad_norm": 1.9525946533919227, |
|
"learning_rate": 9.567550450541012e-07, |
|
"loss": 0.0536, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.6015936254980079, |
|
"grad_norm": 1.4805860459441311, |
|
"learning_rate": 9.475708099694125e-07, |
|
"loss": 0.0425, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.6035856573705178, |
|
"grad_norm": 1.6320592012302073, |
|
"learning_rate": 9.384262526679488e-07, |
|
"loss": 0.0385, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.6055776892430278, |
|
"grad_norm": 1.407033676035743, |
|
"learning_rate": 9.293214626850838e-07, |
|
"loss": 0.0398, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.6075697211155378, |
|
"grad_norm": 1.1535555756252225, |
|
"learning_rate": 9.202565291668253e-07, |
|
"loss": 0.0314, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.6095617529880477, |
|
"grad_norm": 1.4977437784858982, |
|
"learning_rate": 9.112315408689415e-07, |
|
"loss": 0.0374, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.6115537848605577, |
|
"grad_norm": 1.732572156525876, |
|
"learning_rate": 9.022465861560931e-07, |
|
"loss": 0.0446, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.6135458167330676, |
|
"grad_norm": 1.6891276330650864, |
|
"learning_rate": 8.933017530009669e-07, |
|
"loss": 0.0383, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.6155378486055776, |
|
"grad_norm": 1.897852297890652, |
|
"learning_rate": 8.843971289834157e-07, |
|
"loss": 0.0441, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.6175298804780875, |
|
"grad_norm": 1.5594431631506627, |
|
"learning_rate": 8.755328012896002e-07, |
|
"loss": 0.0405, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.6195219123505975, |
|
"grad_norm": 1.436757522414069, |
|
"learning_rate": 8.667088567111348e-07, |
|
"loss": 0.0396, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.6215139442231075, |
|
"grad_norm": 1.9484255207208512, |
|
"learning_rate": 8.579253816442401e-07, |
|
"loss": 0.0516, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.6235059760956174, |
|
"grad_norm": 1.380709884926527, |
|
"learning_rate": 8.491824620888906e-07, |
|
"loss": 0.0356, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.6254980079681274, |
|
"grad_norm": 1.7133470278748053, |
|
"learning_rate": 8.404801836479809e-07, |
|
"loss": 0.0434, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.6274900398406373, |
|
"grad_norm": 1.583809883914204, |
|
"learning_rate": 8.318186315264859e-07, |
|
"loss": 0.0394, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.6294820717131473, |
|
"grad_norm": 1.468174216651124, |
|
"learning_rate": 8.231978905306204e-07, |
|
"loss": 0.0268, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.6314741035856573, |
|
"grad_norm": 1.4408279947153464, |
|
"learning_rate": 8.146180450670155e-07, |
|
"loss": 0.0313, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.6334661354581672, |
|
"grad_norm": 1.5748697433294543, |
|
"learning_rate": 8.060791791418887e-07, |
|
"loss": 0.0363, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.6354581673306772, |
|
"grad_norm": 1.6393304512543407, |
|
"learning_rate": 7.975813763602219e-07, |
|
"loss": 0.0325, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.6374501992031871, |
|
"grad_norm": 2.1412321953511935, |
|
"learning_rate": 7.891247199249441e-07, |
|
"loss": 0.0532, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.639442231075697, |
|
"grad_norm": 1.3138050863993294, |
|
"learning_rate": 7.807092926361154e-07, |
|
"loss": 0.0331, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.641434262948207, |
|
"grad_norm": 1.5541240912252534, |
|
"learning_rate": 7.723351768901172e-07, |
|
"loss": 0.0426, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.643426294820717, |
|
"grad_norm": 1.406024578499524, |
|
"learning_rate": 7.640024546788449e-07, |
|
"loss": 0.0342, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.645418326693227, |
|
"grad_norm": 1.2539556880610443, |
|
"learning_rate": 7.557112075889034e-07, |
|
"loss": 0.026, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.647410358565737, |
|
"grad_norm": 1.394792639284168, |
|
"learning_rate": 7.474615168008126e-07, |
|
"loss": 0.0437, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.6494023904382469, |
|
"grad_norm": 1.2380166612525048, |
|
"learning_rate": 7.392534630882092e-07, |
|
"loss": 0.0308, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.6513944223107568, |
|
"grad_norm": 1.6208908227480252, |
|
"learning_rate": 7.310871268170566e-07, |
|
"loss": 0.0391, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.6533864541832668, |
|
"grad_norm": 1.3790210574012849, |
|
"learning_rate": 7.229625879448577e-07, |
|
"loss": 0.0341, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.6553784860557768, |
|
"grad_norm": 1.3562415289883447, |
|
"learning_rate": 7.148799260198736e-07, |
|
"loss": 0.0323, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.6573705179282867, |
|
"grad_norm": 1.4745348978618424, |
|
"learning_rate": 7.06839220180342e-07, |
|
"loss": 0.0394, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.6593625498007967, |
|
"grad_norm": 1.8763390835724343, |
|
"learning_rate": 6.988405491537054e-07, |
|
"loss": 0.0403, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.6613545816733066, |
|
"grad_norm": 1.5395306915776477, |
|
"learning_rate": 6.908839912558374e-07, |
|
"loss": 0.0375, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.6633466135458166, |
|
"grad_norm": 1.6645400590228017, |
|
"learning_rate": 6.829696243902784e-07, |
|
"loss": 0.0448, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.6653386454183265, |
|
"grad_norm": 1.8788295421219408, |
|
"learning_rate": 6.750975260474718e-07, |
|
"loss": 0.0425, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.6673306772908365, |
|
"grad_norm": 1.9902864466850982, |
|
"learning_rate": 6.67267773304004e-07, |
|
"loss": 0.048, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.6693227091633465, |
|
"grad_norm": 1.0800153432067903, |
|
"learning_rate": 6.594804428218527e-07, |
|
"loss": 0.0327, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.6713147410358564, |
|
"grad_norm": 1.500062093936282, |
|
"learning_rate": 6.517356108476314e-07, |
|
"loss": 0.0366, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.6733067729083664, |
|
"grad_norm": 1.9099325765908919, |
|
"learning_rate": 6.440333532118503e-07, |
|
"loss": 0.0432, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.6752988047808763, |
|
"grad_norm": 1.16559644327572, |
|
"learning_rate": 6.36373745328166e-07, |
|
"loss": 0.0258, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.6772908366533863, |
|
"grad_norm": 1.3024121385631138, |
|
"learning_rate": 6.287568621926482e-07, |
|
"loss": 0.0329, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.6792828685258963, |
|
"grad_norm": 1.7905458621338421, |
|
"learning_rate": 6.211827783830443e-07, |
|
"loss": 0.0388, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.6812749003984062, |
|
"grad_norm": 1.507626826133329, |
|
"learning_rate": 6.136515680580479e-07, |
|
"loss": 0.0365, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.6832669322709162, |
|
"grad_norm": 1.5818672866222618, |
|
"learning_rate": 6.061633049565735e-07, |
|
"loss": 0.0427, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.6852589641434261, |
|
"grad_norm": 1.7500474110140714, |
|
"learning_rate": 5.987180623970351e-07, |
|
"loss": 0.0419, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.687250996015936, |
|
"grad_norm": 1.6005728580669865, |
|
"learning_rate": 5.913159132766272e-07, |
|
"loss": 0.0386, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.6892430278884463, |
|
"grad_norm": 1.4765492940539946, |
|
"learning_rate": 5.839569300706127e-07, |
|
"loss": 0.035, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.6912350597609562, |
|
"grad_norm": 1.7073641068426852, |
|
"learning_rate": 5.766411848316111e-07, |
|
"loss": 0.0453, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.6932270916334662, |
|
"grad_norm": 1.3266813195045528, |
|
"learning_rate": 5.693687491888944e-07, |
|
"loss": 0.0269, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.6952191235059761, |
|
"grad_norm": 1.7375537306550803, |
|
"learning_rate": 5.621396943476865e-07, |
|
"loss": 0.0371, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.697211155378486, |
|
"grad_norm": 1.16829582732014, |
|
"learning_rate": 5.549540910884649e-07, |
|
"loss": 0.0292, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.699203187250996, |
|
"grad_norm": 1.2524818853083746, |
|
"learning_rate": 5.478120097662654e-07, |
|
"loss": 0.0379, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.701195219123506, |
|
"grad_norm": 1.6095889326273223, |
|
"learning_rate": 5.407135203099984e-07, |
|
"loss": 0.0348, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.703187250996016, |
|
"grad_norm": 1.7855589434412364, |
|
"learning_rate": 5.336586922217607e-07, |
|
"loss": 0.0426, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.705179282868526, |
|
"grad_norm": 1.3350483084942124, |
|
"learning_rate": 5.266475945761562e-07, |
|
"loss": 0.0324, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.707171314741036, |
|
"grad_norm": 1.9650070108762112, |
|
"learning_rate": 5.19680296019619e-07, |
|
"loss": 0.0422, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.7091633466135459, |
|
"grad_norm": 1.5395346984385803, |
|
"learning_rate": 5.127568647697407e-07, |
|
"loss": 0.0389, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.7111553784860558, |
|
"grad_norm": 1.4407452070494002, |
|
"learning_rate": 5.05877368614604e-07, |
|
"loss": 0.0336, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.7131474103585658, |
|
"grad_norm": 1.7638225778002696, |
|
"learning_rate": 4.990418749121179e-07, |
|
"loss": 0.0386, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.7151394422310757, |
|
"grad_norm": 1.3281767084541467, |
|
"learning_rate": 4.922504505893583e-07, |
|
"loss": 0.0292, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.7171314741035857, |
|
"grad_norm": 1.626746762497501, |
|
"learning_rate": 4.855031621419143e-07, |
|
"loss": 0.033, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.7191235059760956, |
|
"grad_norm": 1.432244514359786, |
|
"learning_rate": 4.788000756332339e-07, |
|
"loss": 0.0342, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.7211155378486056, |
|
"grad_norm": 1.355048293376289, |
|
"learning_rate": 4.721412566939804e-07, |
|
"loss": 0.0339, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.7231075697211156, |
|
"grad_norm": 1.8150166104991199, |
|
"learning_rate": 4.655267705213884e-07, |
|
"loss": 0.0332, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.7250996015936255, |
|
"grad_norm": 1.3905664903519097, |
|
"learning_rate": 4.5895668187862283e-07, |
|
"loss": 0.0355, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.7270916334661355, |
|
"grad_norm": 1.4892701437662028, |
|
"learning_rate": 4.524310550941513e-07, |
|
"loss": 0.0389, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.7290836653386454, |
|
"grad_norm": 1.4991400329744464, |
|
"learning_rate": 4.4594995406110785e-07, |
|
"loss": 0.0288, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.7310756972111554, |
|
"grad_norm": 2.1763244133944717, |
|
"learning_rate": 4.395134422366715e-07, |
|
"loss": 0.0597, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.7330677290836654, |
|
"grad_norm": 1.4364589154779435, |
|
"learning_rate": 4.33121582641442e-07, |
|
"loss": 0.029, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.7350597609561753, |
|
"grad_norm": 1.4528953350538367, |
|
"learning_rate": 4.2677443785882566e-07, |
|
"loss": 0.0299, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.7370517928286853, |
|
"grad_norm": 1.9612964941834787, |
|
"learning_rate": 4.2047207003442003e-07, |
|
"loss": 0.0395, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.7390438247011952, |
|
"grad_norm": 2.167595941204298, |
|
"learning_rate": 4.142145408754061e-07, |
|
"loss": 0.0443, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.7410358565737052, |
|
"grad_norm": 1.91106994896413, |
|
"learning_rate": 4.0800191164994675e-07, |
|
"loss": 0.0389, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.7430278884462151, |
|
"grad_norm": 1.4525532274137143, |
|
"learning_rate": 4.018342431865818e-07, |
|
"loss": 0.032, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.745019920318725, |
|
"grad_norm": 1.6060381855232735, |
|
"learning_rate": 3.957115958736374e-07, |
|
"loss": 0.0425, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.747011952191235, |
|
"grad_norm": 1.3887626940467759, |
|
"learning_rate": 3.8963402965863094e-07, |
|
"loss": 0.0397, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.749003984063745, |
|
"grad_norm": 1.372684240605827, |
|
"learning_rate": 3.8360160404768755e-07, |
|
"loss": 0.0391, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.750996015936255, |
|
"grad_norm": 1.3181667404064525, |
|
"learning_rate": 3.7761437810495517e-07, |
|
"loss": 0.0335, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.752988047808765, |
|
"grad_norm": 1.6789671311971048, |
|
"learning_rate": 3.7167241045202474e-07, |
|
"loss": 0.0407, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.754980079681275, |
|
"grad_norm": 1.3469532000945257, |
|
"learning_rate": 3.657757592673611e-07, |
|
"loss": 0.0274, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.7569721115537849, |
|
"grad_norm": 1.4431142582457726, |
|
"learning_rate": 3.5992448228572895e-07, |
|
"loss": 0.0276, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.7589641434262948, |
|
"grad_norm": 1.2459499701396597, |
|
"learning_rate": 3.5411863679762956e-07, |
|
"loss": 0.0313, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.7609561752988048, |
|
"grad_norm": 1.8366872404033463, |
|
"learning_rate": 3.483582796487395e-07, |
|
"loss": 0.0434, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.7629482071713147, |
|
"grad_norm": 1.6841292031312827, |
|
"learning_rate": 3.426434672393542e-07, |
|
"loss": 0.0375, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.7649402390438247, |
|
"grad_norm": 1.696643515004684, |
|
"learning_rate": 3.3697425552383536e-07, |
|
"loss": 0.0421, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.7669322709163346, |
|
"grad_norm": 1.316459868698009, |
|
"learning_rate": 3.3135070001006186e-07, |
|
"loss": 0.033, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.7689243027888446, |
|
"grad_norm": 1.9166835989066684, |
|
"learning_rate": 3.257728557588902e-07, |
|
"loss": 0.0438, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.7709163346613546, |
|
"grad_norm": 1.6334683496335292, |
|
"learning_rate": 3.202407773836108e-07, |
|
"loss": 0.0356, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.7729083665338645, |
|
"grad_norm": 1.7333830974632702, |
|
"learning_rate": 3.1475451904941613e-07, |
|
"loss": 0.0392, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.7749003984063745, |
|
"grad_norm": 1.6486004340046463, |
|
"learning_rate": 3.093141344728695e-07, |
|
"loss": 0.0429, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.7768924302788844, |
|
"grad_norm": 1.7192233169908389, |
|
"learning_rate": 3.039196769213787e-07, |
|
"loss": 0.0334, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.7788844621513944, |
|
"grad_norm": 1.4986249160852925, |
|
"learning_rate": 2.985711992126772e-07, |
|
"loss": 0.0332, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.7808764940239044, |
|
"grad_norm": 1.391955958968789, |
|
"learning_rate": 2.932687537143003e-07, |
|
"loss": 0.0395, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.7828685258964143, |
|
"grad_norm": 1.5022523308748341, |
|
"learning_rate": 2.8801239234308e-07, |
|
"loss": 0.0328, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.7848605577689243, |
|
"grad_norm": 1.5896216675986345, |
|
"learning_rate": 2.828021665646341e-07, |
|
"loss": 0.0395, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.7868525896414342, |
|
"grad_norm": 1.5943964105881356, |
|
"learning_rate": 2.776381273928597e-07, |
|
"loss": 0.0335, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.7888446215139442, |
|
"grad_norm": 1.4573130385904005, |
|
"learning_rate": 2.725203253894365e-07, |
|
"loss": 0.0328, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.7908366533864541, |
|
"grad_norm": 1.5957847319376306, |
|
"learning_rate": 2.6744881066333104e-07, |
|
"loss": 0.0302, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.792828685258964, |
|
"grad_norm": 1.5938499333127716, |
|
"learning_rate": 2.6242363287030617e-07, |
|
"loss": 0.0334, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.794820717131474, |
|
"grad_norm": 1.493471769426448, |
|
"learning_rate": 2.5744484121243416e-07, |
|
"loss": 0.0376, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.796812749003984, |
|
"grad_norm": 1.4679887246269543, |
|
"learning_rate": 2.5251248443761644e-07, |
|
"loss": 0.0308, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.798804780876494, |
|
"grad_norm": 1.4811334241506124, |
|
"learning_rate": 2.47626610839104e-07, |
|
"loss": 0.0461, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.800796812749004, |
|
"grad_norm": 1.4378347029531395, |
|
"learning_rate": 2.4278726825502696e-07, |
|
"loss": 0.0308, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.802788844621514, |
|
"grad_norm": 1.3874168126207334, |
|
"learning_rate": 2.3799450406792435e-07, |
|
"loss": 0.0383, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.8047808764940239, |
|
"grad_norm": 1.309299013582529, |
|
"learning_rate": 2.3324836520428275e-07, |
|
"loss": 0.0313, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.8067729083665338, |
|
"grad_norm": 1.724858228945609, |
|
"learning_rate": 2.285488981340722e-07, |
|
"loss": 0.0355, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.8087649402390438, |
|
"grad_norm": 1.696250952718899, |
|
"learning_rate": 2.2389614887029564e-07, |
|
"loss": 0.0314, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.8107569721115537, |
|
"grad_norm": 1.4712298958031025, |
|
"learning_rate": 2.1929016296853679e-07, |
|
"loss": 0.0371, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.812749003984064, |
|
"grad_norm": 1.2608615587029985, |
|
"learning_rate": 2.147309855265145e-07, |
|
"loss": 0.0313, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.8147410358565739, |
|
"grad_norm": 1.6486609933665899, |
|
"learning_rate": 2.1021866118363987e-07, |
|
"loss": 0.0372, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.8167330677290838, |
|
"grad_norm": 1.456648149760954, |
|
"learning_rate": 2.0575323412058036e-07, |
|
"loss": 0.036, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.8187250996015938, |
|
"grad_norm": 1.6643795113487712, |
|
"learning_rate": 2.0133474805882735e-07, |
|
"loss": 0.0357, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.8207171314741037, |
|
"grad_norm": 1.4592297656846365, |
|
"learning_rate": 1.9696324626026774e-07, |
|
"loss": 0.0343, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.8227091633466137, |
|
"grad_norm": 1.5024913962281268, |
|
"learning_rate": 1.926387715267597e-07, |
|
"loss": 0.0332, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.8247011952191237, |
|
"grad_norm": 1.5222331132128035, |
|
"learning_rate": 1.8836136619971468e-07, |
|
"loss": 0.0274, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.8266932270916336, |
|
"grad_norm": 1.470688430167571, |
|
"learning_rate": 1.8413107215968174e-07, |
|
"loss": 0.0358, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.8286852589641436, |
|
"grad_norm": 1.2370854006861078, |
|
"learning_rate": 1.7994793082593942e-07, |
|
"loss": 0.0294, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.8306772908366535, |
|
"grad_norm": 1.479773183899037, |
|
"learning_rate": 1.7581198315608727e-07, |
|
"loss": 0.0404, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.8326693227091635, |
|
"grad_norm": 1.9167324471302913, |
|
"learning_rate": 1.7172326964564777e-07, |
|
"loss": 0.0444, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.8346613545816735, |
|
"grad_norm": 1.4329590983432605, |
|
"learning_rate": 1.6768183032766728e-07, |
|
"loss": 0.0372, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.8366533864541834, |
|
"grad_norm": 1.2780231630551226, |
|
"learning_rate": 1.6368770477232622e-07, |
|
"loss": 0.0328, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.8386454183266934, |
|
"grad_norm": 1.4118812529232878, |
|
"learning_rate": 1.597409320865506e-07, |
|
"loss": 0.0415, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.8406374501992033, |
|
"grad_norm": 1.836114736553896, |
|
"learning_rate": 1.5584155091362907e-07, |
|
"loss": 0.0489, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.8426294820717133, |
|
"grad_norm": 1.3679200385396149, |
|
"learning_rate": 1.5198959943283466e-07, |
|
"loss": 0.0323, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.8446215139442232, |
|
"grad_norm": 1.5872343873759975, |
|
"learning_rate": 1.4818511535905077e-07, |
|
"loss": 0.0395, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.8466135458167332, |
|
"grad_norm": 1.4421140886448538, |
|
"learning_rate": 1.444281359424038e-07, |
|
"loss": 0.0315, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.8486055776892432, |
|
"grad_norm": 1.7604548086333676, |
|
"learning_rate": 1.4071869796789427e-07, |
|
"loss": 0.05, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.8505976095617531, |
|
"grad_norm": 1.4556017520765743, |
|
"learning_rate": 1.3705683775504075e-07, |
|
"loss": 0.0281, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.852589641434263, |
|
"grad_norm": 1.3271732449749567, |
|
"learning_rate": 1.3344259115752268e-07, |
|
"loss": 0.0324, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.854581673306773, |
|
"grad_norm": 1.2761857510150398, |
|
"learning_rate": 1.2987599356282853e-07, |
|
"loss": 0.0391, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.856573705179283, |
|
"grad_norm": 1.5833152792310408, |
|
"learning_rate": 1.263570798919106e-07, |
|
"loss": 0.0339, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.858565737051793, |
|
"grad_norm": 1.2639695829653441, |
|
"learning_rate": 1.2288588459884344e-07, |
|
"loss": 0.0345, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.860557768924303, |
|
"grad_norm": 1.3658369812613829, |
|
"learning_rate": 1.1946244167048314e-07, |
|
"loss": 0.0315, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.8625498007968129, |
|
"grad_norm": 1.4408384970498218, |
|
"learning_rate": 1.1608678462613987e-07, |
|
"loss": 0.0405, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.8645418326693228, |
|
"grad_norm": 1.8837289894616709, |
|
"learning_rate": 1.1275894651724517e-07, |
|
"loss": 0.0432, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.8665338645418328, |
|
"grad_norm": 1.5772528501243686, |
|
"learning_rate": 1.0947895992703129e-07, |
|
"loss": 0.034, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.8685258964143427, |
|
"grad_norm": 1.498705914254895, |
|
"learning_rate": 1.062468569702102e-07, |
|
"loss": 0.0348, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.8705179282868527, |
|
"grad_norm": 1.6273002546701671, |
|
"learning_rate": 1.0306266929265951e-07, |
|
"loss": 0.0444, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.8725099601593627, |
|
"grad_norm": 1.8115132687464455, |
|
"learning_rate": 9.992642807111486e-08, |
|
"loss": 0.045, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.8745019920318726, |
|
"grad_norm": 1.2331103677449673, |
|
"learning_rate": 9.683816401286017e-08, |
|
"loss": 0.0384, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.8764940239043826, |
|
"grad_norm": 1.719077840733274, |
|
"learning_rate": 9.379790735543182e-08, |
|
"loss": 0.0426, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.8784860557768925, |
|
"grad_norm": 1.3832648508966021, |
|
"learning_rate": 9.080568786631939e-08, |
|
"loss": 0.0305, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.8804780876494025, |
|
"grad_norm": 1.6655589837620999, |
|
"learning_rate": 8.78615348426759e-08, |
|
"loss": 0.0387, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.8824701195219125, |
|
"grad_norm": 1.3436243075109746, |
|
"learning_rate": 8.49654771110292e-08, |
|
"loss": 0.0336, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.8844621513944224, |
|
"grad_norm": 1.4941725985898642, |
|
"learning_rate": 8.211754302700159e-08, |
|
"loss": 0.035, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.8864541832669324, |
|
"grad_norm": 1.2284626460508044, |
|
"learning_rate": 7.93177604750317e-08, |
|
"loss": 0.0285, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.8884462151394423, |
|
"grad_norm": 1.225195965424142, |
|
"learning_rate": 7.656615686809976e-08, |
|
"loss": 0.0269, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.8904382470119523, |
|
"grad_norm": 1.3770985891500578, |
|
"learning_rate": 7.386275914746222e-08, |
|
"loss": 0.0324, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.8924302788844622, |
|
"grad_norm": 1.2826488338620294, |
|
"learning_rate": 7.120759378238585e-08, |
|
"loss": 0.032, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.8944223107569722, |
|
"grad_norm": 1.6006341483416981, |
|
"learning_rate": 6.860068676988907e-08, |
|
"loss": 0.0392, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.8964143426294822, |
|
"grad_norm": 1.4187750994999075, |
|
"learning_rate": 6.604206363448662e-08, |
|
"loss": 0.0321, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.8984063745019921, |
|
"grad_norm": 1.3893878947453275, |
|
"learning_rate": 6.353174942794138e-08, |
|
"loss": 0.0405, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.900398406374502, |
|
"grad_norm": 1.7017032311402343, |
|
"learning_rate": 6.106976872901793e-08, |
|
"loss": 0.0358, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.902390438247012, |
|
"grad_norm": 1.5607283128871816, |
|
"learning_rate": 5.865614564324273e-08, |
|
"loss": 0.0366, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.904382470119522, |
|
"grad_norm": 1.8745845801550434, |
|
"learning_rate": 5.6290903802665444e-08, |
|
"loss": 0.0493, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.906374501992032, |
|
"grad_norm": 1.8941896384592207, |
|
"learning_rate": 5.397406636563296e-08, |
|
"loss": 0.0482, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.908366533864542, |
|
"grad_norm": 1.7755712845597584, |
|
"learning_rate": 5.1705656016555196e-08, |
|
"loss": 0.0377, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.9103585657370519, |
|
"grad_norm": 1.5081972730474071, |
|
"learning_rate": 4.948569496569078e-08, |
|
"loss": 0.0424, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.9123505976095618, |
|
"grad_norm": 1.5670265082331885, |
|
"learning_rate": 4.7314204948923356e-08, |
|
"loss": 0.0352, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.9143426294820718, |
|
"grad_norm": 1.5649598609019262, |
|
"learning_rate": 4.5191207227553437e-08, |
|
"loss": 0.0325, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.9163346613545817, |
|
"grad_norm": 1.2941707907708186, |
|
"learning_rate": 4.311672258808575e-08, |
|
"loss": 0.0282, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.9183266932270917, |
|
"grad_norm": 1.5926908997623397, |
|
"learning_rate": 4.109077134202999e-08, |
|
"loss": 0.0338, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.9203187250996017, |
|
"grad_norm": 1.6503076572985267, |
|
"learning_rate": 3.911337332569876e-08, |
|
"loss": 0.0406, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.9223107569721116, |
|
"grad_norm": 1.7379935919621892, |
|
"learning_rate": 3.718454790001546e-08, |
|
"loss": 0.0402, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.9243027888446216, |
|
"grad_norm": 1.2573758118317542, |
|
"learning_rate": 3.530431395032396e-08, |
|
"loss": 0.0308, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.9262948207171315, |
|
"grad_norm": 1.8088122825079003, |
|
"learning_rate": 3.347268988620256e-08, |
|
"loss": 0.0416, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.9282868525896415, |
|
"grad_norm": 1.5128035830310647, |
|
"learning_rate": 3.168969364128527e-08, |
|
"loss": 0.0314, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.9302788844621515, |
|
"grad_norm": 1.5006247617840172, |
|
"learning_rate": 2.995534267308697e-08, |
|
"loss": 0.0336, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.9322709163346614, |
|
"grad_norm": 1.4914522988842465, |
|
"learning_rate": 2.8269653962829104e-08, |
|
"loss": 0.0391, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.9342629482071714, |
|
"grad_norm": 1.795362403964496, |
|
"learning_rate": 2.6632644015276987e-08, |
|
"loss": 0.0516, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.9362549800796813, |
|
"grad_norm": 1.516180508821745, |
|
"learning_rate": 2.5044328858576105e-08, |
|
"loss": 0.0379, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.9382470119521913, |
|
"grad_norm": 2.0020652857017813, |
|
"learning_rate": 2.3504724044097206e-08, |
|
"loss": 0.0469, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.9402390438247012, |
|
"grad_norm": 1.5598747529026058, |
|
"learning_rate": 2.2013844646280313e-08, |
|
"loss": 0.0396, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.9422310756972112, |
|
"grad_norm": 1.6142472802956638, |
|
"learning_rate": 2.057170526249097e-08, |
|
"loss": 0.0404, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.9442231075697212, |
|
"grad_norm": 1.2019709175359712, |
|
"learning_rate": 1.917832001287645e-08, |
|
"loss": 0.0287, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.9462151394422311, |
|
"grad_norm": 1.251311639854653, |
|
"learning_rate": 1.783370254022587e-08, |
|
"loss": 0.0376, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.948207171314741, |
|
"grad_norm": 1.6937487382902727, |
|
"learning_rate": 1.6537866009837533e-08, |
|
"loss": 0.0487, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.950199203187251, |
|
"grad_norm": 1.9336485273597244, |
|
"learning_rate": 1.5290823109390673e-08, |
|
"loss": 0.0436, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.952191235059761, |
|
"grad_norm": 1.2972478534447982, |
|
"learning_rate": 1.4092586048820578e-08, |
|
"loss": 0.0355, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.954183266932271, |
|
"grad_norm": 1.5530751540422953, |
|
"learning_rate": 1.2943166560199228e-08, |
|
"loss": 0.0328, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.956175298804781, |
|
"grad_norm": 1.5748922448723512, |
|
"learning_rate": 1.1842575897619835e-08, |
|
"loss": 0.0325, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.9581673306772909, |
|
"grad_norm": 1.4683412245601868, |
|
"learning_rate": 1.0790824837088043e-08, |
|
"loss": 0.0305, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.9601593625498008, |
|
"grad_norm": 1.704418853645297, |
|
"learning_rate": 9.787923676414235e-09, |
|
"loss": 0.043, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.9621513944223108, |
|
"grad_norm": 1.2313086946224918, |
|
"learning_rate": 8.833882235115277e-09, |
|
"loss": 0.041, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.9641434262948207, |
|
"grad_norm": 1.5411732148234083, |
|
"learning_rate": 7.928709854316818e-09, |
|
"loss": 0.0319, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.9661354581673307, |
|
"grad_norm": 1.803560123914151, |
|
"learning_rate": 7.072415396661703e-09, |
|
"loss": 0.0405, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.9681274900398407, |
|
"grad_norm": 1.4510860737042486, |
|
"learning_rate": 6.265007246223365e-09, |
|
"loss": 0.0348, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.9701195219123506, |
|
"grad_norm": 1.508510166730464, |
|
"learning_rate": 5.506493308425342e-09, |
|
"loss": 0.0334, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.9721115537848606, |
|
"grad_norm": 1.7329289625239936, |
|
"learning_rate": 4.796881009961341e-09, |
|
"loss": 0.0427, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.9741035856573705, |
|
"grad_norm": 1.6711540109669145, |
|
"learning_rate": 4.136177298724176e-09, |
|
"loss": 0.0321, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.9760956175298805, |
|
"grad_norm": 1.5519285892825812, |
|
"learning_rate": 3.524388643736387e-09, |
|
"loss": 0.0337, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.9780876494023905, |
|
"grad_norm": 1.5062338202236834, |
|
"learning_rate": 2.9615210350891764e-09, |
|
"loss": 0.0342, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.9800796812749004, |
|
"grad_norm": 1.4830859961400638, |
|
"learning_rate": 2.447579983881343e-09, |
|
"loss": 0.0316, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.9820717131474104, |
|
"grad_norm": 1.723742367527845, |
|
"learning_rate": 1.9825705221665493e-09, |
|
"loss": 0.0375, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.9840637450199203, |
|
"grad_norm": 1.41394511694927, |
|
"learning_rate": 1.566497202904471e-09, |
|
"loss": 0.0296, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.9860557768924303, |
|
"grad_norm": 1.3088020276395265, |
|
"learning_rate": 1.1993640999147238e-09, |
|
"loss": 0.0267, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.9880478087649402, |
|
"grad_norm": 1.6100561146810857, |
|
"learning_rate": 8.811748078385584e-10, |
|
"loss": 0.0358, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.9900398406374502, |
|
"grad_norm": 1.535784061618093, |
|
"learning_rate": 6.119324421016704e-10, |
|
"loss": 0.035, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.9920318725099602, |
|
"grad_norm": 1.4352594424209704, |
|
"learning_rate": 3.916396388869981e-10, |
|
"loss": 0.0378, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9920318725099602, |
|
"eval_loss": 0.10359270870685577, |
|
"eval_runtime": 3.1378, |
|
"eval_samples_per_second": 13.067, |
|
"eval_steps_per_second": 3.506, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9940239043824701, |
|
"grad_norm": 1.4155470820818645, |
|
"learning_rate": 2.2029855510474762e-10, |
|
"loss": 0.0368, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.99601593625498, |
|
"grad_norm": 1.7851317595092946, |
|
"learning_rate": 9.791086837573905e-11, |
|
"loss": 0.0397, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.99800796812749, |
|
"grad_norm": 1.4719784863811807, |
|
"learning_rate": 2.4477777010312175e-11, |
|
"loss": 0.0351, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1952136070405424, |
|
"learning_rate": 0.0, |
|
"loss": 0.028, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1004, |
|
"total_flos": 18718017454080.0, |
|
"train_loss": 0.09493647176267735, |
|
"train_runtime": 1399.7108, |
|
"train_samples_per_second": 5.73, |
|
"train_steps_per_second": 0.717 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1004, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 18718017454080.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|