{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8071, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012390038409119069, "grad_norm": 0.8171148300170898, "learning_rate": 2.4752475247524754e-07, "loss": 0.2732, "step": 1 }, { "epoch": 0.0006195019204559534, "grad_norm": 0.5865882039070129, "learning_rate": 1.2376237623762377e-06, "loss": 0.2864, "step": 5 }, { "epoch": 0.0012390038409119068, "grad_norm": 0.7508181929588318, "learning_rate": 2.4752475247524753e-06, "loss": 0.272, "step": 10 }, { "epoch": 0.0018585057613678603, "grad_norm": 0.5865369439125061, "learning_rate": 3.7128712871287128e-06, "loss": 0.2839, "step": 15 }, { "epoch": 0.0024780076818238135, "grad_norm": 0.571369469165802, "learning_rate": 4.950495049504951e-06, "loss": 0.2702, "step": 20 }, { "epoch": 0.003097509602279767, "grad_norm": 0.6478548645973206, "learning_rate": 6.1881188118811885e-06, "loss": 0.2317, "step": 25 }, { "epoch": 0.0037170115227357207, "grad_norm": 0.4509166479110718, "learning_rate": 7.4257425742574256e-06, "loss": 0.1915, "step": 30 }, { "epoch": 0.004336513443191674, "grad_norm": 0.4477602541446686, "learning_rate": 8.663366336633663e-06, "loss": 0.1675, "step": 35 }, { "epoch": 0.004956015363647627, "grad_norm": 0.5267804861068726, "learning_rate": 9.900990099009901e-06, "loss": 0.128, "step": 40 }, { "epoch": 0.00557551728410358, "grad_norm": 0.4315434992313385, "learning_rate": 1.113861386138614e-05, "loss": 0.0794, "step": 45 }, { "epoch": 0.006195019204559534, "grad_norm": 0.2182588130235672, "learning_rate": 1.2376237623762377e-05, "loss": 0.051, "step": 50 }, { "epoch": 0.006814521125015487, "grad_norm": 0.22109036147594452, "learning_rate": 1.3613861386138616e-05, "loss": 0.0273, "step": 55 }, { "epoch": 0.007434023045471441, "grad_norm": 0.27581965923309326, "learning_rate": 1.4851485148514851e-05, "loss": 0.0262, "step": 60 }, { "epoch": 0.008053524965927395, "grad_norm": 0.11976505070924759, "learning_rate": 1.608910891089109e-05, "loss": 0.0113, "step": 65 }, { "epoch": 0.008673026886383347, "grad_norm": 0.052997805178165436, "learning_rate": 1.7326732673267325e-05, "loss": 0.006, "step": 70 }, { "epoch": 0.009292528806839302, "grad_norm": 0.06217635050415993, "learning_rate": 1.8564356435643564e-05, "loss": 0.0053, "step": 75 }, { "epoch": 0.009912030727295254, "grad_norm": 0.04059373959898949, "learning_rate": 1.9801980198019803e-05, "loss": 0.0029, "step": 80 }, { "epoch": 0.010531532647751208, "grad_norm": 0.02450651116669178, "learning_rate": 2.103960396039604e-05, "loss": 0.0041, "step": 85 }, { "epoch": 0.01115103456820716, "grad_norm": 0.19307288527488708, "learning_rate": 2.227722772277228e-05, "loss": 0.003, "step": 90 }, { "epoch": 0.011770536488663115, "grad_norm": 0.033222004771232605, "learning_rate": 2.3514851485148515e-05, "loss": 0.002, "step": 95 }, { "epoch": 0.012390038409119068, "grad_norm": 0.04849155619740486, "learning_rate": 2.4752475247524754e-05, "loss": 0.0038, "step": 100 }, { "epoch": 0.013009540329575022, "grad_norm": 0.007733750622719526, "learning_rate": 2.5990099009900993e-05, "loss": 0.0016, "step": 105 }, { "epoch": 0.013629042250030974, "grad_norm": 0.016011925414204597, "learning_rate": 2.722772277227723e-05, "loss": 0.0011, "step": 110 }, { "epoch": 0.014248544170486929, "grad_norm": 0.008594951592385769, "learning_rate": 2.8465346534653464e-05, "loss": 0.0008, "step": 115 }, { "epoch": 0.014868046090942883, "grad_norm": 0.023802626878023148, "learning_rate": 2.9702970297029702e-05, "loss": 0.0022, "step": 120 }, { "epoch": 0.015487548011398835, "grad_norm": 0.004283198155462742, "learning_rate": 3.094059405940594e-05, "loss": 0.0008, "step": 125 }, { "epoch": 0.01610704993185479, "grad_norm": 0.0410744734108448, "learning_rate": 3.217821782178218e-05, "loss": 0.0006, "step": 130 }, { "epoch": 0.016726551852310742, "grad_norm": 0.014116263948380947, "learning_rate": 3.341584158415842e-05, "loss": 0.0006, "step": 135 }, { "epoch": 0.017346053772766695, "grad_norm": 0.03373579680919647, "learning_rate": 3.465346534653465e-05, "loss": 0.0014, "step": 140 }, { "epoch": 0.01796555569322265, "grad_norm": 0.011104862205684185, "learning_rate": 3.589108910891089e-05, "loss": 0.0006, "step": 145 }, { "epoch": 0.018585057613678603, "grad_norm": 0.00416351156309247, "learning_rate": 3.712871287128713e-05, "loss": 0.0007, "step": 150 }, { "epoch": 0.019204559534134556, "grad_norm": 0.005642627365887165, "learning_rate": 3.8366336633663367e-05, "loss": 0.0006, "step": 155 }, { "epoch": 0.019824061454590508, "grad_norm": 0.050379302352666855, "learning_rate": 3.9603960396039605e-05, "loss": 0.0007, "step": 160 }, { "epoch": 0.020443563375046464, "grad_norm": 0.1290789693593979, "learning_rate": 4.0841584158415844e-05, "loss": 0.0013, "step": 165 }, { "epoch": 0.021063065295502417, "grad_norm": 0.006892290432006121, "learning_rate": 4.207920792079208e-05, "loss": 0.0006, "step": 170 }, { "epoch": 0.02168256721595837, "grad_norm": 0.0032141890842467546, "learning_rate": 4.331683168316832e-05, "loss": 0.0007, "step": 175 }, { "epoch": 0.02230206913641432, "grad_norm": 0.005677732173353434, "learning_rate": 4.455445544554456e-05, "loss": 0.0004, "step": 180 }, { "epoch": 0.022921571056870278, "grad_norm": 0.002673968207091093, "learning_rate": 4.57920792079208e-05, "loss": 0.0007, "step": 185 }, { "epoch": 0.02354107297732623, "grad_norm": 0.003006473882123828, "learning_rate": 4.702970297029703e-05, "loss": 0.002, "step": 190 }, { "epoch": 0.024160574897782183, "grad_norm": 0.004191545769572258, "learning_rate": 4.826732673267327e-05, "loss": 0.0007, "step": 195 }, { "epoch": 0.024780076818238135, "grad_norm": 0.005613662302494049, "learning_rate": 4.950495049504951e-05, "loss": 0.0004, "step": 200 }, { "epoch": 0.02539957873869409, "grad_norm": 0.004046352580189705, "learning_rate": 5.074257425742575e-05, "loss": 0.0004, "step": 205 }, { "epoch": 0.026019080659150044, "grad_norm": 0.004100356716662645, "learning_rate": 5.1980198019801986e-05, "loss": 0.0007, "step": 210 }, { "epoch": 0.026638582579605996, "grad_norm": 0.002381186233833432, "learning_rate": 5.3217821782178224e-05, "loss": 0.0004, "step": 215 }, { "epoch": 0.02725808450006195, "grad_norm": 0.006840377114713192, "learning_rate": 5.445544554455446e-05, "loss": 0.0007, "step": 220 }, { "epoch": 0.027877586420517905, "grad_norm": 0.02731333114206791, "learning_rate": 5.56930693069307e-05, "loss": 0.0005, "step": 225 }, { "epoch": 0.028497088340973857, "grad_norm": 0.00305747939273715, "learning_rate": 5.693069306930693e-05, "loss": 0.0004, "step": 230 }, { "epoch": 0.02911659026142981, "grad_norm": 0.009804563596844673, "learning_rate": 5.8168316831683166e-05, "loss": 0.0004, "step": 235 }, { "epoch": 0.029736092181885766, "grad_norm": 0.004222201183438301, "learning_rate": 5.9405940594059404e-05, "loss": 0.0003, "step": 240 }, { "epoch": 0.030355594102341718, "grad_norm": 0.0017239191802218556, "learning_rate": 6.064356435643564e-05, "loss": 0.0008, "step": 245 }, { "epoch": 0.03097509602279767, "grad_norm": 0.0024299216456711292, "learning_rate": 6.188118811881188e-05, "loss": 0.0003, "step": 250 }, { "epoch": 0.03159459794325362, "grad_norm": 0.002068975241854787, "learning_rate": 6.311881188118812e-05, "loss": 0.0003, "step": 255 }, { "epoch": 0.03221409986370958, "grad_norm": 0.05045396462082863, "learning_rate": 6.435643564356436e-05, "loss": 0.004, "step": 260 }, { "epoch": 0.03283360178416553, "grad_norm": 0.007033636327832937, "learning_rate": 6.55940594059406e-05, "loss": 0.0006, "step": 265 }, { "epoch": 0.033453103704621484, "grad_norm": 0.048315584659576416, "learning_rate": 6.683168316831684e-05, "loss": 0.0006, "step": 270 }, { "epoch": 0.03407260562507744, "grad_norm": 0.008937629871070385, "learning_rate": 6.806930693069308e-05, "loss": 0.0006, "step": 275 }, { "epoch": 0.03469210754553339, "grad_norm": 0.002053669421002269, "learning_rate": 6.93069306930693e-05, "loss": 0.0007, "step": 280 }, { "epoch": 0.035311609465989345, "grad_norm": 0.0016619764501228929, "learning_rate": 7.054455445544554e-05, "loss": 0.0003, "step": 285 }, { "epoch": 0.0359311113864453, "grad_norm": 0.03136918693780899, "learning_rate": 7.178217821782178e-05, "loss": 0.0004, "step": 290 }, { "epoch": 0.03655061330690125, "grad_norm": 0.002426222898066044, "learning_rate": 7.301980198019802e-05, "loss": 0.0003, "step": 295 }, { "epoch": 0.037170115227357206, "grad_norm": 0.004757250193506479, "learning_rate": 7.425742574257426e-05, "loss": 0.0003, "step": 300 }, { "epoch": 0.037789617147813155, "grad_norm": 0.0023780246265232563, "learning_rate": 7.54950495049505e-05, "loss": 0.0003, "step": 305 }, { "epoch": 0.03840911906826911, "grad_norm": 0.0011860375525429845, "learning_rate": 7.673267326732673e-05, "loss": 0.0003, "step": 310 }, { "epoch": 0.03902862098872507, "grad_norm": 0.0011214578989893198, "learning_rate": 7.797029702970297e-05, "loss": 0.0002, "step": 315 }, { "epoch": 0.039648122909181016, "grad_norm": 0.0019758904818445444, "learning_rate": 7.920792079207921e-05, "loss": 0.0003, "step": 320 }, { "epoch": 0.04026762482963697, "grad_norm": 0.0009349973988719285, "learning_rate": 8.044554455445545e-05, "loss": 0.0002, "step": 325 }, { "epoch": 0.04088712675009293, "grad_norm": 0.0014954836806282401, "learning_rate": 8.168316831683169e-05, "loss": 0.0002, "step": 330 }, { "epoch": 0.04150662867054888, "grad_norm": 0.002598302438855171, "learning_rate": 8.292079207920793e-05, "loss": 0.0002, "step": 335 }, { "epoch": 0.04212613059100483, "grad_norm": 0.005693716462701559, "learning_rate": 8.415841584158417e-05, "loss": 0.0003, "step": 340 }, { "epoch": 0.04274563251146078, "grad_norm": 0.0008605199982412159, "learning_rate": 8.53960396039604e-05, "loss": 0.0002, "step": 345 }, { "epoch": 0.04336513443191674, "grad_norm": 0.0022424368653446436, "learning_rate": 8.663366336633664e-05, "loss": 0.0002, "step": 350 }, { "epoch": 0.043984636352372694, "grad_norm": 0.0010018571047112346, "learning_rate": 8.787128712871288e-05, "loss": 0.0002, "step": 355 }, { "epoch": 0.04460413827282864, "grad_norm": 0.001462286221794784, "learning_rate": 8.910891089108912e-05, "loss": 0.0002, "step": 360 }, { "epoch": 0.0452236401932846, "grad_norm": 0.0019121841760352254, "learning_rate": 9.034653465346536e-05, "loss": 0.0002, "step": 365 }, { "epoch": 0.045843142113740555, "grad_norm": 0.0011392048327252269, "learning_rate": 9.15841584158416e-05, "loss": 0.0002, "step": 370 }, { "epoch": 0.046462644034196504, "grad_norm": 0.0014310522237792611, "learning_rate": 9.282178217821784e-05, "loss": 0.0002, "step": 375 }, { "epoch": 0.04708214595465246, "grad_norm": 0.0013662364799529314, "learning_rate": 9.405940594059406e-05, "loss": 0.0002, "step": 380 }, { "epoch": 0.047701647875108416, "grad_norm": 0.0038763852789998055, "learning_rate": 9.52970297029703e-05, "loss": 0.0002, "step": 385 }, { "epoch": 0.048321149795564365, "grad_norm": 0.001213462557643652, "learning_rate": 9.653465346534654e-05, "loss": 0.0001, "step": 390 }, { "epoch": 0.04894065171602032, "grad_norm": 0.022503281012177467, "learning_rate": 9.777227722772278e-05, "loss": 0.0002, "step": 395 }, { "epoch": 0.04956015363647627, "grad_norm": 0.0007574481423944235, "learning_rate": 9.900990099009902e-05, "loss": 0.0002, "step": 400 }, { "epoch": 0.050179655556932226, "grad_norm": 0.0016326636541634798, "learning_rate": 0.00010024752475247526, "loss": 0.0002, "step": 405 }, { "epoch": 0.05079915747738818, "grad_norm": 0.0009274051990360022, "learning_rate": 0.0001014851485148515, "loss": 0.0001, "step": 410 }, { "epoch": 0.05141865939784413, "grad_norm": 0.0007227437454275787, "learning_rate": 0.00010272277227722773, "loss": 0.0001, "step": 415 }, { "epoch": 0.05203816131830009, "grad_norm": 0.0030249289702624083, "learning_rate": 0.00010396039603960397, "loss": 0.0001, "step": 420 }, { "epoch": 0.05265766323875604, "grad_norm": 0.0014620523434132338, "learning_rate": 0.00010519801980198021, "loss": 0.0001, "step": 425 }, { "epoch": 0.05327716515921199, "grad_norm": 0.0007130319718271494, "learning_rate": 0.00010643564356435645, "loss": 0.0001, "step": 430 }, { "epoch": 0.05389666707966795, "grad_norm": 0.0006076836143620312, "learning_rate": 0.00010767326732673269, "loss": 0.0001, "step": 435 }, { "epoch": 0.0545161690001239, "grad_norm": 0.0006773742497898638, "learning_rate": 0.00010891089108910893, "loss": 0.0001, "step": 440 }, { "epoch": 0.05513567092057985, "grad_norm": 0.0007489768322557211, "learning_rate": 0.00011014851485148517, "loss": 0.0001, "step": 445 }, { "epoch": 0.05575517284103581, "grad_norm": 0.0007599226082675159, "learning_rate": 0.0001113861386138614, "loss": 0.0001, "step": 450 }, { "epoch": 0.05637467476149176, "grad_norm": 0.001440377556718886, "learning_rate": 0.00011262376237623762, "loss": 0.0001, "step": 455 }, { "epoch": 0.056994176681947714, "grad_norm": 0.000529022712726146, "learning_rate": 0.00011386138613861385, "loss": 0.0001, "step": 460 }, { "epoch": 0.05761367860240367, "grad_norm": 0.0005864354898221791, "learning_rate": 0.00011509900990099009, "loss": 0.0001, "step": 465 }, { "epoch": 0.05823318052285962, "grad_norm": 0.000685163598973304, "learning_rate": 0.00011633663366336633, "loss": 0.0001, "step": 470 }, { "epoch": 0.058852682443315575, "grad_norm": 0.0007952851010486484, "learning_rate": 0.00011757425742574257, "loss": 0.0001, "step": 475 }, { "epoch": 0.05947218436377153, "grad_norm": 0.0009927827632054687, "learning_rate": 0.00011881188118811881, "loss": 0.0001, "step": 480 }, { "epoch": 0.06009168628422748, "grad_norm": 0.0015025357715785503, "learning_rate": 0.00012004950495049505, "loss": 0.0001, "step": 485 }, { "epoch": 0.060711188204683436, "grad_norm": 0.0005874437629245222, "learning_rate": 0.00012128712871287129, "loss": 0.0001, "step": 490 }, { "epoch": 0.061330690125139385, "grad_norm": 0.0007537162164226174, "learning_rate": 0.00012252475247524753, "loss": 0.0002, "step": 495 }, { "epoch": 0.06195019204559534, "grad_norm": 0.0010542782256379724, "learning_rate": 0.00012376237623762376, "loss": 0.0001, "step": 500 }, { "epoch": 0.06256969396605129, "grad_norm": 0.0009638050105422735, "learning_rate": 0.000125, "loss": 0.0001, "step": 505 }, { "epoch": 0.06318919588650725, "grad_norm": 0.000623985833954066, "learning_rate": 0.00012623762376237624, "loss": 0.0001, "step": 510 }, { "epoch": 0.0638086978069632, "grad_norm": 0.00036727244150824845, "learning_rate": 0.00012747524752475248, "loss": 0.0002, "step": 515 }, { "epoch": 0.06442819972741916, "grad_norm": 0.0007118601351976395, "learning_rate": 0.00012871287128712872, "loss": 0.0001, "step": 520 }, { "epoch": 0.06504770164787511, "grad_norm": 0.0005547236069105566, "learning_rate": 0.00012995049504950496, "loss": 0.0001, "step": 525 }, { "epoch": 0.06566720356833106, "grad_norm": 0.0011366987600922585, "learning_rate": 0.0001311881188118812, "loss": 0.0001, "step": 530 }, { "epoch": 0.06628670548878701, "grad_norm": 0.000973159505520016, "learning_rate": 0.00013242574257425743, "loss": 0.0001, "step": 535 }, { "epoch": 0.06690620740924297, "grad_norm": 0.000375576491933316, "learning_rate": 0.00013366336633663367, "loss": 0.0001, "step": 540 }, { "epoch": 0.06752570932969892, "grad_norm": 0.0015565247740596533, "learning_rate": 0.0001349009900990099, "loss": 0.0001, "step": 545 }, { "epoch": 0.06814521125015488, "grad_norm": 0.0006600512424483895, "learning_rate": 0.00013613861386138615, "loss": 0.0001, "step": 550 }, { "epoch": 0.06876471317061082, "grad_norm": 0.0003556181618478149, "learning_rate": 0.0001373762376237624, "loss": 0.0001, "step": 555 }, { "epoch": 0.06938421509106678, "grad_norm": 0.0003735134087037295, "learning_rate": 0.0001386138613861386, "loss": 0.0001, "step": 560 }, { "epoch": 0.07000371701152273, "grad_norm": 0.0013679016847163439, "learning_rate": 0.00013985148514851484, "loss": 0.0001, "step": 565 }, { "epoch": 0.07062321893197869, "grad_norm": 0.0004570016171783209, "learning_rate": 0.00014108910891089108, "loss": 0.0001, "step": 570 }, { "epoch": 0.07124272085243465, "grad_norm": 0.0003963381750509143, "learning_rate": 0.00014232673267326732, "loss": 0.0001, "step": 575 }, { "epoch": 0.0718622227728906, "grad_norm": 0.0006178012117743492, "learning_rate": 0.00014356435643564356, "loss": 0.0001, "step": 580 }, { "epoch": 0.07248172469334654, "grad_norm": 0.00038247296470217407, "learning_rate": 0.0001448019801980198, "loss": 0.0001, "step": 585 }, { "epoch": 0.0731012266138025, "grad_norm": 0.0005610303487628698, "learning_rate": 0.00014603960396039603, "loss": 0.0001, "step": 590 }, { "epoch": 0.07372072853425846, "grad_norm": 0.0005213368567638099, "learning_rate": 0.00014727722772277227, "loss": 0.0001, "step": 595 }, { "epoch": 0.07434023045471441, "grad_norm": 0.0005507573368959129, "learning_rate": 0.0001485148514851485, "loss": 0.0001, "step": 600 }, { "epoch": 0.07495973237517037, "grad_norm": 0.003872029483318329, "learning_rate": 0.00014975247524752475, "loss": 0.0001, "step": 605 }, { "epoch": 0.07557923429562631, "grad_norm": 0.00032138984533958137, "learning_rate": 0.000150990099009901, "loss": 0.0001, "step": 610 }, { "epoch": 0.07619873621608227, "grad_norm": 0.000464010750874877, "learning_rate": 0.00015222772277227723, "loss": 0.0001, "step": 615 }, { "epoch": 0.07681823813653822, "grad_norm": 0.0005143339512869716, "learning_rate": 0.00015346534653465347, "loss": 0.0001, "step": 620 }, { "epoch": 0.07743774005699418, "grad_norm": 0.0010108405258506536, "learning_rate": 0.0001547029702970297, "loss": 0.0001, "step": 625 }, { "epoch": 0.07805724197745013, "grad_norm": 0.0004447873798198998, "learning_rate": 0.00015594059405940594, "loss": 0.0001, "step": 630 }, { "epoch": 0.07867674389790609, "grad_norm": 0.0009173435973934829, "learning_rate": 0.00015717821782178218, "loss": 0.0001, "step": 635 }, { "epoch": 0.07929624581836203, "grad_norm": 0.0003478958969935775, "learning_rate": 0.00015841584158415842, "loss": 0.0, "step": 640 }, { "epoch": 0.07991574773881799, "grad_norm": 0.0007245603483170271, "learning_rate": 0.00015965346534653466, "loss": 0.0, "step": 645 }, { "epoch": 0.08053524965927394, "grad_norm": 0.00028074192232452333, "learning_rate": 0.0001608910891089109, "loss": 0.0, "step": 650 }, { "epoch": 0.0811547515797299, "grad_norm": 0.0005884894053451717, "learning_rate": 0.00016212871287128714, "loss": 0.0001, "step": 655 }, { "epoch": 0.08177425350018586, "grad_norm": 0.0006277436041273177, "learning_rate": 0.00016336633663366338, "loss": 0.0001, "step": 660 }, { "epoch": 0.0823937554206418, "grad_norm": 0.0007243048748932779, "learning_rate": 0.00016460396039603961, "loss": 0.0, "step": 665 }, { "epoch": 0.08301325734109775, "grad_norm": 0.00035753866541199386, "learning_rate": 0.00016584158415841585, "loss": 0.0006, "step": 670 }, { "epoch": 0.08363275926155371, "grad_norm": 0.010953194461762905, "learning_rate": 0.0001670792079207921, "loss": 0.0001, "step": 675 }, { "epoch": 0.08425226118200967, "grad_norm": 0.0017663134494796395, "learning_rate": 0.00016831683168316833, "loss": 0.0001, "step": 680 }, { "epoch": 0.08487176310246562, "grad_norm": 0.02589467354118824, "learning_rate": 0.00016955445544554457, "loss": 0.0001, "step": 685 }, { "epoch": 0.08549126502292156, "grad_norm": 0.0031152956653386354, "learning_rate": 0.0001707920792079208, "loss": 0.0001, "step": 690 }, { "epoch": 0.08611076694337752, "grad_norm": 0.0004953789757564664, "learning_rate": 0.00017202970297029705, "loss": 0.0, "step": 695 }, { "epoch": 0.08673026886383348, "grad_norm": 0.007253966294229031, "learning_rate": 0.00017326732673267329, "loss": 0.0001, "step": 700 }, { "epoch": 0.08734977078428943, "grad_norm": 0.0012269304133951664, "learning_rate": 0.00017450495049504952, "loss": 0.0001, "step": 705 }, { "epoch": 0.08796927270474539, "grad_norm": 0.004027728922665119, "learning_rate": 0.00017574257425742576, "loss": 0.0004, "step": 710 }, { "epoch": 0.08858877462520134, "grad_norm": 0.003470494644716382, "learning_rate": 0.000176980198019802, "loss": 0.0001, "step": 715 }, { "epoch": 0.08920827654565729, "grad_norm": 0.002473268425092101, "learning_rate": 0.00017821782178217824, "loss": 0.0001, "step": 720 }, { "epoch": 0.08982777846611324, "grad_norm": 0.0044687651097774506, "learning_rate": 0.00017945544554455448, "loss": 0.0001, "step": 725 }, { "epoch": 0.0904472803865692, "grad_norm": 0.00038652599323540926, "learning_rate": 0.00018069306930693072, "loss": 0.0001, "step": 730 }, { "epoch": 0.09106678230702515, "grad_norm": 0.0010647657327353954, "learning_rate": 0.00018193069306930696, "loss": 0.0001, "step": 735 }, { "epoch": 0.09168628422748111, "grad_norm": 0.0003727759176399559, "learning_rate": 0.0001831683168316832, "loss": 0.0001, "step": 740 }, { "epoch": 0.09230578614793705, "grad_norm": 0.0004803001938853413, "learning_rate": 0.00018440594059405943, "loss": 0.0001, "step": 745 }, { "epoch": 0.09292528806839301, "grad_norm": 0.02083461731672287, "learning_rate": 0.00018564356435643567, "loss": 0.0001, "step": 750 }, { "epoch": 0.09354478998884896, "grad_norm": 0.00023884052643552423, "learning_rate": 0.0001868811881188119, "loss": 0.0, "step": 755 }, { "epoch": 0.09416429190930492, "grad_norm": 0.0012363456189632416, "learning_rate": 0.00018811881188118812, "loss": 0.0002, "step": 760 }, { "epoch": 0.09478379382976088, "grad_norm": 0.002488932805135846, "learning_rate": 0.00018935643564356436, "loss": 0.0001, "step": 765 }, { "epoch": 0.09540329575021683, "grad_norm": 0.0016660866094753146, "learning_rate": 0.0001905940594059406, "loss": 0.0001, "step": 770 }, { "epoch": 0.09602279767067277, "grad_norm": 0.0009721709066070616, "learning_rate": 0.00019183168316831684, "loss": 0.0003, "step": 775 }, { "epoch": 0.09664229959112873, "grad_norm": 0.18908309936523438, "learning_rate": 0.00019306930693069308, "loss": 0.0014, "step": 780 }, { "epoch": 0.09726180151158469, "grad_norm": 0.003752433927729726, "learning_rate": 0.00019430693069306932, "loss": 0.0005, "step": 785 }, { "epoch": 0.09788130343204064, "grad_norm": 0.1964350938796997, "learning_rate": 0.00019554455445544556, "loss": 0.0022, "step": 790 }, { "epoch": 0.0985008053524966, "grad_norm": 0.006680600345134735, "learning_rate": 0.0001967821782178218, "loss": 0.0007, "step": 795 }, { "epoch": 0.09912030727295254, "grad_norm": 0.04012683033943176, "learning_rate": 0.00019801980198019803, "loss": 0.0004, "step": 800 }, { "epoch": 0.0997398091934085, "grad_norm": 0.05125076323747635, "learning_rate": 0.00019925742574257427, "loss": 0.0002, "step": 805 }, { "epoch": 0.10035931111386445, "grad_norm": 0.023082932457327843, "learning_rate": 0.00019999996258053055, "loss": 0.0003, "step": 810 }, { "epoch": 0.10097881303432041, "grad_norm": 0.05832625553011894, "learning_rate": 0.0001999995416118208, "loss": 0.0002, "step": 815 }, { "epoch": 0.10159831495477636, "grad_norm": 0.12272185832262039, "learning_rate": 0.00019999865290204008, "loss": 0.0004, "step": 820 }, { "epoch": 0.1022178168752323, "grad_norm": 0.016897963359951973, "learning_rate": 0.0001999972964553453, "loss": 0.0001, "step": 825 }, { "epoch": 0.10283731879568826, "grad_norm": 0.010411771014332771, "learning_rate": 0.0001999954722780811, "loss": 0.0001, "step": 830 }, { "epoch": 0.10345682071614422, "grad_norm": 0.00578752625733614, "learning_rate": 0.00019999318037877995, "loss": 0.0001, "step": 835 }, { "epoch": 0.10407632263660017, "grad_norm": 0.001388857839629054, "learning_rate": 0.00019999042076816214, "loss": 0.0002, "step": 840 }, { "epoch": 0.10469582455705613, "grad_norm": 0.00042960603605024517, "learning_rate": 0.00019998719345913548, "loss": 0.0001, "step": 845 }, { "epoch": 0.10531532647751209, "grad_norm": 0.30472713708877563, "learning_rate": 0.00019998349846679547, "loss": 0.0039, "step": 850 }, { "epoch": 0.10593482839796803, "grad_norm": 0.025928743183612823, "learning_rate": 0.00019997933580842526, "loss": 0.0011, "step": 855 }, { "epoch": 0.10655433031842398, "grad_norm": 0.007612328976392746, "learning_rate": 0.00019997470550349534, "loss": 0.0019, "step": 860 }, { "epoch": 0.10717383223887994, "grad_norm": 0.002156597562134266, "learning_rate": 0.0001999696075736637, "loss": 0.0005, "step": 865 }, { "epoch": 0.1077933341593359, "grad_norm": 0.0008489580941386521, "learning_rate": 0.00019996404204277552, "loss": 0.0001, "step": 870 }, { "epoch": 0.10841283607979185, "grad_norm": 0.00182132248301059, "learning_rate": 0.0001999580089368632, "loss": 0.0001, "step": 875 }, { "epoch": 0.1090323380002478, "grad_norm": 0.047709498554468155, "learning_rate": 0.0001999515082841462, "loss": 0.0002, "step": 880 }, { "epoch": 0.10965183992070375, "grad_norm": 0.013201555237174034, "learning_rate": 0.00019994454011503085, "loss": 0.0002, "step": 885 }, { "epoch": 0.1102713418411597, "grad_norm": 0.00459629762917757, "learning_rate": 0.00019993710446211034, "loss": 0.0009, "step": 890 }, { "epoch": 0.11089084376161566, "grad_norm": 0.0028362020384520292, "learning_rate": 0.00019992920136016439, "loss": 0.0001, "step": 895 }, { "epoch": 0.11151034568207162, "grad_norm": 0.0009640655480325222, "learning_rate": 0.00019992083084615923, "loss": 0.0009, "step": 900 }, { "epoch": 0.11212984760252757, "grad_norm": 0.045145198702812195, "learning_rate": 0.00019991199295924737, "loss": 0.0002, "step": 905 }, { "epoch": 0.11274934952298352, "grad_norm": 0.001918994472362101, "learning_rate": 0.00019990268774076742, "loss": 0.0003, "step": 910 }, { "epoch": 0.11336885144343947, "grad_norm": 0.0031678332015872, "learning_rate": 0.00019989291523424396, "loss": 0.0001, "step": 915 }, { "epoch": 0.11398835336389543, "grad_norm": 0.059177737683057785, "learning_rate": 0.00019988267548538718, "loss": 0.0001, "step": 920 }, { "epoch": 0.11460785528435138, "grad_norm": 0.0007660340052098036, "learning_rate": 0.00019987196854209278, "loss": 0.001, "step": 925 }, { "epoch": 0.11522735720480734, "grad_norm": 0.0007047258550301194, "learning_rate": 0.00019986079445444182, "loss": 0.0001, "step": 930 }, { "epoch": 0.11584685912526328, "grad_norm": 0.0018356975633651018, "learning_rate": 0.00019984915327470032, "loss": 0.0001, "step": 935 }, { "epoch": 0.11646636104571924, "grad_norm": 0.031389400362968445, "learning_rate": 0.0001998370450573191, "loss": 0.0004, "step": 940 }, { "epoch": 0.1170858629661752, "grad_norm": 0.00041564652929082513, "learning_rate": 0.00019982446985893356, "loss": 0.0002, "step": 945 }, { "epoch": 0.11770536488663115, "grad_norm": 0.0003845066821668297, "learning_rate": 0.00019981142773836324, "loss": 0.0, "step": 950 }, { "epoch": 0.1183248668070871, "grad_norm": 0.032493408769369125, "learning_rate": 0.0001997979187566119, "loss": 0.0006, "step": 955 }, { "epoch": 0.11894436872754306, "grad_norm": 0.0030908240005373955, "learning_rate": 0.00019978394297686677, "loss": 0.0001, "step": 960 }, { "epoch": 0.119563870647999, "grad_norm": 0.0002424619160592556, "learning_rate": 0.00019976950046449873, "loss": 0.0, "step": 965 }, { "epoch": 0.12018337256845496, "grad_norm": 0.0004731389053631574, "learning_rate": 0.00019975459128706156, "loss": 0.0002, "step": 970 }, { "epoch": 0.12080287448891092, "grad_norm": 0.0016735541867092252, "learning_rate": 0.00019973921551429195, "loss": 0.0002, "step": 975 }, { "epoch": 0.12142237640936687, "grad_norm": 0.022409195080399513, "learning_rate": 0.00019972337321810904, "loss": 0.0002, "step": 980 }, { "epoch": 0.12204187832982283, "grad_norm": 0.0010909795528277755, "learning_rate": 0.0001997070644726141, "loss": 0.0, "step": 985 }, { "epoch": 0.12266138025027877, "grad_norm": 0.0003083925985265523, "learning_rate": 0.00019969028935409017, "loss": 0.0, "step": 990 }, { "epoch": 0.12328088217073473, "grad_norm": 0.012686856091022491, "learning_rate": 0.0001996730479410017, "loss": 0.0, "step": 995 }, { "epoch": 0.12390038409119068, "grad_norm": 0.00014611794904340059, "learning_rate": 0.00019965534031399432, "loss": 0.0003, "step": 1000 }, { "epoch": 0.12451988601164664, "grad_norm": 0.0010019895853474736, "learning_rate": 0.00019963716655589417, "loss": 0.0004, "step": 1005 }, { "epoch": 0.12513938793210258, "grad_norm": 0.0009234255994670093, "learning_rate": 0.0001996185267517078, "loss": 0.0002, "step": 1010 }, { "epoch": 0.12575888985255854, "grad_norm": 0.005433406215161085, "learning_rate": 0.00019959942098862163, "loss": 0.0001, "step": 1015 }, { "epoch": 0.1263783917730145, "grad_norm": 0.0051355077885091305, "learning_rate": 0.00019957984935600157, "loss": 0.0001, "step": 1020 }, { "epoch": 0.12699789369347045, "grad_norm": 0.051757294684648514, "learning_rate": 0.0001995598119453926, "loss": 0.0001, "step": 1025 }, { "epoch": 0.1276173956139264, "grad_norm": 0.004595691338181496, "learning_rate": 0.00019953930885051834, "loss": 0.0001, "step": 1030 }, { "epoch": 0.12823689753438236, "grad_norm": 0.0019630291499197483, "learning_rate": 0.00019951834016728064, "loss": 0.0003, "step": 1035 }, { "epoch": 0.12885639945483832, "grad_norm": 0.003922370728105307, "learning_rate": 0.0001994969059937591, "loss": 0.0001, "step": 1040 }, { "epoch": 0.12947590137529427, "grad_norm": 0.002835423918440938, "learning_rate": 0.00019947500643021057, "loss": 0.0001, "step": 1045 }, { "epoch": 0.13009540329575023, "grad_norm": 0.004482944495975971, "learning_rate": 0.00019945264157906882, "loss": 0.0001, "step": 1050 }, { "epoch": 0.13071490521620616, "grad_norm": 0.0002481273259036243, "learning_rate": 0.00019942981154494392, "loss": 0.0, "step": 1055 }, { "epoch": 0.1313344071366621, "grad_norm": 0.0025857596192508936, "learning_rate": 0.00019940651643462177, "loss": 0.0, "step": 1060 }, { "epoch": 0.13195390905711807, "grad_norm": 0.0002530000638216734, "learning_rate": 0.0001993827563570637, "loss": 0.0001, "step": 1065 }, { "epoch": 0.13257341097757402, "grad_norm": 0.0006596549646928906, "learning_rate": 0.00019935853142340586, "loss": 0.0, "step": 1070 }, { "epoch": 0.13319291289802998, "grad_norm": 0.0005576589028351009, "learning_rate": 0.0001993338417469587, "loss": 0.0, "step": 1075 }, { "epoch": 0.13381241481848594, "grad_norm": 0.0009830187773332, "learning_rate": 0.00019930868744320656, "loss": 0.0, "step": 1080 }, { "epoch": 0.1344319167389419, "grad_norm": 0.021230095997452736, "learning_rate": 0.00019928306862980694, "loss": 0.0002, "step": 1085 }, { "epoch": 0.13505141865939785, "grad_norm": 0.00029107555747032166, "learning_rate": 0.00019925698542659013, "loss": 0.0, "step": 1090 }, { "epoch": 0.1356709205798538, "grad_norm": 0.0006564328796230257, "learning_rate": 0.00019923043795555854, "loss": 0.0, "step": 1095 }, { "epoch": 0.13629042250030976, "grad_norm": 0.0003910544328391552, "learning_rate": 0.00019920342634088616, "loss": 0.0, "step": 1100 }, { "epoch": 0.13690992442076572, "grad_norm": 0.2171882539987564, "learning_rate": 0.00019917595070891798, "loss": 0.0013, "step": 1105 }, { "epoch": 0.13752942634122164, "grad_norm": 0.018005263060331345, "learning_rate": 0.00019914801118816938, "loss": 0.0001, "step": 1110 }, { "epoch": 0.1381489282616776, "grad_norm": 0.0005760540952906013, "learning_rate": 0.00019911960790932565, "loss": 0.0001, "step": 1115 }, { "epoch": 0.13876843018213356, "grad_norm": 0.0002963422448374331, "learning_rate": 0.00019909074100524113, "loss": 0.0005, "step": 1120 }, { "epoch": 0.1393879321025895, "grad_norm": 0.0006331729819066823, "learning_rate": 0.0001990614106109388, "loss": 0.0002, "step": 1125 }, { "epoch": 0.14000743402304547, "grad_norm": 0.0007892021676525474, "learning_rate": 0.00019903161686360966, "loss": 0.0001, "step": 1130 }, { "epoch": 0.14062693594350142, "grad_norm": 0.001115883351303637, "learning_rate": 0.00019900135990261186, "loss": 0.0005, "step": 1135 }, { "epoch": 0.14124643786395738, "grad_norm": 0.10479993373155594, "learning_rate": 0.00019897063986947034, "loss": 0.0002, "step": 1140 }, { "epoch": 0.14186593978441334, "grad_norm": 0.0006630583084188402, "learning_rate": 0.00019893945690787593, "loss": 0.0001, "step": 1145 }, { "epoch": 0.1424854417048693, "grad_norm": 0.002615175908431411, "learning_rate": 0.00019890781116368484, "loss": 0.0001, "step": 1150 }, { "epoch": 0.14310494362532525, "grad_norm": 0.0008467686711810529, "learning_rate": 0.0001988757027849179, "loss": 0.0001, "step": 1155 }, { "epoch": 0.1437244455457812, "grad_norm": 0.00045287617831490934, "learning_rate": 0.00019884313192175987, "loss": 0.0001, "step": 1160 }, { "epoch": 0.14434394746623713, "grad_norm": 0.0009068161016330123, "learning_rate": 0.00019881009872655872, "loss": 0.0007, "step": 1165 }, { "epoch": 0.1449634493866931, "grad_norm": 0.12152829766273499, "learning_rate": 0.00019877660335382503, "loss": 0.0003, "step": 1170 }, { "epoch": 0.14558295130714904, "grad_norm": 0.007462367881089449, "learning_rate": 0.0001987426459602311, "loss": 0.0004, "step": 1175 }, { "epoch": 0.146202453227605, "grad_norm": 0.004900072235614061, "learning_rate": 0.00019870822670461032, "loss": 0.0001, "step": 1180 }, { "epoch": 0.14682195514806096, "grad_norm": 0.0038032953161746264, "learning_rate": 0.00019867334574795646, "loss": 0.0001, "step": 1185 }, { "epoch": 0.1474414570685169, "grad_norm": 0.03040175698697567, "learning_rate": 0.00019863800325342282, "loss": 0.0002, "step": 1190 }, { "epoch": 0.14806095898897287, "grad_norm": 0.0014615260297432542, "learning_rate": 0.0001986021993863215, "loss": 0.0, "step": 1195 }, { "epoch": 0.14868046090942882, "grad_norm": 0.011134659871459007, "learning_rate": 0.00019856593431412267, "loss": 0.0001, "step": 1200 }, { "epoch": 0.14929996282988478, "grad_norm": 0.5269059538841248, "learning_rate": 0.00019852920820645373, "loss": 0.0013, "step": 1205 }, { "epoch": 0.14991946475034074, "grad_norm": 0.3130854666233063, "learning_rate": 0.0001984920212350986, "loss": 0.0017, "step": 1210 }, { "epoch": 0.1505389666707967, "grad_norm": 0.053764209151268005, "learning_rate": 0.00019845437357399678, "loss": 0.0009, "step": 1215 }, { "epoch": 0.15115846859125262, "grad_norm": 0.04573635011911392, "learning_rate": 0.00019841626539924265, "loss": 0.0034, "step": 1220 }, { "epoch": 0.15177797051170858, "grad_norm": 0.03521274775266647, "learning_rate": 0.0001983776968890846, "loss": 0.0008, "step": 1225 }, { "epoch": 0.15239747243216453, "grad_norm": 0.004348814487457275, "learning_rate": 0.0001983386682239243, "loss": 0.0014, "step": 1230 }, { "epoch": 0.1530169743526205, "grad_norm": 0.045531272888183594, "learning_rate": 0.00019829917958631555, "loss": 0.0002, "step": 1235 }, { "epoch": 0.15363647627307644, "grad_norm": 0.0022551945876330137, "learning_rate": 0.0001982592311609639, "loss": 0.0005, "step": 1240 }, { "epoch": 0.1542559781935324, "grad_norm": 0.11713286489248276, "learning_rate": 0.00019821882313472532, "loss": 0.002, "step": 1245 }, { "epoch": 0.15487548011398836, "grad_norm": 0.01844358630478382, "learning_rate": 0.00019817795569660563, "loss": 0.0009, "step": 1250 }, { "epoch": 0.1554949820344443, "grad_norm": 0.012676713988184929, "learning_rate": 0.00019813662903775953, "loss": 0.0052, "step": 1255 }, { "epoch": 0.15611448395490027, "grad_norm": 0.4586104452610016, "learning_rate": 0.00019809484335148964, "loss": 0.0348, "step": 1260 }, { "epoch": 0.15673398587535622, "grad_norm": 0.0076586343348026276, "learning_rate": 0.00019805259883324565, "loss": 0.0006, "step": 1265 }, { "epoch": 0.15735348779581218, "grad_norm": 0.004846053197979927, "learning_rate": 0.00019800989568062347, "loss": 0.0004, "step": 1270 }, { "epoch": 0.1579729897162681, "grad_norm": 0.05919318646192551, "learning_rate": 0.00019796673409336413, "loss": 0.0005, "step": 1275 }, { "epoch": 0.15859249163672406, "grad_norm": 0.2159508317708969, "learning_rate": 0.00019792311427335312, "loss": 0.0013, "step": 1280 }, { "epoch": 0.15921199355718002, "grad_norm": 0.43903329968452454, "learning_rate": 0.00019787903642461913, "loss": 0.0008, "step": 1285 }, { "epoch": 0.15983149547763598, "grad_norm": 0.7132349014282227, "learning_rate": 0.0001978345007533333, "loss": 0.1284, "step": 1290 }, { "epoch": 0.16045099739809193, "grad_norm": 0.3137208819389343, "learning_rate": 0.00019778950746780825, "loss": 0.0083, "step": 1295 }, { "epoch": 0.1610704993185479, "grad_norm": 0.025275858119130135, "learning_rate": 0.000197744056778497, "loss": 0.0024, "step": 1300 }, { "epoch": 0.16169000123900384, "grad_norm": 0.46965697407722473, "learning_rate": 0.00019769814889799206, "loss": 0.0019, "step": 1305 }, { "epoch": 0.1623095031594598, "grad_norm": 0.08064667135477066, "learning_rate": 0.00019765178404102443, "loss": 0.0013, "step": 1310 }, { "epoch": 0.16292900507991576, "grad_norm": 0.008910354226827621, "learning_rate": 0.00019760496242446257, "loss": 0.0012, "step": 1315 }, { "epoch": 0.1635485070003717, "grad_norm": 0.02162902057170868, "learning_rate": 0.00019755768426731144, "loss": 0.0006, "step": 1320 }, { "epoch": 0.16416800892082767, "grad_norm": 0.0011170781217515469, "learning_rate": 0.0001975099497907114, "loss": 0.0006, "step": 1325 }, { "epoch": 0.1647875108412836, "grad_norm": 0.017614515498280525, "learning_rate": 0.0001974617592179372, "loss": 0.0004, "step": 1330 }, { "epoch": 0.16540701276173955, "grad_norm": 0.008366206660866737, "learning_rate": 0.00019741311277439704, "loss": 0.0004, "step": 1335 }, { "epoch": 0.1660265146821955, "grad_norm": 0.002770440885797143, "learning_rate": 0.00019736401068763134, "loss": 0.0004, "step": 1340 }, { "epoch": 0.16664601660265146, "grad_norm": 0.00948986504226923, "learning_rate": 0.0001973144531873117, "loss": 0.0002, "step": 1345 }, { "epoch": 0.16726551852310742, "grad_norm": 0.002766720484942198, "learning_rate": 0.00019726444050524003, "loss": 0.0005, "step": 1350 }, { "epoch": 0.16788502044356338, "grad_norm": 0.01357900071889162, "learning_rate": 0.0001972139728753473, "loss": 0.0001, "step": 1355 }, { "epoch": 0.16850452236401933, "grad_norm": 0.001577444258145988, "learning_rate": 0.00019716305053369234, "loss": 0.0008, "step": 1360 }, { "epoch": 0.1691240242844753, "grad_norm": 0.0039864592254161835, "learning_rate": 0.000197111673718461, "loss": 0.0001, "step": 1365 }, { "epoch": 0.16974352620493124, "grad_norm": 0.0007635201909579337, "learning_rate": 0.0001970598426699648, "loss": 0.0008, "step": 1370 }, { "epoch": 0.1703630281253872, "grad_norm": 0.0008222947362810373, "learning_rate": 0.00019700755763063998, "loss": 0.0001, "step": 1375 }, { "epoch": 0.17098253004584313, "grad_norm": 0.007679250091314316, "learning_rate": 0.0001969548188450463, "loss": 0.0001, "step": 1380 }, { "epoch": 0.17160203196629908, "grad_norm": 0.0015139685710892081, "learning_rate": 0.00019690162655986582, "loss": 0.0001, "step": 1385 }, { "epoch": 0.17222153388675504, "grad_norm": 0.0041490960866212845, "learning_rate": 0.0001968479810239018, "loss": 0.0015, "step": 1390 }, { "epoch": 0.172841035807211, "grad_norm": 0.008305913768708706, "learning_rate": 0.0001967938824880777, "loss": 0.0004, "step": 1395 }, { "epoch": 0.17346053772766695, "grad_norm": 0.02154563181102276, "learning_rate": 0.00019673933120543564, "loss": 0.0042, "step": 1400 }, { "epoch": 0.1740800396481229, "grad_norm": 0.0028958581387996674, "learning_rate": 0.0001966843274311356, "loss": 0.0002, "step": 1405 }, { "epoch": 0.17469954156857886, "grad_norm": 0.001956045627593994, "learning_rate": 0.00019662887142245401, "loss": 0.0008, "step": 1410 }, { "epoch": 0.17531904348903482, "grad_norm": 0.08150173723697662, "learning_rate": 0.00019657296343878256, "loss": 0.0003, "step": 1415 }, { "epoch": 0.17593854540949078, "grad_norm": 0.0035198316909372807, "learning_rate": 0.00019651660374162707, "loss": 0.0003, "step": 1420 }, { "epoch": 0.17655804732994673, "grad_norm": 0.00872136652469635, "learning_rate": 0.0001964597925946062, "loss": 0.0002, "step": 1425 }, { "epoch": 0.1771775492504027, "grad_norm": 0.002725751604884863, "learning_rate": 0.00019640253026345024, "loss": 0.0002, "step": 1430 }, { "epoch": 0.17779705117085862, "grad_norm": 0.01025388389825821, "learning_rate": 0.0001963448170159998, "loss": 0.0001, "step": 1435 }, { "epoch": 0.17841655309131457, "grad_norm": 0.005298434756696224, "learning_rate": 0.00019628665312220475, "loss": 0.0002, "step": 1440 }, { "epoch": 0.17903605501177053, "grad_norm": 0.0019736161921173334, "learning_rate": 0.00019622803885412275, "loss": 0.0002, "step": 1445 }, { "epoch": 0.17965555693222648, "grad_norm": 0.001888057915493846, "learning_rate": 0.00019616897448591802, "loss": 0.0001, "step": 1450 }, { "epoch": 0.18027505885268244, "grad_norm": 0.0022415723651647568, "learning_rate": 0.00019610946029386016, "loss": 0.0001, "step": 1455 }, { "epoch": 0.1808945607731384, "grad_norm": 0.012730030342936516, "learning_rate": 0.00019604949655632279, "loss": 0.0001, "step": 1460 }, { "epoch": 0.18151406269359435, "grad_norm": 0.0006294928025454283, "learning_rate": 0.00019598908355378218, "loss": 0.0001, "step": 1465 }, { "epoch": 0.1821335646140503, "grad_norm": 0.00037341751158237457, "learning_rate": 0.00019592822156881608, "loss": 0.0001, "step": 1470 }, { "epoch": 0.18275306653450626, "grad_norm": 0.03415524959564209, "learning_rate": 0.00019586691088610225, "loss": 0.0001, "step": 1475 }, { "epoch": 0.18337256845496222, "grad_norm": 0.0003992998390458524, "learning_rate": 0.00019580515179241733, "loss": 0.0, "step": 1480 }, { "epoch": 0.18399207037541818, "grad_norm": 0.002114582108333707, "learning_rate": 0.00019574294457663522, "loss": 0.0001, "step": 1485 }, { "epoch": 0.1846115722958741, "grad_norm": 0.0005940650589764118, "learning_rate": 0.00019568028952972596, "loss": 0.0001, "step": 1490 }, { "epoch": 0.18523107421633006, "grad_norm": 0.0019096137257292867, "learning_rate": 0.0001956171869447543, "loss": 0.0, "step": 1495 }, { "epoch": 0.18585057613678602, "grad_norm": 0.05786804109811783, "learning_rate": 0.00019555363711687833, "loss": 0.0005, "step": 1500 }, { "epoch": 0.18647007805724197, "grad_norm": 0.003543251659721136, "learning_rate": 0.00019548964034334798, "loss": 0.0001, "step": 1505 }, { "epoch": 0.18708957997769793, "grad_norm": 0.03845427185297012, "learning_rate": 0.00019542519692350386, "loss": 0.001, "step": 1510 }, { "epoch": 0.18770908189815388, "grad_norm": 0.0011573415249586105, "learning_rate": 0.00019536030715877574, "loss": 0.0001, "step": 1515 }, { "epoch": 0.18832858381860984, "grad_norm": 0.015640288591384888, "learning_rate": 0.000195294971352681, "loss": 0.0004, "step": 1520 }, { "epoch": 0.1889480857390658, "grad_norm": 0.003193578217178583, "learning_rate": 0.00019522918981082347, "loss": 0.0006, "step": 1525 }, { "epoch": 0.18956758765952175, "grad_norm": 0.0019035928416997194, "learning_rate": 0.0001951629628408919, "loss": 0.0002, "step": 1530 }, { "epoch": 0.1901870895799777, "grad_norm": 0.0008361918735317886, "learning_rate": 0.0001950962907526584, "loss": 0.0001, "step": 1535 }, { "epoch": 0.19080659150043366, "grad_norm": 0.002178671769797802, "learning_rate": 0.00019502917385797716, "loss": 0.0, "step": 1540 }, { "epoch": 0.1914260934208896, "grad_norm": 0.000307333713863045, "learning_rate": 0.0001949616124707829, "loss": 0.0001, "step": 1545 }, { "epoch": 0.19204559534134555, "grad_norm": 0.0008854054030962288, "learning_rate": 0.00019489360690708938, "loss": 0.0001, "step": 1550 }, { "epoch": 0.1926650972618015, "grad_norm": 0.000268277944996953, "learning_rate": 0.00019482515748498806, "loss": 0.0, "step": 1555 }, { "epoch": 0.19328459918225746, "grad_norm": 0.0006831574137322605, "learning_rate": 0.00019475626452464647, "loss": 0.0, "step": 1560 }, { "epoch": 0.19390410110271342, "grad_norm": 0.001071661477908492, "learning_rate": 0.00019468692834830674, "loss": 0.0001, "step": 1565 }, { "epoch": 0.19452360302316937, "grad_norm": 0.0006375533994287252, "learning_rate": 0.00019461714928028408, "loss": 0.0, "step": 1570 }, { "epoch": 0.19514310494362533, "grad_norm": 0.12698017060756683, "learning_rate": 0.00019454692764696546, "loss": 0.0005, "step": 1575 }, { "epoch": 0.19576260686408128, "grad_norm": 0.014129637740552425, "learning_rate": 0.00019447626377680773, "loss": 0.0001, "step": 1580 }, { "epoch": 0.19638210878453724, "grad_norm": 0.0004987851716578007, "learning_rate": 0.00019440515800033637, "loss": 0.0, "step": 1585 }, { "epoch": 0.1970016107049932, "grad_norm": 0.00453907810151577, "learning_rate": 0.0001943336106501438, "loss": 0.0001, "step": 1590 }, { "epoch": 0.19762111262544915, "grad_norm": 0.0011737572494894266, "learning_rate": 0.0001942616220608879, "loss": 0.0001, "step": 1595 }, { "epoch": 0.19824061454590508, "grad_norm": 0.0007884913356974721, "learning_rate": 0.00019418919256929042, "loss": 0.0007, "step": 1600 }, { "epoch": 0.19886011646636104, "grad_norm": 0.003918599337339401, "learning_rate": 0.00019411632251413542, "loss": 0.0001, "step": 1605 }, { "epoch": 0.199479618386817, "grad_norm": 0.004090205300599337, "learning_rate": 0.0001940430122362676, "loss": 0.0004, "step": 1610 }, { "epoch": 0.20009912030727295, "grad_norm": 0.0007981263916008174, "learning_rate": 0.00019396926207859084, "loss": 0.0001, "step": 1615 }, { "epoch": 0.2007186222277289, "grad_norm": 0.0004586945869959891, "learning_rate": 0.00019389507238606651, "loss": 0.0002, "step": 1620 }, { "epoch": 0.20133812414818486, "grad_norm": 0.0034128485713154078, "learning_rate": 0.00019382044350571177, "loss": 0.0001, "step": 1625 }, { "epoch": 0.20195762606864082, "grad_norm": 0.003208999754860997, "learning_rate": 0.00019374537578659826, "loss": 0.0001, "step": 1630 }, { "epoch": 0.20257712798909677, "grad_norm": 0.0005963240400888026, "learning_rate": 0.0001936698695798501, "loss": 0.0001, "step": 1635 }, { "epoch": 0.20319662990955273, "grad_norm": 0.00044395984150469303, "learning_rate": 0.00019359392523864242, "loss": 0.0001, "step": 1640 }, { "epoch": 0.20381613183000868, "grad_norm": 0.004007943440228701, "learning_rate": 0.00019351754311819976, "loss": 0.0001, "step": 1645 }, { "epoch": 0.2044356337504646, "grad_norm": 0.00035174479125998914, "learning_rate": 0.00019344072357579427, "loss": 0.0001, "step": 1650 }, { "epoch": 0.20505513567092057, "grad_norm": 0.0006279583321884274, "learning_rate": 0.00019336346697074422, "loss": 0.0, "step": 1655 }, { "epoch": 0.20567463759137652, "grad_norm": 0.0008990754140540957, "learning_rate": 0.00019328577366441207, "loss": 0.0, "step": 1660 }, { "epoch": 0.20629413951183248, "grad_norm": 0.0004474915622267872, "learning_rate": 0.000193207644020203, "loss": 0.0, "step": 1665 }, { "epoch": 0.20691364143228844, "grad_norm": 0.00037963371141813695, "learning_rate": 0.0001931290784035631, "loss": 0.0001, "step": 1670 }, { "epoch": 0.2075331433527444, "grad_norm": 0.001648230361752212, "learning_rate": 0.00019305007718197777, "loss": 0.0, "step": 1675 }, { "epoch": 0.20815264527320035, "grad_norm": 0.00040166027611121535, "learning_rate": 0.00019297064072496984, "loss": 0.0, "step": 1680 }, { "epoch": 0.2087721471936563, "grad_norm": 0.00032061603269539773, "learning_rate": 0.00019289076940409792, "loss": 0.0, "step": 1685 }, { "epoch": 0.20939164911411226, "grad_norm": 0.0004227515310049057, "learning_rate": 0.0001928104635929547, "loss": 0.0, "step": 1690 }, { "epoch": 0.21001115103456822, "grad_norm": 0.0002632966497913003, "learning_rate": 0.00019272972366716525, "loss": 0.0, "step": 1695 }, { "epoch": 0.21063065295502417, "grad_norm": 0.0011797933839261532, "learning_rate": 0.00019264855000438496, "loss": 0.0, "step": 1700 }, { "epoch": 0.2112501548754801, "grad_norm": 0.0002971686189994216, "learning_rate": 0.00019256694298429818, "loss": 0.0, "step": 1705 }, { "epoch": 0.21186965679593606, "grad_norm": 0.0005524231237359345, "learning_rate": 0.00019248490298861626, "loss": 0.0, "step": 1710 }, { "epoch": 0.212489158716392, "grad_norm": 0.00020437220518942922, "learning_rate": 0.00019240243040107567, "loss": 0.0, "step": 1715 }, { "epoch": 0.21310866063684797, "grad_norm": 0.0005050025647506118, "learning_rate": 0.00019231952560743633, "loss": 0.0, "step": 1720 }, { "epoch": 0.21372816255730392, "grad_norm": 0.000392941408790648, "learning_rate": 0.0001922361889954798, "loss": 0.0001, "step": 1725 }, { "epoch": 0.21434766447775988, "grad_norm": 0.0009142369963228703, "learning_rate": 0.00019215242095500744, "loss": 0.0, "step": 1730 }, { "epoch": 0.21496716639821584, "grad_norm": 0.00020727685478050262, "learning_rate": 0.0001920682218778386, "loss": 0.0, "step": 1735 }, { "epoch": 0.2155866683186718, "grad_norm": 0.0003952252445742488, "learning_rate": 0.0001919835921578087, "loss": 0.0, "step": 1740 }, { "epoch": 0.21620617023912775, "grad_norm": 0.00013894705625716597, "learning_rate": 0.00019189853219076753, "loss": 0.0, "step": 1745 }, { "epoch": 0.2168256721595837, "grad_norm": 0.00021790718892589211, "learning_rate": 0.0001918130423745773, "loss": 0.0, "step": 1750 }, { "epoch": 0.21744517408003966, "grad_norm": 0.0007013051654212177, "learning_rate": 0.00019172712310911084, "loss": 0.0, "step": 1755 }, { "epoch": 0.2180646760004956, "grad_norm": 0.00016509837587364018, "learning_rate": 0.00019164077479624973, "loss": 0.0, "step": 1760 }, { "epoch": 0.21868417792095154, "grad_norm": 0.0001544052065582946, "learning_rate": 0.00019155399783988227, "loss": 0.0, "step": 1765 }, { "epoch": 0.2193036798414075, "grad_norm": 0.00018853796063922346, "learning_rate": 0.00019146679264590182, "loss": 0.0, "step": 1770 }, { "epoch": 0.21992318176186346, "grad_norm": 0.0003170891432091594, "learning_rate": 0.00019137915962220476, "loss": 0.0, "step": 1775 }, { "epoch": 0.2205426836823194, "grad_norm": 0.00026965016149915755, "learning_rate": 0.00019129109917868863, "loss": 0.0001, "step": 1780 }, { "epoch": 0.22116218560277537, "grad_norm": 0.00023218896239995956, "learning_rate": 0.00019120261172725012, "loss": 0.0, "step": 1785 }, { "epoch": 0.22178168752323132, "grad_norm": 0.0021776568610221148, "learning_rate": 0.0001911136976817833, "loss": 0.0, "step": 1790 }, { "epoch": 0.22240118944368728, "grad_norm": 0.00010594737250357866, "learning_rate": 0.00019102435745817765, "loss": 0.0, "step": 1795 }, { "epoch": 0.22302069136414324, "grad_norm": 0.0008549446356482804, "learning_rate": 0.00019093459147431592, "loss": 0.0, "step": 1800 }, { "epoch": 0.2236401932845992, "grad_norm": 0.00015743107360322028, "learning_rate": 0.00019084440015007246, "loss": 0.0, "step": 1805 }, { "epoch": 0.22425969520505515, "grad_norm": 0.00035693394602276385, "learning_rate": 0.00019075378390731107, "loss": 0.0, "step": 1810 }, { "epoch": 0.22487919712551108, "grad_norm": 0.0007437100866809487, "learning_rate": 0.00019066274316988305, "loss": 0.0002, "step": 1815 }, { "epoch": 0.22549869904596703, "grad_norm": 0.00031910199322737753, "learning_rate": 0.00019057127836362528, "loss": 0.0, "step": 1820 }, { "epoch": 0.226118200966423, "grad_norm": 0.00017805799143388867, "learning_rate": 0.0001904793899163582, "loss": 0.0, "step": 1825 }, { "epoch": 0.22673770288687894, "grad_norm": 0.0005008209845982492, "learning_rate": 0.00019038707825788377, "loss": 0.0, "step": 1830 }, { "epoch": 0.2273572048073349, "grad_norm": 0.0010279848938807845, "learning_rate": 0.0001902943438199835, "loss": 0.0, "step": 1835 }, { "epoch": 0.22797670672779086, "grad_norm": 0.00014174918760545552, "learning_rate": 0.00019020118703641647, "loss": 0.0, "step": 1840 }, { "epoch": 0.2285962086482468, "grad_norm": 0.0018335055792704225, "learning_rate": 0.00019010760834291718, "loss": 0.0, "step": 1845 }, { "epoch": 0.22921571056870277, "grad_norm": 0.00025655582430772483, "learning_rate": 0.00019001360817719364, "loss": 0.0, "step": 1850 }, { "epoch": 0.22983521248915872, "grad_norm": 0.000164328288519755, "learning_rate": 0.00018991918697892524, "loss": 0.0, "step": 1855 }, { "epoch": 0.23045471440961468, "grad_norm": 0.00021767888392787427, "learning_rate": 0.00018982434518976073, "loss": 0.0, "step": 1860 }, { "epoch": 0.23107421633007064, "grad_norm": 9.736277570482343e-05, "learning_rate": 0.0001897290832533161, "loss": 0.0, "step": 1865 }, { "epoch": 0.23169371825052656, "grad_norm": 0.031168675050139427, "learning_rate": 0.0001896334016151727, "loss": 0.0, "step": 1870 }, { "epoch": 0.23231322017098252, "grad_norm": 0.00011456626816652715, "learning_rate": 0.0001895373007228748, "loss": 0.0001, "step": 1875 }, { "epoch": 0.23293272209143848, "grad_norm": 0.00011591204383876175, "learning_rate": 0.00018944078102592785, "loss": 0.0, "step": 1880 }, { "epoch": 0.23355222401189443, "grad_norm": 0.004366991110146046, "learning_rate": 0.00018934384297579617, "loss": 0.0, "step": 1885 }, { "epoch": 0.2341717259323504, "grad_norm": 0.00017258629668504, "learning_rate": 0.00018924648702590093, "loss": 0.0, "step": 1890 }, { "epoch": 0.23479122785280634, "grad_norm": 0.00041053423774428666, "learning_rate": 0.00018914871363161795, "loss": 0.0, "step": 1895 }, { "epoch": 0.2354107297732623, "grad_norm": 0.00011154228559462354, "learning_rate": 0.00018905052325027567, "loss": 0.0, "step": 1900 }, { "epoch": 0.23603023169371826, "grad_norm": 0.001997201004996896, "learning_rate": 0.00018895191634115291, "loss": 0.0, "step": 1905 }, { "epoch": 0.2366497336141742, "grad_norm": 0.00017417450726497918, "learning_rate": 0.00018885289336547682, "loss": 0.0, "step": 1910 }, { "epoch": 0.23726923553463017, "grad_norm": 0.0021275924518704414, "learning_rate": 0.00018875345478642068, "loss": 0.0, "step": 1915 }, { "epoch": 0.23788873745508612, "grad_norm": 0.00013825582573190331, "learning_rate": 0.00018865360106910163, "loss": 0.0, "step": 1920 }, { "epoch": 0.23850823937554205, "grad_norm": 0.00018638362234923989, "learning_rate": 0.00018855333268057872, "loss": 0.0, "step": 1925 }, { "epoch": 0.239127741295998, "grad_norm": 0.00014317786553874612, "learning_rate": 0.00018845265008985047, "loss": 0.0, "step": 1930 }, { "epoch": 0.23974724321645396, "grad_norm": 0.0002915103978011757, "learning_rate": 0.00018835155376785293, "loss": 0.0, "step": 1935 }, { "epoch": 0.24036674513690992, "grad_norm": 0.00011501055269036442, "learning_rate": 0.00018825004418745724, "loss": 0.0, "step": 1940 }, { "epoch": 0.24098624705736588, "grad_norm": 8.360140782315284e-05, "learning_rate": 0.00018814812182346762, "loss": 0.0, "step": 1945 }, { "epoch": 0.24160574897782183, "grad_norm": 0.0002894556673709303, "learning_rate": 0.00018804578715261898, "loss": 0.0002, "step": 1950 }, { "epoch": 0.2422252508982778, "grad_norm": 0.03497488424181938, "learning_rate": 0.00018794304065357479, "loss": 0.0001, "step": 1955 }, { "epoch": 0.24284475281873374, "grad_norm": 0.00044845009688287973, "learning_rate": 0.00018783988280692487, "loss": 0.0001, "step": 1960 }, { "epoch": 0.2434642547391897, "grad_norm": 0.00015136846923269331, "learning_rate": 0.00018773631409518297, "loss": 0.0, "step": 1965 }, { "epoch": 0.24408375665964566, "grad_norm": 0.0002044896682491526, "learning_rate": 0.0001876323350027848, "loss": 0.0, "step": 1970 }, { "epoch": 0.24470325858010158, "grad_norm": 0.0001959316577995196, "learning_rate": 0.00018752794601608548, "loss": 0.0, "step": 1975 }, { "epoch": 0.24532276050055754, "grad_norm": 0.0005982258589938283, "learning_rate": 0.0001874231476233574, "loss": 0.0, "step": 1980 }, { "epoch": 0.2459422624210135, "grad_norm": 0.00031565167591907084, "learning_rate": 0.00018731794031478794, "loss": 0.0, "step": 1985 }, { "epoch": 0.24656176434146945, "grad_norm": 0.0002690436667762697, "learning_rate": 0.00018721232458247716, "loss": 0.0, "step": 1990 }, { "epoch": 0.2471812662619254, "grad_norm": 0.0001720521249808371, "learning_rate": 0.00018710630092043541, "loss": 0.0, "step": 1995 }, { "epoch": 0.24780076818238136, "grad_norm": 0.0003437872801441699, "learning_rate": 0.0001869998698245813, "loss": 0.0, "step": 2000 }, { "epoch": 0.24842027010283732, "grad_norm": 0.00043409023783169687, "learning_rate": 0.00018689303179273895, "loss": 0.0, "step": 2005 }, { "epoch": 0.24903977202329328, "grad_norm": 0.00013267307076603174, "learning_rate": 0.00018678578732463606, "loss": 0.0, "step": 2010 }, { "epoch": 0.24965927394374923, "grad_norm": 0.00018134865968022496, "learning_rate": 0.00018667813692190135, "loss": 0.0, "step": 2015 }, { "epoch": 0.25027877586420516, "grad_norm": 0.00015442888252437115, "learning_rate": 0.00018657008108806226, "loss": 0.0, "step": 2020 }, { "epoch": 0.2508982777846611, "grad_norm": 0.0037410708609968424, "learning_rate": 0.00018646162032854262, "loss": 0.0, "step": 2025 }, { "epoch": 0.2515177797051171, "grad_norm": 0.00014964683214202523, "learning_rate": 0.00018635275515066027, "loss": 0.0, "step": 2030 }, { "epoch": 0.25213728162557303, "grad_norm": 0.00013305028551258147, "learning_rate": 0.00018624348606362473, "loss": 0.0, "step": 2035 }, { "epoch": 0.252756783546029, "grad_norm": 9.897362178890035e-05, "learning_rate": 0.0001861338135785347, "loss": 0.0004, "step": 2040 }, { "epoch": 0.25337628546648494, "grad_norm": 0.0002119188429787755, "learning_rate": 0.0001860237382083758, "loss": 0.0, "step": 2045 }, { "epoch": 0.2539957873869409, "grad_norm": 0.01641119457781315, "learning_rate": 0.00018591326046801815, "loss": 0.0, "step": 2050 }, { "epoch": 0.25461528930739685, "grad_norm": 0.00011807310511358082, "learning_rate": 0.00018580238087421378, "loss": 0.0, "step": 2055 }, { "epoch": 0.2552347912278528, "grad_norm": 0.00033228175016120076, "learning_rate": 0.00018569109994559456, "loss": 0.0, "step": 2060 }, { "epoch": 0.25585429314830876, "grad_norm": 0.00018344639101997018, "learning_rate": 0.00018557941820266944, "loss": 0.0, "step": 2065 }, { "epoch": 0.2564737950687647, "grad_norm": 0.00011060066753998399, "learning_rate": 0.0001854673361678222, "loss": 0.0, "step": 2070 }, { "epoch": 0.2570932969892207, "grad_norm": 8.901116962078959e-05, "learning_rate": 0.00018535485436530898, "loss": 0.0, "step": 2075 }, { "epoch": 0.25771279890967663, "grad_norm": 0.00019972374138887972, "learning_rate": 0.00018524197332125576, "loss": 0.0, "step": 2080 }, { "epoch": 0.2583323008301326, "grad_norm": 0.00013531267177313566, "learning_rate": 0.00018512869356365595, "loss": 0.0, "step": 2085 }, { "epoch": 0.25895180275058854, "grad_norm": 0.000175971319549717, "learning_rate": 0.00018501501562236797, "loss": 0.0, "step": 2090 }, { "epoch": 0.2595713046710445, "grad_norm": 0.00010387874499429017, "learning_rate": 0.00018490094002911262, "loss": 0.0, "step": 2095 }, { "epoch": 0.26019080659150046, "grad_norm": 0.0001373535196762532, "learning_rate": 0.00018478646731747081, "loss": 0.0, "step": 2100 }, { "epoch": 0.2608103085119564, "grad_norm": 8.851318125380203e-05, "learning_rate": 0.0001846715980228808, "loss": 0.0, "step": 2105 }, { "epoch": 0.2614298104324123, "grad_norm": 0.00014913504128344357, "learning_rate": 0.00018455633268263602, "loss": 0.0, "step": 2110 }, { "epoch": 0.26204931235286827, "grad_norm": 0.00010552656021900475, "learning_rate": 0.00018444067183588222, "loss": 0.0, "step": 2115 }, { "epoch": 0.2626688142733242, "grad_norm": 0.000230904552154243, "learning_rate": 0.00018432461602361518, "loss": 0.0, "step": 2120 }, { "epoch": 0.2632883161937802, "grad_norm": 0.00010144018597202376, "learning_rate": 0.00018420816578867806, "loss": 0.0, "step": 2125 }, { "epoch": 0.26390781811423614, "grad_norm": 0.00010451846901560202, "learning_rate": 0.00018409132167575894, "loss": 0.0003, "step": 2130 }, { "epoch": 0.2645273200346921, "grad_norm": 0.00019561382941901684, "learning_rate": 0.0001839740842313883, "loss": 0.0, "step": 2135 }, { "epoch": 0.26514682195514805, "grad_norm": 0.0001312725798925385, "learning_rate": 0.00018385645400393626, "loss": 0.0, "step": 2140 }, { "epoch": 0.265766323875604, "grad_norm": 0.00047598761739209294, "learning_rate": 0.00018373843154361022, "loss": 0.0, "step": 2145 }, { "epoch": 0.26638582579605996, "grad_norm": 0.00014992771320976317, "learning_rate": 0.00018362001740245226, "loss": 0.0, "step": 2150 }, { "epoch": 0.2670053277165159, "grad_norm": 0.009201920591294765, "learning_rate": 0.0001835012121343365, "loss": 0.0, "step": 2155 }, { "epoch": 0.2676248296369719, "grad_norm": 0.00029466464184224606, "learning_rate": 0.00018338201629496646, "loss": 0.0001, "step": 2160 }, { "epoch": 0.26824433155742783, "grad_norm": 0.00014994775119703263, "learning_rate": 0.0001832624304418727, "loss": 0.0, "step": 2165 }, { "epoch": 0.2688638334778838, "grad_norm": 0.00012314421474002302, "learning_rate": 0.00018314245513440983, "loss": 0.0, "step": 2170 }, { "epoch": 0.26948333539833974, "grad_norm": 0.00013231039338279516, "learning_rate": 0.00018302209093375428, "loss": 0.0, "step": 2175 }, { "epoch": 0.2701028373187957, "grad_norm": 0.0001399478205712512, "learning_rate": 0.0001829013384029014, "loss": 0.0, "step": 2180 }, { "epoch": 0.27072233923925165, "grad_norm": 0.0002205887867603451, "learning_rate": 0.00018278019810666295, "loss": 0.0, "step": 2185 }, { "epoch": 0.2713418411597076, "grad_norm": 0.0001581410033395514, "learning_rate": 0.00018265867061166446, "loss": 0.0, "step": 2190 }, { "epoch": 0.27196134308016356, "grad_norm": 0.00011533142242114991, "learning_rate": 0.00018253675648634255, "loss": 0.0, "step": 2195 }, { "epoch": 0.2725808450006195, "grad_norm": 0.005214076954871416, "learning_rate": 0.00018241445630094228, "loss": 0.0, "step": 2200 }, { "epoch": 0.2732003469210755, "grad_norm": 8.166854968294501e-05, "learning_rate": 0.00018229177062751442, "loss": 0.0, "step": 2205 }, { "epoch": 0.27381984884153143, "grad_norm": 0.00016513287846464664, "learning_rate": 0.00018216870003991291, "loss": 0.0, "step": 2210 }, { "epoch": 0.2744393507619874, "grad_norm": 0.000974692462477833, "learning_rate": 0.00018204524511379212, "loss": 0.0, "step": 2215 }, { "epoch": 0.2750588526824433, "grad_norm": 0.00041982249240390956, "learning_rate": 0.00018192140642660402, "loss": 0.0, "step": 2220 }, { "epoch": 0.27567835460289924, "grad_norm": 0.0011037853546440601, "learning_rate": 0.0001817971845575957, "loss": 0.0, "step": 2225 }, { "epoch": 0.2762978565233552, "grad_norm": 7.91082638897933e-05, "learning_rate": 0.0001816725800878065, "loss": 0.0, "step": 2230 }, { "epoch": 0.27691735844381116, "grad_norm": 0.00013951574510429054, "learning_rate": 0.00018154759360006543, "loss": 0.0, "step": 2235 }, { "epoch": 0.2775368603642671, "grad_norm": 0.00011253332922933623, "learning_rate": 0.0001814222256789882, "loss": 0.0, "step": 2240 }, { "epoch": 0.27815636228472307, "grad_norm": 0.00018238971824757755, "learning_rate": 0.00018129647691097488, "loss": 0.0, "step": 2245 }, { "epoch": 0.278775864205179, "grad_norm": 8.32600417197682e-05, "learning_rate": 0.00018117034788420674, "loss": 0.0, "step": 2250 }, { "epoch": 0.279395366125635, "grad_norm": 9.163943468593061e-05, "learning_rate": 0.0001810438391886437, "loss": 0.0, "step": 2255 }, { "epoch": 0.28001486804609094, "grad_norm": 0.00028675931389443576, "learning_rate": 0.0001809169514160217, "loss": 0.0, "step": 2260 }, { "epoch": 0.2806343699665469, "grad_norm": 0.00017373080481775105, "learning_rate": 0.0001807896851598496, "loss": 0.0, "step": 2265 }, { "epoch": 0.28125387188700285, "grad_norm": 0.0002107757463818416, "learning_rate": 0.00018066204101540678, "loss": 0.0, "step": 2270 }, { "epoch": 0.2818733738074588, "grad_norm": 0.00011386910773580894, "learning_rate": 0.00018053401957973995, "loss": 0.0, "step": 2275 }, { "epoch": 0.28249287572791476, "grad_norm": 0.00022561063815373927, "learning_rate": 0.00018040562145166074, "loss": 0.0, "step": 2280 }, { "epoch": 0.2831123776483707, "grad_norm": 0.00015824096044525504, "learning_rate": 0.00018027684723174268, "loss": 0.0009, "step": 2285 }, { "epoch": 0.2837318795688267, "grad_norm": 0.0002232889091828838, "learning_rate": 0.00018014769752231844, "loss": 0.0, "step": 2290 }, { "epoch": 0.28435138148928263, "grad_norm": 0.00014339544577524066, "learning_rate": 0.00018001817292747702, "loss": 0.0, "step": 2295 }, { "epoch": 0.2849708834097386, "grad_norm": 6.143360951682553e-05, "learning_rate": 0.00017988827405306093, "loss": 0.0, "step": 2300 }, { "epoch": 0.28559038533019454, "grad_norm": 0.0002683981438167393, "learning_rate": 0.0001797580015066634, "loss": 0.0, "step": 2305 }, { "epoch": 0.2862098872506505, "grad_norm": 0.00014696561265736818, "learning_rate": 0.00017962735589762533, "loss": 0.0, "step": 2310 }, { "epoch": 0.28682938917110645, "grad_norm": 0.00011365246609784663, "learning_rate": 0.00017949633783703273, "loss": 0.0, "step": 2315 }, { "epoch": 0.2874488910915624, "grad_norm": 0.0001968323194887489, "learning_rate": 0.0001793649479377137, "loss": 0.0, "step": 2320 }, { "epoch": 0.28806839301201836, "grad_norm": 0.0001067871053237468, "learning_rate": 0.00017923318681423557, "loss": 0.0, "step": 2325 }, { "epoch": 0.28868789493247426, "grad_norm": 0.00020979139662813395, "learning_rate": 0.00017910105508290206, "loss": 0.0, "step": 2330 }, { "epoch": 0.2893073968529302, "grad_norm": 6.391115311998874e-05, "learning_rate": 0.00017896855336175035, "loss": 0.0, "step": 2335 }, { "epoch": 0.2899268987733862, "grad_norm": 8.320100459968671e-05, "learning_rate": 0.00017883568227054832, "loss": 0.0, "step": 2340 }, { "epoch": 0.29054640069384213, "grad_norm": 6.734030466759577e-05, "learning_rate": 0.00017870244243079145, "loss": 0.0, "step": 2345 }, { "epoch": 0.2911659026142981, "grad_norm": 0.00019551298464648426, "learning_rate": 0.00017856883446569998, "loss": 0.0, "step": 2350 }, { "epoch": 0.29178540453475404, "grad_norm": 0.00012009276542812586, "learning_rate": 0.0001784348590002162, "loss": 0.0, "step": 2355 }, { "epoch": 0.29240490645521, "grad_norm": 6.396598473656923e-05, "learning_rate": 0.00017830051666100122, "loss": 0.0, "step": 2360 }, { "epoch": 0.29302440837566596, "grad_norm": 0.00010196940274909139, "learning_rate": 0.00017816580807643222, "loss": 0.0, "step": 2365 }, { "epoch": 0.2936439102961219, "grad_norm": 0.00011442007962614298, "learning_rate": 0.00017803073387659944, "loss": 0.0, "step": 2370 }, { "epoch": 0.29426341221657787, "grad_norm": 0.0001426203380106017, "learning_rate": 0.0001778952946933033, "loss": 0.0, "step": 2375 }, { "epoch": 0.2948829141370338, "grad_norm": 9.849296475294977e-05, "learning_rate": 0.00017775949116005145, "loss": 0.0, "step": 2380 }, { "epoch": 0.2955024160574898, "grad_norm": 0.0001592986227478832, "learning_rate": 0.0001776233239120556, "loss": 0.0, "step": 2385 }, { "epoch": 0.29612191797794574, "grad_norm": 7.148679287638515e-05, "learning_rate": 0.00017748679358622885, "loss": 0.0, "step": 2390 }, { "epoch": 0.2967414198984017, "grad_norm": 0.00010711931099649519, "learning_rate": 0.0001773499008211826, "loss": 0.0, "step": 2395 }, { "epoch": 0.29736092181885765, "grad_norm": 0.00022670227917842567, "learning_rate": 0.0001772126462572234, "loss": 0.0, "step": 2400 }, { "epoch": 0.2979804237393136, "grad_norm": 8.951595373218879e-05, "learning_rate": 0.00017707503053635018, "loss": 0.0, "step": 2405 }, { "epoch": 0.29859992565976956, "grad_norm": 5.4322041250998154e-05, "learning_rate": 0.00017693705430225115, "loss": 0.0, "step": 2410 }, { "epoch": 0.2992194275802255, "grad_norm": 6.341623520711437e-05, "learning_rate": 0.00017679871820030073, "loss": 0.0, "step": 2415 }, { "epoch": 0.2998389295006815, "grad_norm": 0.00011064052523579448, "learning_rate": 0.0001766600228775567, "loss": 0.0, "step": 2420 }, { "epoch": 0.30045843142113743, "grad_norm": 7.799950981279835e-05, "learning_rate": 0.00017652096898275704, "loss": 0.0, "step": 2425 }, { "epoch": 0.3010779333415934, "grad_norm": 6.430044595617801e-05, "learning_rate": 0.00017638155716631686, "loss": 0.0, "step": 2430 }, { "epoch": 0.3016974352620493, "grad_norm": 0.00011371615983080119, "learning_rate": 0.00017624178808032552, "loss": 0.0, "step": 2435 }, { "epoch": 0.30231693718250524, "grad_norm": 0.00010884269431699067, "learning_rate": 0.00017610166237854339, "loss": 0.0, "step": 2440 }, { "epoch": 0.3029364391029612, "grad_norm": 0.0001215854863403365, "learning_rate": 0.00017596118071639896, "loss": 0.0, "step": 2445 }, { "epoch": 0.30355594102341715, "grad_norm": 0.00034932265407405794, "learning_rate": 0.00017582034375098564, "loss": 0.0, "step": 2450 }, { "epoch": 0.3041754429438731, "grad_norm": 0.00016452455020044, "learning_rate": 0.00017567915214105882, "loss": 0.0, "step": 2455 }, { "epoch": 0.30479494486432906, "grad_norm": 0.00011540481500560418, "learning_rate": 0.00017553760654703268, "loss": 0.0, "step": 2460 }, { "epoch": 0.305414446784785, "grad_norm": 8.892964251572266e-05, "learning_rate": 0.0001753957076309771, "loss": 0.0, "step": 2465 }, { "epoch": 0.306033948705241, "grad_norm": 0.00012043407332384959, "learning_rate": 0.00017525345605661464, "loss": 0.0, "step": 2470 }, { "epoch": 0.30665345062569693, "grad_norm": 6.319625390460715e-05, "learning_rate": 0.0001751108524893174, "loss": 0.0, "step": 2475 }, { "epoch": 0.3072729525461529, "grad_norm": 6.390988710336387e-05, "learning_rate": 0.00017496789759610388, "loss": 0.0, "step": 2480 }, { "epoch": 0.30789245446660884, "grad_norm": 6.89134030835703e-05, "learning_rate": 0.0001748245920456359, "loss": 0.0, "step": 2485 }, { "epoch": 0.3085119563870648, "grad_norm": 5.600175427389331e-05, "learning_rate": 0.00017468093650821543, "loss": 0.0, "step": 2490 }, { "epoch": 0.30913145830752076, "grad_norm": 0.00014325766824185848, "learning_rate": 0.00017453693165578153, "loss": 0.0, "step": 2495 }, { "epoch": 0.3097509602279767, "grad_norm": 7.282514707185328e-05, "learning_rate": 0.00017439257816190712, "loss": 0.0, "step": 2500 }, { "epoch": 0.31037046214843267, "grad_norm": 7.02428660588339e-05, "learning_rate": 0.00017424787670179586, "loss": 0.0, "step": 2505 }, { "epoch": 0.3109899640688886, "grad_norm": 5.522788706002757e-05, "learning_rate": 0.000174102827952279, "loss": 0.0, "step": 2510 }, { "epoch": 0.3116094659893446, "grad_norm": 0.00021377568191383034, "learning_rate": 0.00017395743259181225, "loss": 0.0, "step": 2515 }, { "epoch": 0.31222896790980054, "grad_norm": 6.341623520711437e-05, "learning_rate": 0.00017381169130047255, "loss": 0.0, "step": 2520 }, { "epoch": 0.3128484698302565, "grad_norm": 6.36400654911995e-05, "learning_rate": 0.00017366560475995488, "loss": 0.0, "step": 2525 }, { "epoch": 0.31346797175071245, "grad_norm": 0.0001394295395584777, "learning_rate": 0.0001735191736535691, "loss": 0.0, "step": 2530 }, { "epoch": 0.3140874736711684, "grad_norm": 8.54153695399873e-05, "learning_rate": 0.00017337239866623683, "loss": 0.0, "step": 2535 }, { "epoch": 0.31470697559162436, "grad_norm": 0.000122211073176004, "learning_rate": 0.00017322528048448806, "loss": 0.0, "step": 2540 }, { "epoch": 0.31532647751208026, "grad_norm": 5.572181908064522e-05, "learning_rate": 0.00017307781979645817, "loss": 0.0, "step": 2545 }, { "epoch": 0.3159459794325362, "grad_norm": 0.0001151581818703562, "learning_rate": 0.00017293001729188446, "loss": 0.0, "step": 2550 }, { "epoch": 0.3165654813529922, "grad_norm": 6.063346518203616e-05, "learning_rate": 0.00017278187366210314, "loss": 0.0, "step": 2555 }, { "epoch": 0.31718498327344813, "grad_norm": 0.00020429839787539095, "learning_rate": 0.000172633389600046, "loss": 0.0, "step": 2560 }, { "epoch": 0.3178044851939041, "grad_norm": 6.185399979585782e-05, "learning_rate": 0.00017248456580023716, "loss": 0.0, "step": 2565 }, { "epoch": 0.31842398711436004, "grad_norm": 9.439489076612517e-05, "learning_rate": 0.0001723354029587898, "loss": 0.0, "step": 2570 }, { "epoch": 0.319043489034816, "grad_norm": 0.0008876679348759353, "learning_rate": 0.00017218590177340307, "loss": 0.0, "step": 2575 }, { "epoch": 0.31966299095527195, "grad_norm": 0.00019878758757840842, "learning_rate": 0.00017203606294335854, "loss": 0.0, "step": 2580 }, { "epoch": 0.3202824928757279, "grad_norm": 0.0001071761071216315, "learning_rate": 0.00017188588716951725, "loss": 0.0, "step": 2585 }, { "epoch": 0.32090199479618386, "grad_norm": 4.953141979058273e-05, "learning_rate": 0.00017173537515431612, "loss": 0.0, "step": 2590 }, { "epoch": 0.3215214967166398, "grad_norm": 0.00011462245311122388, "learning_rate": 0.00017158452760176495, "loss": 0.0, "step": 2595 }, { "epoch": 0.3221409986370958, "grad_norm": 6.151345587568358e-05, "learning_rate": 0.0001714333452174429, "loss": 0.0, "step": 2600 }, { "epoch": 0.32276050055755173, "grad_norm": 0.00022295014059636742, "learning_rate": 0.00017128182870849532, "loss": 0.0, "step": 2605 }, { "epoch": 0.3233800024780077, "grad_norm": 0.00010550506704021245, "learning_rate": 0.00017112997878363038, "loss": 0.0, "step": 2610 }, { "epoch": 0.32399950439846364, "grad_norm": 5.513501673704013e-05, "learning_rate": 0.00017097779615311582, "loss": 0.0, "step": 2615 }, { "epoch": 0.3246190063189196, "grad_norm": 0.00030427344609051943, "learning_rate": 0.0001708252815287756, "loss": 0.0, "step": 2620 }, { "epoch": 0.32523850823937556, "grad_norm": 0.00010723331070039421, "learning_rate": 0.00017067243562398648, "loss": 0.0, "step": 2625 }, { "epoch": 0.3258580101598315, "grad_norm": 0.0001250340574188158, "learning_rate": 0.00017051925915367484, "loss": 0.0, "step": 2630 }, { "epoch": 0.32647751208028747, "grad_norm": 9.58742166403681e-05, "learning_rate": 0.00017036575283431319, "loss": 0.0, "step": 2635 }, { "epoch": 0.3270970140007434, "grad_norm": 0.00038248911732807755, "learning_rate": 0.00017021191738391696, "loss": 0.0, "step": 2640 }, { "epoch": 0.3277165159211994, "grad_norm": 5.256919757812284e-05, "learning_rate": 0.00017005775352204103, "loss": 0.0, "step": 2645 }, { "epoch": 0.32833601784165534, "grad_norm": 0.00010905520321102813, "learning_rate": 0.00016990326196977636, "loss": 0.0, "step": 2650 }, { "epoch": 0.32895551976211124, "grad_norm": 5.522977153304964e-05, "learning_rate": 0.00016974844344974676, "loss": 0.0, "step": 2655 }, { "epoch": 0.3295750216825672, "grad_norm": 6.280790694290772e-05, "learning_rate": 0.0001695932986861053, "loss": 0.0, "step": 2660 }, { "epoch": 0.33019452360302315, "grad_norm": 6.421532452804968e-05, "learning_rate": 0.00016943782840453115, "loss": 0.0, "step": 2665 }, { "epoch": 0.3308140255234791, "grad_norm": 4.6305252908496186e-05, "learning_rate": 0.00016928203333222593, "loss": 0.0, "step": 2670 }, { "epoch": 0.33143352744393506, "grad_norm": 8.33050871733576e-05, "learning_rate": 0.0001691259141979106, "loss": 0.0, "step": 2675 }, { "epoch": 0.332053029364391, "grad_norm": 0.00011507300223456696, "learning_rate": 0.00016896947173182175, "loss": 0.0, "step": 2680 }, { "epoch": 0.332672531284847, "grad_norm": 7.855492731323466e-05, "learning_rate": 0.00016881270666570844, "loss": 0.0, "step": 2685 }, { "epoch": 0.33329203320530293, "grad_norm": 8.478804375045002e-05, "learning_rate": 0.0001686556197328286, "loss": 0.0, "step": 2690 }, { "epoch": 0.3339115351257589, "grad_norm": 6.498532457044348e-05, "learning_rate": 0.0001684982116679457, "loss": 0.0, "step": 2695 }, { "epoch": 0.33453103704621484, "grad_norm": 5.6049222621368244e-05, "learning_rate": 0.00016834048320732534, "loss": 0.0, "step": 2700 }, { "epoch": 0.3351505389666708, "grad_norm": 8.417150820605457e-05, "learning_rate": 0.00016818243508873163, "loss": 0.0, "step": 2705 }, { "epoch": 0.33577004088712675, "grad_norm": 0.00014095885853748769, "learning_rate": 0.00016802406805142394, "loss": 0.0, "step": 2710 }, { "epoch": 0.3363895428075827, "grad_norm": 8.485751459375024e-05, "learning_rate": 0.00016786538283615336, "loss": 0.0, "step": 2715 }, { "epoch": 0.33700904472803866, "grad_norm": 0.0001001075142994523, "learning_rate": 0.00016770638018515918, "loss": 0.0, "step": 2720 }, { "epoch": 0.3376285466484946, "grad_norm": 4.374800846562721e-05, "learning_rate": 0.00016754706084216555, "loss": 0.0, "step": 2725 }, { "epoch": 0.3382480485689506, "grad_norm": 0.00017496530199423432, "learning_rate": 0.00016738742555237788, "loss": 0.0, "step": 2730 }, { "epoch": 0.33886755048940653, "grad_norm": 0.00011764427472371608, "learning_rate": 0.00016722747506247942, "loss": 0.0, "step": 2735 }, { "epoch": 0.3394870524098625, "grad_norm": 0.0005166829214431345, "learning_rate": 0.0001670672101206277, "loss": 0.0, "step": 2740 }, { "epoch": 0.34010655433031844, "grad_norm": 6.855570973129943e-05, "learning_rate": 0.0001669066314764511, "loss": 0.0, "step": 2745 }, { "epoch": 0.3407260562507744, "grad_norm": 0.00017698151350487024, "learning_rate": 0.0001667457398810454, "loss": 0.0, "step": 2750 }, { "epoch": 0.34134555817123036, "grad_norm": 5.2731658797711134e-05, "learning_rate": 0.00016658453608697, "loss": 0.0, "step": 2755 }, { "epoch": 0.34196506009168626, "grad_norm": 4.2087754991371185e-05, "learning_rate": 0.00016642302084824486, "loss": 0.0, "step": 2760 }, { "epoch": 0.3425845620121422, "grad_norm": 6.9858280767221e-05, "learning_rate": 0.00016626119492034645, "loss": 0.0, "step": 2765 }, { "epoch": 0.34320406393259817, "grad_norm": 7.300668221432716e-05, "learning_rate": 0.0001660990590602046, "loss": 0.0, "step": 2770 }, { "epoch": 0.3438235658530541, "grad_norm": 4.735090988106094e-05, "learning_rate": 0.00016593661402619877, "loss": 0.0, "step": 2775 }, { "epoch": 0.3444430677735101, "grad_norm": 9.909595974022523e-05, "learning_rate": 0.00016577386057815464, "loss": 0.0, "step": 2780 }, { "epoch": 0.34506256969396604, "grad_norm": 7.363449549302459e-05, "learning_rate": 0.00016561079947734038, "loss": 0.0, "step": 2785 }, { "epoch": 0.345682071614422, "grad_norm": 0.00011281658953521401, "learning_rate": 0.00016544743148646322, "loss": 0.0, "step": 2790 }, { "epoch": 0.34630157353487795, "grad_norm": 4.874388105235994e-05, "learning_rate": 0.00016528375736966588, "loss": 0.0, "step": 2795 }, { "epoch": 0.3469210754553339, "grad_norm": 8.400322985835373e-05, "learning_rate": 0.00016511977789252291, "loss": 0.0, "step": 2800 }, { "epoch": 0.34754057737578986, "grad_norm": 7.343111064983532e-05, "learning_rate": 0.0001649554938220372, "loss": 0.0, "step": 2805 }, { "epoch": 0.3481600792962458, "grad_norm": 4.89971753268037e-05, "learning_rate": 0.00016479090592663636, "loss": 0.0, "step": 2810 }, { "epoch": 0.3487795812167018, "grad_norm": 0.00034520638291724026, "learning_rate": 0.00016462601497616906, "loss": 0.0, "step": 2815 }, { "epoch": 0.34939908313715773, "grad_norm": 8.785334648564458e-05, "learning_rate": 0.00016446082174190156, "loss": 0.0, "step": 2820 }, { "epoch": 0.3500185850576137, "grad_norm": 9.04490880202502e-05, "learning_rate": 0.00016429532699651403, "loss": 0.0, "step": 2825 }, { "epoch": 0.35063808697806964, "grad_norm": 0.00013318652054294944, "learning_rate": 0.00016412953151409687, "loss": 0.0, "step": 2830 }, { "epoch": 0.3512575888985256, "grad_norm": 6.16578254266642e-05, "learning_rate": 0.00016396343607014718, "loss": 0.0, "step": 2835 }, { "epoch": 0.35187709081898155, "grad_norm": 4.037901817355305e-05, "learning_rate": 0.00016379704144156522, "loss": 0.0, "step": 2840 }, { "epoch": 0.3524965927394375, "grad_norm": 4.106410779058933e-05, "learning_rate": 0.0001636303484066505, "loss": 0.0, "step": 2845 }, { "epoch": 0.35311609465989346, "grad_norm": 5.396677806857042e-05, "learning_rate": 0.0001634633577450984, "loss": 0.0, "step": 2850 }, { "epoch": 0.3537355965803494, "grad_norm": 4.246679600328207e-05, "learning_rate": 0.0001632960702379964, "loss": 0.0, "step": 2855 }, { "epoch": 0.3543550985008054, "grad_norm": 0.00010264861339237541, "learning_rate": 0.0001631284866678205, "loss": 0.0, "step": 2860 }, { "epoch": 0.35497460042126133, "grad_norm": 4.5441884140018374e-05, "learning_rate": 0.00016296060781843146, "loss": 0.0, "step": 2865 }, { "epoch": 0.35559410234171723, "grad_norm": 4.7840061597526073e-05, "learning_rate": 0.00016279243447507116, "loss": 0.0, "step": 2870 }, { "epoch": 0.3562136042621732, "grad_norm": 5.065930963610299e-05, "learning_rate": 0.00016262396742435908, "loss": 0.0, "step": 2875 }, { "epoch": 0.35683310618262915, "grad_norm": 4.227169119985774e-05, "learning_rate": 0.00016245520745428838, "loss": 0.0, "step": 2880 }, { "epoch": 0.3574526081030851, "grad_norm": 4.690683272201568e-05, "learning_rate": 0.00016228615535422236, "loss": 0.0, "step": 2885 }, { "epoch": 0.35807211002354106, "grad_norm": 0.00018714590987656265, "learning_rate": 0.00016211681191489078, "loss": 0.0, "step": 2890 }, { "epoch": 0.358691611943997, "grad_norm": 0.00010494949674466625, "learning_rate": 0.00016194717792838604, "loss": 0.0, "step": 2895 }, { "epoch": 0.35931111386445297, "grad_norm": 0.00013575387129094452, "learning_rate": 0.0001617772541881597, "loss": 0.0, "step": 2900 }, { "epoch": 0.3599306157849089, "grad_norm": 5.397819040808827e-05, "learning_rate": 0.00016160704148901838, "loss": 0.0, "step": 2905 }, { "epoch": 0.3605501177053649, "grad_norm": 5.2977884479332715e-05, "learning_rate": 0.00016143654062712054, "loss": 0.0, "step": 2910 }, { "epoch": 0.36116961962582084, "grad_norm": 0.00023259432055056095, "learning_rate": 0.00016126575239997236, "loss": 0.0, "step": 2915 }, { "epoch": 0.3617891215462768, "grad_norm": 8.560038986615837e-05, "learning_rate": 0.0001610946776064242, "loss": 0.0, "step": 2920 }, { "epoch": 0.36240862346673275, "grad_norm": 0.0001078248824342154, "learning_rate": 0.00016092331704666677, "loss": 0.0, "step": 2925 }, { "epoch": 0.3630281253871887, "grad_norm": 0.0001459190680179745, "learning_rate": 0.00016075167152222747, "loss": 0.0, "step": 2930 }, { "epoch": 0.36364762730764466, "grad_norm": 4.5987875637365505e-05, "learning_rate": 0.0001605797418359666, "loss": 0.0, "step": 2935 }, { "epoch": 0.3642671292281006, "grad_norm": 0.00019486696692183614, "learning_rate": 0.00016040752879207363, "loss": 0.0, "step": 2940 }, { "epoch": 0.3648866311485566, "grad_norm": 8.019043889362365e-05, "learning_rate": 0.00016023503319606336, "loss": 0.0, "step": 2945 }, { "epoch": 0.36550613306901253, "grad_norm": 4.632591662812047e-05, "learning_rate": 0.00016006225585477222, "loss": 0.0, "step": 2950 }, { "epoch": 0.3661256349894685, "grad_norm": 0.00020382850198075175, "learning_rate": 0.00015988919757635453, "loss": 0.0, "step": 2955 }, { "epoch": 0.36674513690992444, "grad_norm": 4.034637095173821e-05, "learning_rate": 0.00015971585917027862, "loss": 0.0, "step": 2960 }, { "epoch": 0.3673646388303804, "grad_norm": 7.394382555503398e-05, "learning_rate": 0.00015954224144732314, "loss": 0.0, "step": 2965 }, { "epoch": 0.36798414075083635, "grad_norm": 0.00010523940727580339, "learning_rate": 0.00015936834521957323, "loss": 0.0, "step": 2970 }, { "epoch": 0.3686036426712923, "grad_norm": 5.112990038469434e-05, "learning_rate": 0.00015919417130041668, "loss": 0.0, "step": 2975 }, { "epoch": 0.3692231445917482, "grad_norm": 5.9081456129206344e-05, "learning_rate": 0.00015901972050454016, "loss": 0.0, "step": 2980 }, { "epoch": 0.36984264651220417, "grad_norm": 3.269299486419186e-05, "learning_rate": 0.00015884499364792544, "loss": 0.0, "step": 2985 }, { "epoch": 0.3704621484326601, "grad_norm": 3.839211058220826e-05, "learning_rate": 0.00015866999154784552, "loss": 0.0, "step": 2990 }, { "epoch": 0.3710816503531161, "grad_norm": 0.0002732900029513985, "learning_rate": 0.0001584947150228609, "loss": 0.0, "step": 2995 }, { "epoch": 0.37170115227357203, "grad_norm": 8.83788161445409e-05, "learning_rate": 0.0001583191648928156, "loss": 0.0, "step": 3000 }, { "epoch": 0.372320654194028, "grad_norm": 0.00010765095066744834, "learning_rate": 0.00015814334197883346, "loss": 0.0, "step": 3005 }, { "epoch": 0.37294015611448394, "grad_norm": 5.309424886945635e-05, "learning_rate": 0.0001579672471033142, "loss": 0.0, "step": 3010 }, { "epoch": 0.3735596580349399, "grad_norm": 4.5256350858835503e-05, "learning_rate": 0.00015779088108992966, "loss": 0.0, "step": 3015 }, { "epoch": 0.37417915995539586, "grad_norm": 3.904269033228047e-05, "learning_rate": 0.00015761424476361992, "loss": 0.0, "step": 3020 }, { "epoch": 0.3747986618758518, "grad_norm": 7.406606891890988e-05, "learning_rate": 0.00015743733895058937, "loss": 0.0, "step": 3025 }, { "epoch": 0.37541816379630777, "grad_norm": 4.932947922497988e-05, "learning_rate": 0.00015726016447830302, "loss": 0.0, "step": 3030 }, { "epoch": 0.3760376657167637, "grad_norm": 6.637252226937562e-05, "learning_rate": 0.0001570827221754824, "loss": 0.0, "step": 3035 }, { "epoch": 0.3766571676372197, "grad_norm": 3.3784475817810744e-05, "learning_rate": 0.00015690501287210187, "loss": 0.0, "step": 3040 }, { "epoch": 0.37727666955767564, "grad_norm": 3.542944978107698e-05, "learning_rate": 0.00015672703739938454, "loss": 0.0, "step": 3045 }, { "epoch": 0.3778961714781316, "grad_norm": 0.00013856589794158936, "learning_rate": 0.00015654879658979874, "loss": 0.0, "step": 3050 }, { "epoch": 0.37851567339858755, "grad_norm": 4.007277311757207e-05, "learning_rate": 0.00015637029127705371, "loss": 0.0, "step": 3055 }, { "epoch": 0.3791351753190435, "grad_norm": 6.357618258334696e-05, "learning_rate": 0.0001561915222960959, "loss": 0.0, "step": 3060 }, { "epoch": 0.37975467723949946, "grad_norm": 5.158142084837891e-05, "learning_rate": 0.00015601249048310514, "loss": 0.0, "step": 3065 }, { "epoch": 0.3803741791599554, "grad_norm": 3.7166359106777236e-05, "learning_rate": 0.00015583319667549056, "loss": 0.0, "step": 3070 }, { "epoch": 0.3809936810804114, "grad_norm": 5.609666186501272e-05, "learning_rate": 0.00015565364171188682, "loss": 0.0, "step": 3075 }, { "epoch": 0.38161318300086733, "grad_norm": 4.3794785597128794e-05, "learning_rate": 0.00015547382643215003, "loss": 0.0, "step": 3080 }, { "epoch": 0.38223268492132323, "grad_norm": 5.281629637465812e-05, "learning_rate": 0.00015529375167735397, "loss": 0.0, "step": 3085 }, { "epoch": 0.3828521868417792, "grad_norm": 5.076703382655978e-05, "learning_rate": 0.00015511341828978613, "loss": 0.0, "step": 3090 }, { "epoch": 0.38347168876223514, "grad_norm": 4.699538112618029e-05, "learning_rate": 0.0001549328271129436, "loss": 0.0, "step": 3095 }, { "epoch": 0.3840911906826911, "grad_norm": 5.1341779908398166e-05, "learning_rate": 0.00015475197899152945, "loss": 0.0, "step": 3100 }, { "epoch": 0.38471069260314705, "grad_norm": 4.627084854291752e-05, "learning_rate": 0.00015457087477144848, "loss": 0.0, "step": 3105 }, { "epoch": 0.385330194523603, "grad_norm": 3.418140113353729e-05, "learning_rate": 0.0001543895152998034, "loss": 0.0, "step": 3110 }, { "epoch": 0.38594969644405897, "grad_norm": 3.255703632021323e-05, "learning_rate": 0.0001542079014248908, "loss": 0.0, "step": 3115 }, { "epoch": 0.3865691983645149, "grad_norm": 0.000123877776786685, "learning_rate": 0.00015402603399619725, "loss": 0.0, "step": 3120 }, { "epoch": 0.3871887002849709, "grad_norm": 3.7978446925990283e-05, "learning_rate": 0.00015384391386439534, "loss": 0.0, "step": 3125 }, { "epoch": 0.38780820220542683, "grad_norm": 4.18262934545055e-05, "learning_rate": 0.0001536615418813396, "loss": 0.0, "step": 3130 }, { "epoch": 0.3884277041258828, "grad_norm": 6.562701310031116e-05, "learning_rate": 0.00015347891890006265, "loss": 0.0, "step": 3135 }, { "epoch": 0.38904720604633874, "grad_norm": 4.073309537488967e-05, "learning_rate": 0.00015329604577477105, "loss": 0.0, "step": 3140 }, { "epoch": 0.3896667079667947, "grad_norm": 4.0355731471208856e-05, "learning_rate": 0.00015311292336084143, "loss": 0.0, "step": 3145 }, { "epoch": 0.39028620988725066, "grad_norm": 7.372553955065086e-05, "learning_rate": 0.00015292955251481653, "loss": 0.0, "step": 3150 }, { "epoch": 0.3909057118077066, "grad_norm": 6.75293558742851e-05, "learning_rate": 0.00015274593409440098, "loss": 0.0, "step": 3155 }, { "epoch": 0.39152521372816257, "grad_norm": 4.424172220751643e-05, "learning_rate": 0.00015256206895845748, "loss": 0.0, "step": 3160 }, { "epoch": 0.3921447156486185, "grad_norm": 7.366270438069478e-05, "learning_rate": 0.00015237795796700277, "loss": 0.0001, "step": 3165 }, { "epoch": 0.3927642175690745, "grad_norm": 4.658852412831038e-05, "learning_rate": 0.0001521936019812035, "loss": 0.0, "step": 3170 }, { "epoch": 0.39338371948953044, "grad_norm": 5.92035758018028e-05, "learning_rate": 0.00015200900186337224, "loss": 0.0, "step": 3175 }, { "epoch": 0.3940032214099864, "grad_norm": 7.333699613809586e-05, "learning_rate": 0.00015182415847696357, "loss": 0.0, "step": 3180 }, { "epoch": 0.39462272333044235, "grad_norm": 3.137208477710374e-05, "learning_rate": 0.00015163907268656986, "loss": 0.0, "step": 3185 }, { "epoch": 0.3952422252508983, "grad_norm": 4.5127242628950626e-05, "learning_rate": 0.00015145374535791736, "loss": 0.0, "step": 3190 }, { "epoch": 0.3958617271713542, "grad_norm": 0.00033835682552307844, "learning_rate": 0.00015126817735786207, "loss": 0.0, "step": 3195 }, { "epoch": 0.39648122909181016, "grad_norm": 0.00013611074246000499, "learning_rate": 0.0001510823695543857, "loss": 0.0, "step": 3200 }, { "epoch": 0.3971007310122661, "grad_norm": 4.8425994464196265e-05, "learning_rate": 0.00015089632281659168, "loss": 0.0, "step": 3205 }, { "epoch": 0.3977202329327221, "grad_norm": 7.081739749992266e-05, "learning_rate": 0.00015071003801470098, "loss": 0.0, "step": 3210 }, { "epoch": 0.39833973485317803, "grad_norm": 5.831833914271556e-05, "learning_rate": 0.00015052351602004807, "loss": 0.0, "step": 3215 }, { "epoch": 0.398959236773634, "grad_norm": 7.896427996456623e-05, "learning_rate": 0.00015033675770507706, "loss": 0.0, "step": 3220 }, { "epoch": 0.39957873869408994, "grad_norm": 4.6789104089839384e-05, "learning_rate": 0.00015014976394333714, "loss": 0.0, "step": 3225 }, { "epoch": 0.4001982406145459, "grad_norm": 6.98813091730699e-05, "learning_rate": 0.00014996253560947906, "loss": 0.0, "step": 3230 }, { "epoch": 0.40081774253500185, "grad_norm": 2.7482419682200998e-05, "learning_rate": 0.0001497750735792506, "loss": 0.0, "step": 3235 }, { "epoch": 0.4014372444554578, "grad_norm": 0.00379940471611917, "learning_rate": 0.0001495873787294927, "loss": 0.0, "step": 3240 }, { "epoch": 0.40205674637591376, "grad_norm": 3.576884409994818e-05, "learning_rate": 0.00014939945193813525, "loss": 0.0, "step": 3245 }, { "epoch": 0.4026762482963697, "grad_norm": 8.815489854896441e-05, "learning_rate": 0.00014921129408419312, "loss": 0.0, "step": 3250 }, { "epoch": 0.4032957502168257, "grad_norm": 4.352161704446189e-05, "learning_rate": 0.00014902290604776184, "loss": 0.0, "step": 3255 }, { "epoch": 0.40391525213728163, "grad_norm": 3.716963328770362e-05, "learning_rate": 0.00014883428871001375, "loss": 0.0, "step": 3260 }, { "epoch": 0.4045347540577376, "grad_norm": 0.00041367969242855906, "learning_rate": 0.00014864544295319356, "loss": 0.0, "step": 3265 }, { "epoch": 0.40515425597819354, "grad_norm": 0.00016067155229393393, "learning_rate": 0.00014845636966061458, "loss": 0.0, "step": 3270 }, { "epoch": 0.4057737578986495, "grad_norm": 4.448082472663373e-05, "learning_rate": 0.00014826706971665424, "loss": 0.0, "step": 3275 }, { "epoch": 0.40639325981910546, "grad_norm": 8.406947017647326e-05, "learning_rate": 0.00014807754400675017, "loss": 0.0, "step": 3280 }, { "epoch": 0.4070127617395614, "grad_norm": 3.787494279094972e-05, "learning_rate": 0.00014788779341739608, "loss": 0.0, "step": 3285 }, { "epoch": 0.40763226366001737, "grad_norm": 0.00022472151613328606, "learning_rate": 0.00014769781883613745, "loss": 0.0, "step": 3290 }, { "epoch": 0.4082517655804733, "grad_norm": 3.0463312214124016e-05, "learning_rate": 0.0001475076211515675, "loss": 0.0, "step": 3295 }, { "epoch": 0.4088712675009292, "grad_norm": 5.1993625675095245e-05, "learning_rate": 0.00014731720125332295, "loss": 0.0, "step": 3300 }, { "epoch": 0.4094907694213852, "grad_norm": 4.05214486818295e-05, "learning_rate": 0.00014712656003208006, "loss": 0.0, "step": 3305 }, { "epoch": 0.41011027134184114, "grad_norm": 4.05488426622469e-05, "learning_rate": 0.00014693569837955005, "loss": 0.0, "step": 3310 }, { "epoch": 0.4107297732622971, "grad_norm": 3.070032107643783e-05, "learning_rate": 0.00014674461718847551, "loss": 0.0, "step": 3315 }, { "epoch": 0.41134927518275305, "grad_norm": 3.820321580860764e-05, "learning_rate": 0.00014655331735262566, "loss": 0.0, "step": 3320 }, { "epoch": 0.411968777103209, "grad_norm": 2.821323687385302e-05, "learning_rate": 0.0001463617997667925, "loss": 0.0, "step": 3325 }, { "epoch": 0.41258827902366496, "grad_norm": 3.809288682532497e-05, "learning_rate": 0.00014617006532678656, "loss": 0.0, "step": 3330 }, { "epoch": 0.4132077809441209, "grad_norm": 5.8316891227150336e-05, "learning_rate": 0.00014597811492943267, "loss": 0.0, "step": 3335 }, { "epoch": 0.4138272828645769, "grad_norm": 6.116308213677257e-05, "learning_rate": 0.00014578594947256584, "loss": 0.0, "step": 3340 }, { "epoch": 0.41444678478503283, "grad_norm": 3.792963980231434e-05, "learning_rate": 0.00014559356985502687, "loss": 0.0001, "step": 3345 }, { "epoch": 0.4150662867054888, "grad_norm": 8.62360029714182e-05, "learning_rate": 0.0001454009769766584, "loss": 0.0, "step": 3350 }, { "epoch": 0.41568578862594474, "grad_norm": 7.145444396883249e-05, "learning_rate": 0.00014520817173830058, "loss": 0.0, "step": 3355 }, { "epoch": 0.4163052905464007, "grad_norm": 3.8311536627588794e-05, "learning_rate": 0.00014501515504178683, "loss": 0.0, "step": 3360 }, { "epoch": 0.41692479246685665, "grad_norm": 5.686160147888586e-05, "learning_rate": 0.0001448219277899396, "loss": 0.0, "step": 3365 }, { "epoch": 0.4175442943873126, "grad_norm": 4.575768980430439e-05, "learning_rate": 0.00014462849088656628, "loss": 0.0, "step": 3370 }, { "epoch": 0.41816379630776856, "grad_norm": 3.0528681236319244e-05, "learning_rate": 0.00014443484523645486, "loss": 0.0, "step": 3375 }, { "epoch": 0.4187832982282245, "grad_norm": 2.661229154909961e-05, "learning_rate": 0.00014424099174536976, "loss": 0.0, "step": 3380 }, { "epoch": 0.4194028001486805, "grad_norm": 3.145201117149554e-05, "learning_rate": 0.00014404693132004753, "loss": 0.0, "step": 3385 }, { "epoch": 0.42002230206913643, "grad_norm": 4.7540583182126284e-05, "learning_rate": 0.00014385266486819254, "loss": 0.0, "step": 3390 }, { "epoch": 0.4206418039895924, "grad_norm": 3.2735981221776456e-05, "learning_rate": 0.00014365819329847308, "loss": 0.0, "step": 3395 }, { "epoch": 0.42126130591004834, "grad_norm": 3.102560367551632e-05, "learning_rate": 0.00014346351752051663, "loss": 0.0, "step": 3400 }, { "epoch": 0.4218808078305043, "grad_norm": 5.7905606809072196e-05, "learning_rate": 0.00014326863844490596, "loss": 0.0, "step": 3405 }, { "epoch": 0.4225003097509602, "grad_norm": 5.015847273170948e-05, "learning_rate": 0.00014307355698317474, "loss": 0.0, "step": 3410 }, { "epoch": 0.42311981167141616, "grad_norm": 0.00035993565688841045, "learning_rate": 0.00014287827404780323, "loss": 0.0, "step": 3415 }, { "epoch": 0.4237393135918721, "grad_norm": 3.687063144752756e-05, "learning_rate": 0.00014268279055221417, "loss": 0.0, "step": 3420 }, { "epoch": 0.42435881551232807, "grad_norm": 6.287154246820137e-05, "learning_rate": 0.0001424871074107683, "loss": 0.0007, "step": 3425 }, { "epoch": 0.424978317432784, "grad_norm": 9.101985051529482e-05, "learning_rate": 0.00014229122553876023, "loss": 0.0, "step": 3430 }, { "epoch": 0.42559781935324, "grad_norm": 7.198568346211687e-05, "learning_rate": 0.00014209514585241414, "loss": 0.0, "step": 3435 }, { "epoch": 0.42621732127369594, "grad_norm": 0.00012639976921491325, "learning_rate": 0.0001418988692688795, "loss": 0.0, "step": 3440 }, { "epoch": 0.4268368231941519, "grad_norm": 9.073167893802747e-05, "learning_rate": 0.00014170239670622663, "loss": 0.0, "step": 3445 }, { "epoch": 0.42745632511460785, "grad_norm": 5.296363451634534e-05, "learning_rate": 0.00014150572908344267, "loss": 0.0, "step": 3450 }, { "epoch": 0.4280758270350638, "grad_norm": 0.0004599154635798186, "learning_rate": 0.00014130886732042713, "loss": 0.0, "step": 3455 }, { "epoch": 0.42869532895551976, "grad_norm": 0.00012561121548060328, "learning_rate": 0.00014111181233798743, "loss": 0.0, "step": 3460 }, { "epoch": 0.4293148308759757, "grad_norm": 8.397922647418454e-05, "learning_rate": 0.00014091456505783495, "loss": 0.0, "step": 3465 }, { "epoch": 0.4299343327964317, "grad_norm": 7.694229861954227e-05, "learning_rate": 0.0001407171264025805, "loss": 0.0, "step": 3470 }, { "epoch": 0.43055383471688763, "grad_norm": 0.00011744967923732474, "learning_rate": 0.00014051949729572985, "loss": 0.0, "step": 3475 }, { "epoch": 0.4311733366373436, "grad_norm": 7.479608757421374e-05, "learning_rate": 0.0001403216786616799, "loss": 0.0, "step": 3480 }, { "epoch": 0.43179283855779954, "grad_norm": 0.0004509172576945275, "learning_rate": 0.00014012367142571377, "loss": 0.0, "step": 3485 }, { "epoch": 0.4324123404782555, "grad_norm": 6.471523374784738e-05, "learning_rate": 0.00013992547651399696, "loss": 0.0, "step": 3490 }, { "epoch": 0.43303184239871145, "grad_norm": 0.0015676968032494187, "learning_rate": 0.0001397270948535727, "loss": 0.0, "step": 3495 }, { "epoch": 0.4336513443191674, "grad_norm": 3.977654341724701e-05, "learning_rate": 0.00013952852737235768, "loss": 0.0, "step": 3500 }, { "epoch": 0.43427084623962336, "grad_norm": 2.461092844896484e-05, "learning_rate": 0.0001393297749991379, "loss": 0.0, "step": 3505 }, { "epoch": 0.4348903481600793, "grad_norm": 8.508963946951553e-05, "learning_rate": 0.00013913083866356408, "loss": 0.0, "step": 3510 }, { "epoch": 0.4355098500805353, "grad_norm": 5.518314355867915e-05, "learning_rate": 0.0001389317192961474, "loss": 0.0, "step": 3515 }, { "epoch": 0.4361293520009912, "grad_norm": 0.00027178070740774274, "learning_rate": 0.00013873241782825523, "loss": 0.0, "step": 3520 }, { "epoch": 0.43674885392144713, "grad_norm": 4.275524042895995e-05, "learning_rate": 0.00013853293519210666, "loss": 0.0, "step": 3525 }, { "epoch": 0.4373683558419031, "grad_norm": 5.294285438139923e-05, "learning_rate": 0.00013833327232076813, "loss": 0.0, "step": 3530 }, { "epoch": 0.43798785776235905, "grad_norm": 4.5299442717805505e-05, "learning_rate": 0.00013813343014814925, "loss": 0.0, "step": 3535 }, { "epoch": 0.438607359682815, "grad_norm": 4.007149618701078e-05, "learning_rate": 0.00013793340960899816, "loss": 0.0, "step": 3540 }, { "epoch": 0.43922686160327096, "grad_norm": 2.8431810278561898e-05, "learning_rate": 0.00013773321163889742, "loss": 0.0, "step": 3545 }, { "epoch": 0.4398463635237269, "grad_norm": 8.575242100050673e-05, "learning_rate": 0.00013753283717425944, "loss": 0.0, "step": 3550 }, { "epoch": 0.44046586544418287, "grad_norm": 2.5520477720419876e-05, "learning_rate": 0.00013733228715232212, "loss": 0.0, "step": 3555 }, { "epoch": 0.4410853673646388, "grad_norm": 4.7618574171792716e-05, "learning_rate": 0.00013713156251114468, "loss": 0.0, "step": 3560 }, { "epoch": 0.4417048692850948, "grad_norm": 4.880678898189217e-05, "learning_rate": 0.00013693066418960293, "loss": 0.0, "step": 3565 }, { "epoch": 0.44232437120555074, "grad_norm": 0.0002793136518448591, "learning_rate": 0.00013672959312738514, "loss": 0.0, "step": 3570 }, { "epoch": 0.4429438731260067, "grad_norm": 4.9370275519322604e-05, "learning_rate": 0.0001365283502649876, "loss": 0.0, "step": 3575 }, { "epoch": 0.44356337504646265, "grad_norm": 3.5723194741876796e-05, "learning_rate": 0.0001363269365437101, "loss": 0.0, "step": 3580 }, { "epoch": 0.4441828769669186, "grad_norm": 3.421949440962635e-05, "learning_rate": 0.00013612535290565174, "loss": 0.0, "step": 3585 }, { "epoch": 0.44480237888737456, "grad_norm": 6.896677223267034e-05, "learning_rate": 0.00013592360029370624, "loss": 0.0, "step": 3590 }, { "epoch": 0.4454218808078305, "grad_norm": 8.125825843308121e-05, "learning_rate": 0.00013572167965155774, "loss": 0.0, "step": 3595 }, { "epoch": 0.4460413827282865, "grad_norm": 9.295610652770847e-05, "learning_rate": 0.0001355195919236764, "loss": 0.0, "step": 3600 }, { "epoch": 0.44666088464874243, "grad_norm": 3.486598143354058e-05, "learning_rate": 0.00013531733805531384, "loss": 0.0, "step": 3605 }, { "epoch": 0.4472803865691984, "grad_norm": 2.8632917747017927e-05, "learning_rate": 0.00013511491899249875, "loss": 0.0, "step": 3610 }, { "epoch": 0.44789988848965434, "grad_norm": 0.0001673200895311311, "learning_rate": 0.00013491233568203262, "loss": 0.0, "step": 3615 }, { "epoch": 0.4485193904101103, "grad_norm": 4.152518158662133e-05, "learning_rate": 0.00013470958907148517, "loss": 0.0, "step": 3620 }, { "epoch": 0.4491388923305662, "grad_norm": 5.316940587363206e-05, "learning_rate": 0.0001345066801091898, "loss": 0.0, "step": 3625 }, { "epoch": 0.44975839425102215, "grad_norm": 3.72047652490437e-05, "learning_rate": 0.00013430360974423953, "loss": 0.0, "step": 3630 }, { "epoch": 0.4503778961714781, "grad_norm": 2.2855287170386873e-05, "learning_rate": 0.00013410037892648219, "loss": 0.0, "step": 3635 }, { "epoch": 0.45099739809193407, "grad_norm": 5.0169637688668445e-05, "learning_rate": 0.00013389698860651606, "loss": 0.0, "step": 3640 }, { "epoch": 0.45161690001239, "grad_norm": 0.00011272204574197531, "learning_rate": 0.00013369343973568566, "loss": 0.0, "step": 3645 }, { "epoch": 0.452236401932846, "grad_norm": 6.0942209529457614e-05, "learning_rate": 0.00013348973326607694, "loss": 0.0, "step": 3650 }, { "epoch": 0.45285590385330193, "grad_norm": 4.22227312810719e-05, "learning_rate": 0.0001332858701505132, "loss": 0.0, "step": 3655 }, { "epoch": 0.4534754057737579, "grad_norm": 2.6433563107275404e-05, "learning_rate": 0.00013308185134255023, "loss": 0.0, "step": 3660 }, { "epoch": 0.45409490769421385, "grad_norm": 3.053019099752419e-05, "learning_rate": 0.00013287767779647218, "loss": 0.0, "step": 3665 }, { "epoch": 0.4547144096146698, "grad_norm": 5.7571280194679275e-05, "learning_rate": 0.00013267335046728698, "loss": 0.0, "step": 3670 }, { "epoch": 0.45533391153512576, "grad_norm": 3.9815869968151674e-05, "learning_rate": 0.00013246887031072188, "loss": 0.0, "step": 3675 }, { "epoch": 0.4559534134555817, "grad_norm": 4.502326191868633e-05, "learning_rate": 0.00013226423828321886, "loss": 0.0, "step": 3680 }, { "epoch": 0.45657291537603767, "grad_norm": 4.460475247469731e-05, "learning_rate": 0.0001320594553419304, "loss": 0.0, "step": 3685 }, { "epoch": 0.4571924172964936, "grad_norm": 2.5633846234995872e-05, "learning_rate": 0.00013185452244471478, "loss": 0.0, "step": 3690 }, { "epoch": 0.4578119192169496, "grad_norm": 8.090982009889558e-05, "learning_rate": 0.0001316494405501318, "loss": 0.0, "step": 3695 }, { "epoch": 0.45843142113740554, "grad_norm": 2.1772631953353994e-05, "learning_rate": 0.000131444210617438, "loss": 0.0, "step": 3700 }, { "epoch": 0.4590509230578615, "grad_norm": 3.230808943044394e-05, "learning_rate": 0.0001312388336065825, "loss": 0.0, "step": 3705 }, { "epoch": 0.45967042497831745, "grad_norm": 2.1502326490008272e-05, "learning_rate": 0.00013103331047820238, "loss": 0.0, "step": 3710 }, { "epoch": 0.4602899268987734, "grad_norm": 0.0003619146009441465, "learning_rate": 0.0001308276421936181, "loss": 0.0, "step": 3715 }, { "epoch": 0.46090942881922936, "grad_norm": 2.8992306397412904e-05, "learning_rate": 0.00013062182971482908, "loss": 0.0005, "step": 3720 }, { "epoch": 0.4615289307396853, "grad_norm": 2.192769170505926e-05, "learning_rate": 0.00013041587400450922, "loss": 0.0, "step": 3725 }, { "epoch": 0.4621484326601413, "grad_norm": 2.030646282946691e-05, "learning_rate": 0.00013020977602600246, "loss": 0.0, "step": 3730 }, { "epoch": 0.4627679345805972, "grad_norm": 3.258241849835031e-05, "learning_rate": 0.00013000353674331804, "loss": 0.0, "step": 3735 }, { "epoch": 0.46338743650105313, "grad_norm": 3.8590187614317983e-05, "learning_rate": 0.00012979715712112627, "loss": 0.0, "step": 3740 }, { "epoch": 0.4640069384215091, "grad_norm": 0.00010251568892272189, "learning_rate": 0.00012959063812475376, "loss": 0.0, "step": 3745 }, { "epoch": 0.46462644034196504, "grad_norm": 2.8597092750715092e-05, "learning_rate": 0.00012938398072017912, "loss": 0.0, "step": 3750 }, { "epoch": 0.465245942262421, "grad_norm": 5.450044773169793e-05, "learning_rate": 0.00012917718587402837, "loss": 0.0001, "step": 3755 }, { "epoch": 0.46586544418287695, "grad_norm": 3.835184907075018e-05, "learning_rate": 0.00012897025455357034, "loss": 0.0, "step": 3760 }, { "epoch": 0.4664849461033329, "grad_norm": 4.197909220238216e-05, "learning_rate": 0.00012876318772671227, "loss": 0.0, "step": 3765 }, { "epoch": 0.46710444802378887, "grad_norm": 7.16991416993551e-05, "learning_rate": 0.0001285559863619952, "loss": 0.0, "step": 3770 }, { "epoch": 0.4677239499442448, "grad_norm": 3.299609670648351e-05, "learning_rate": 0.00012834865142858937, "loss": 0.0, "step": 3775 }, { "epoch": 0.4683434518647008, "grad_norm": 2.2095242456998676e-05, "learning_rate": 0.00012814118389629, "loss": 0.0, "step": 3780 }, { "epoch": 0.46896295378515673, "grad_norm": 3.66107196896337e-05, "learning_rate": 0.0001279335847355123, "loss": 0.0, "step": 3785 }, { "epoch": 0.4695824557056127, "grad_norm": 8.553396764909849e-05, "learning_rate": 0.00012772585491728725, "loss": 0.0, "step": 3790 }, { "epoch": 0.47020195762606865, "grad_norm": 3.4148237318731844e-05, "learning_rate": 0.00012751799541325706, "loss": 0.0, "step": 3795 }, { "epoch": 0.4708214595465246, "grad_norm": 7.819967868272215e-05, "learning_rate": 0.00012731000719567036, "loss": 0.0, "step": 3800 }, { "epoch": 0.47144096146698056, "grad_norm": 0.0001345328491879627, "learning_rate": 0.00012710189123737802, "loss": 0.0, "step": 3805 }, { "epoch": 0.4720604633874365, "grad_norm": 4.315905607654713e-05, "learning_rate": 0.00012689364851182827, "loss": 0.0, "step": 3810 }, { "epoch": 0.47267996530789247, "grad_norm": 6.335110811050981e-05, "learning_rate": 0.0001266852799930623, "loss": 0.0, "step": 3815 }, { "epoch": 0.4732994672283484, "grad_norm": 8.836195775074884e-05, "learning_rate": 0.00012647678665570977, "loss": 0.0, "step": 3820 }, { "epoch": 0.4739189691488044, "grad_norm": 3.528664819896221e-05, "learning_rate": 0.00012626816947498409, "loss": 0.0, "step": 3825 }, { "epoch": 0.47453847106926034, "grad_norm": 2.976834548462648e-05, "learning_rate": 0.00012605942942667795, "loss": 0.0, "step": 3830 }, { "epoch": 0.4751579729897163, "grad_norm": 2.1575729988398962e-05, "learning_rate": 0.00012585056748715885, "loss": 0.0, "step": 3835 }, { "epoch": 0.47577747491017225, "grad_norm": 2.386528103670571e-05, "learning_rate": 0.00012564158463336422, "loss": 0.0, "step": 3840 }, { "epoch": 0.47639697683062815, "grad_norm": 3.217066841898486e-05, "learning_rate": 0.00012543248184279724, "loss": 0.0, "step": 3845 }, { "epoch": 0.4770164787510841, "grad_norm": 2.447585757181514e-05, "learning_rate": 0.00012522326009352207, "loss": 0.0, "step": 3850 }, { "epoch": 0.47763598067154006, "grad_norm": 4.9053120164899155e-05, "learning_rate": 0.00012501392036415917, "loss": 0.0, "step": 3855 }, { "epoch": 0.478255482591996, "grad_norm": 5.131889338372275e-05, "learning_rate": 0.00012480446363388097, "loss": 0.0, "step": 3860 }, { "epoch": 0.478874984512452, "grad_norm": 3.2030286092776805e-05, "learning_rate": 0.0001245948908824071, "loss": 0.0, "step": 3865 }, { "epoch": 0.47949448643290793, "grad_norm": 2.028559538302943e-05, "learning_rate": 0.0001243852030899998, "loss": 0.0, "step": 3870 }, { "epoch": 0.4801139883533639, "grad_norm": 4.3190771975787356e-05, "learning_rate": 0.00012417540123745968, "loss": 0.0, "step": 3875 }, { "epoch": 0.48073349027381984, "grad_norm": 3.818270124611445e-05, "learning_rate": 0.00012396548630612053, "loss": 0.0, "step": 3880 }, { "epoch": 0.4813529921942758, "grad_norm": 3.0559913284378126e-05, "learning_rate": 0.0001237554592778452, "loss": 0.0, "step": 3885 }, { "epoch": 0.48197249411473175, "grad_norm": 2.7840045731863938e-05, "learning_rate": 0.00012354532113502094, "loss": 0.0, "step": 3890 }, { "epoch": 0.4825919960351877, "grad_norm": 2.6462743335287087e-05, "learning_rate": 0.00012333507286055458, "loss": 0.0, "step": 3895 }, { "epoch": 0.48321149795564367, "grad_norm": 8.306504605570808e-05, "learning_rate": 0.00012312471543786822, "loss": 0.0, "step": 3900 }, { "epoch": 0.4838309998760996, "grad_norm": 3.1236100767273456e-05, "learning_rate": 0.00012291424985089438, "loss": 0.0, "step": 3905 }, { "epoch": 0.4844505017965556, "grad_norm": 3.3831522159744054e-05, "learning_rate": 0.00012270367708407158, "loss": 0.0001, "step": 3910 }, { "epoch": 0.48507000371701153, "grad_norm": 2.6516374418861233e-05, "learning_rate": 0.00012249299812233964, "loss": 0.0, "step": 3915 }, { "epoch": 0.4856895056374675, "grad_norm": 2.2493501091958024e-05, "learning_rate": 0.00012228221395113516, "loss": 0.0, "step": 3920 }, { "epoch": 0.48630900755792345, "grad_norm": 3.410916542634368e-05, "learning_rate": 0.00012207132555638677, "loss": 0.0, "step": 3925 }, { "epoch": 0.4869285094783794, "grad_norm": 4.995338531443849e-05, "learning_rate": 0.00012186033392451059, "loss": 0.0, "step": 3930 }, { "epoch": 0.48754801139883536, "grad_norm": 4.837947562918998e-05, "learning_rate": 0.00012164924004240573, "loss": 0.0, "step": 3935 }, { "epoch": 0.4881675133192913, "grad_norm": 3.3659747714409605e-05, "learning_rate": 0.0001214380448974494, "loss": 0.0, "step": 3940 }, { "epoch": 0.48878701523974727, "grad_norm": 8.175786206265911e-05, "learning_rate": 0.00012122674947749269, "loss": 0.0, "step": 3945 }, { "epoch": 0.48940651716020317, "grad_norm": 9.120213508140296e-05, "learning_rate": 0.00012101535477085548, "loss": 0.0, "step": 3950 }, { "epoch": 0.4900260190806591, "grad_norm": 3.1369610951514915e-05, "learning_rate": 0.00012080386176632223, "loss": 0.0, "step": 3955 }, { "epoch": 0.4906455210011151, "grad_norm": 3.738858868018724e-05, "learning_rate": 0.00012059227145313712, "loss": 0.0, "step": 3960 }, { "epoch": 0.49126502292157104, "grad_norm": 3.2235846447292715e-05, "learning_rate": 0.00012038058482099943, "loss": 0.0, "step": 3965 }, { "epoch": 0.491884524842027, "grad_norm": 3.690286030177958e-05, "learning_rate": 0.0001201688028600591, "loss": 0.0, "step": 3970 }, { "epoch": 0.49250402676248295, "grad_norm": 7.265872409334406e-05, "learning_rate": 0.00011995692656091182, "loss": 0.0, "step": 3975 }, { "epoch": 0.4931235286829389, "grad_norm": 4.379475649329834e-05, "learning_rate": 0.0001197449569145946, "loss": 0.0, "step": 3980 }, { "epoch": 0.49374303060339486, "grad_norm": 4.241093120072037e-05, "learning_rate": 0.00011953289491258113, "loss": 0.0, "step": 3985 }, { "epoch": 0.4943625325238508, "grad_norm": 4.25223697675392e-05, "learning_rate": 0.00011932074154677698, "loss": 0.0, "step": 3990 }, { "epoch": 0.4949820344443068, "grad_norm": 5.98220358369872e-05, "learning_rate": 0.00011910849780951514, "loss": 0.0, "step": 3995 }, { "epoch": 0.49560153636476273, "grad_norm": 3.5686767660081387e-05, "learning_rate": 0.00011889616469355134, "loss": 0.0, "step": 4000 }, { "epoch": 0.4962210382852187, "grad_norm": 3.5841279895976186e-05, "learning_rate": 0.00011868374319205923, "loss": 0.0, "step": 4005 }, { "epoch": 0.49684054020567464, "grad_norm": 0.00016275800589937717, "learning_rate": 0.00011847123429862607, "loss": 0.0, "step": 4010 }, { "epoch": 0.4974600421261306, "grad_norm": 2.3898659492260776e-05, "learning_rate": 0.00011825863900724772, "loss": 0.0, "step": 4015 }, { "epoch": 0.49807954404658655, "grad_norm": 4.2158044379903004e-05, "learning_rate": 0.00011804595831232426, "loss": 0.0, "step": 4020 }, { "epoch": 0.4986990459670425, "grad_norm": 4.926889596390538e-05, "learning_rate": 0.0001178331932086552, "loss": 0.0, "step": 4025 }, { "epoch": 0.49931854788749847, "grad_norm": 5.170746226212941e-05, "learning_rate": 0.00011762034469143493, "loss": 0.0, "step": 4030 }, { "epoch": 0.4999380498079544, "grad_norm": 4.9014153773896396e-05, "learning_rate": 0.00011740741375624785, "loss": 0.0, "step": 4035 }, { "epoch": 0.5005575517284103, "grad_norm": 4.2773674067575485e-05, "learning_rate": 0.00011719440139906401, "loss": 0.0, "step": 4040 }, { "epoch": 0.5011770536488663, "grad_norm": 0.00013482967915479094, "learning_rate": 0.00011698130861623429, "loss": 0.0, "step": 4045 }, { "epoch": 0.5017965555693222, "grad_norm": 1.4437957361224107e-05, "learning_rate": 0.00011676813640448565, "loss": 0.0, "step": 4050 }, { "epoch": 0.5024160574897782, "grad_norm": 2.699905599001795e-05, "learning_rate": 0.00011655488576091673, "loss": 0.0, "step": 4055 }, { "epoch": 0.5030355594102341, "grad_norm": 0.00021871710487175733, "learning_rate": 0.00011634155768299286, "loss": 0.0, "step": 4060 }, { "epoch": 0.5036550613306902, "grad_norm": 8.414475450990722e-05, "learning_rate": 0.00011612815316854176, "loss": 0.0, "step": 4065 }, { "epoch": 0.5042745632511461, "grad_norm": 3.9432328776456416e-05, "learning_rate": 0.00011591467321574848, "loss": 0.0, "step": 4070 }, { "epoch": 0.5048940651716021, "grad_norm": 6.029304859112017e-05, "learning_rate": 0.000115701118823151, "loss": 0.0, "step": 4075 }, { "epoch": 0.505513567092058, "grad_norm": 2.7848871468449943e-05, "learning_rate": 0.00011548749098963556, "loss": 0.0, "step": 4080 }, { "epoch": 0.506133069012514, "grad_norm": 7.109341095201671e-05, "learning_rate": 0.00011527379071443186, "loss": 0.0, "step": 4085 }, { "epoch": 0.5067525709329699, "grad_norm": 3.747369191842154e-05, "learning_rate": 0.00011506001899710838, "loss": 0.0, "step": 4090 }, { "epoch": 0.5073720728534259, "grad_norm": 2.507082899683155e-05, "learning_rate": 0.00011484617683756788, "loss": 0.0, "step": 4095 }, { "epoch": 0.5079915747738818, "grad_norm": 4.0924878703663126e-05, "learning_rate": 0.00011463226523604252, "loss": 0.0, "step": 4100 }, { "epoch": 0.5086110766943378, "grad_norm": 0.00016167134162969887, "learning_rate": 0.00011441828519308932, "loss": 0.0, "step": 4105 }, { "epoch": 0.5092305786147937, "grad_norm": 3.5629502235678956e-05, "learning_rate": 0.0001142042377095854, "loss": 0.0, "step": 4110 }, { "epoch": 0.5098500805352496, "grad_norm": 4.9837359256343916e-05, "learning_rate": 0.00011399012378672334, "loss": 0.0, "step": 4115 }, { "epoch": 0.5104695824557056, "grad_norm": 2.3613270968780853e-05, "learning_rate": 0.0001137759444260065, "loss": 0.0, "step": 4120 }, { "epoch": 0.5110890843761615, "grad_norm": 2.2141499357530847e-05, "learning_rate": 0.0001135617006292443, "loss": 0.0, "step": 4125 }, { "epoch": 0.5117085862966175, "grad_norm": 3.07133341266308e-05, "learning_rate": 0.00011334739339854754, "loss": 0.0, "step": 4130 }, { "epoch": 0.5123280882170734, "grad_norm": 2.233720078947954e-05, "learning_rate": 0.00011313302373632382, "loss": 0.0, "step": 4135 }, { "epoch": 0.5129475901375294, "grad_norm": 2.9585493393824436e-05, "learning_rate": 0.00011291859264527258, "loss": 0.0, "step": 4140 }, { "epoch": 0.5135670920579853, "grad_norm": 3.692436803248711e-05, "learning_rate": 0.00011270410112838081, "loss": 0.0, "step": 4145 }, { "epoch": 0.5141865939784414, "grad_norm": 3.4243628761032596e-05, "learning_rate": 0.00011248955018891798, "loss": 0.0, "step": 4150 }, { "epoch": 0.5148060958988973, "grad_norm": 3.053393811569549e-05, "learning_rate": 0.00011227494083043154, "loss": 0.0, "step": 4155 }, { "epoch": 0.5154255978193533, "grad_norm": 2.576829501776956e-05, "learning_rate": 0.00011206027405674222, "loss": 0.0, "step": 4160 }, { "epoch": 0.5160450997398092, "grad_norm": 0.00034403285826556385, "learning_rate": 0.00011184555087193926, "loss": 0.0, "step": 4165 }, { "epoch": 0.5166646016602652, "grad_norm": 3.126296724076383e-05, "learning_rate": 0.00011163077228037584, "loss": 0.0, "step": 4170 }, { "epoch": 0.5172841035807211, "grad_norm": 2.276338273077272e-05, "learning_rate": 0.00011141593928666422, "loss": 0.0, "step": 4175 }, { "epoch": 0.5179036055011771, "grad_norm": 3.831459616776556e-05, "learning_rate": 0.00011120105289567115, "loss": 0.0, "step": 4180 }, { "epoch": 0.518523107421633, "grad_norm": 2.052420677500777e-05, "learning_rate": 0.00011098611411251314, "loss": 0.0, "step": 4185 }, { "epoch": 0.519142609342089, "grad_norm": 3.322997872601263e-05, "learning_rate": 0.00011077112394255178, "loss": 0.0, "step": 4190 }, { "epoch": 0.5197621112625449, "grad_norm": 2.8367403501761146e-05, "learning_rate": 0.00011055608339138901, "loss": 0.0, "step": 4195 }, { "epoch": 0.5203816131830009, "grad_norm": 3.393167207832448e-05, "learning_rate": 0.00011034099346486237, "loss": 0.0, "step": 4200 }, { "epoch": 0.5210011151034568, "grad_norm": 4.776508649229072e-05, "learning_rate": 0.00011012585516904046, "loss": 0.0, "step": 4205 }, { "epoch": 0.5216206170239128, "grad_norm": 2.543911432439927e-05, "learning_rate": 0.00010991066951021803, "loss": 0.0, "step": 4210 }, { "epoch": 0.5222401189443687, "grad_norm": 4.458898911252618e-05, "learning_rate": 0.00010969543749491138, "loss": 0.0, "step": 4215 }, { "epoch": 0.5228596208648246, "grad_norm": 5.958169276709668e-05, "learning_rate": 0.00010948016012985371, "loss": 0.0, "step": 4220 }, { "epoch": 0.5234791227852806, "grad_norm": 2.380023397563491e-05, "learning_rate": 0.00010926483842199022, "loss": 0.0, "step": 4225 }, { "epoch": 0.5240986247057365, "grad_norm": 2.7169469831278548e-05, "learning_rate": 0.00010904947337847365, "loss": 0.0, "step": 4230 }, { "epoch": 0.5247181266261925, "grad_norm": 7.567715510958806e-05, "learning_rate": 0.00010883406600665935, "loss": 0.0, "step": 4235 }, { "epoch": 0.5253376285466484, "grad_norm": 5.485690417117439e-05, "learning_rate": 0.0001086186173141007, "loss": 0.0, "step": 4240 }, { "epoch": 0.5259571304671045, "grad_norm": 2.633564690768253e-05, "learning_rate": 0.00010840312830854433, "loss": 0.0, "step": 4245 }, { "epoch": 0.5265766323875604, "grad_norm": 2.0063273041159846e-05, "learning_rate": 0.00010818759999792542, "loss": 0.0, "step": 4250 }, { "epoch": 0.5271961343080164, "grad_norm": 6.943461630726233e-05, "learning_rate": 0.00010797203339036308, "loss": 0.0, "step": 4255 }, { "epoch": 0.5278156362284723, "grad_norm": 4.200368857709691e-05, "learning_rate": 0.00010775642949415545, "loss": 0.0, "step": 4260 }, { "epoch": 0.5284351381489283, "grad_norm": 2.858910011127591e-05, "learning_rate": 0.00010754078931777511, "loss": 0.0, "step": 4265 }, { "epoch": 0.5290546400693842, "grad_norm": 3.3121483284048736e-05, "learning_rate": 0.00010732511386986439, "loss": 0.0, "step": 4270 }, { "epoch": 0.5296741419898402, "grad_norm": 4.26892947871238e-05, "learning_rate": 0.00010710940415923056, "loss": 0.0, "step": 4275 }, { "epoch": 0.5302936439102961, "grad_norm": 2.125727041857317e-05, "learning_rate": 0.00010689366119484113, "loss": 0.0, "step": 4280 }, { "epoch": 0.5309131458307521, "grad_norm": 2.9437300327117555e-05, "learning_rate": 0.0001066778859858192, "loss": 0.0, "step": 4285 }, { "epoch": 0.531532647751208, "grad_norm": 2.2908916434971616e-05, "learning_rate": 0.00010646207954143865, "loss": 0.0, "step": 4290 }, { "epoch": 0.532152149671664, "grad_norm": 1.7271877368330024e-05, "learning_rate": 0.00010624624287111952, "loss": 0.0, "step": 4295 }, { "epoch": 0.5327716515921199, "grad_norm": 3.081001705140807e-05, "learning_rate": 0.00010603037698442316, "loss": 0.0, "step": 4300 }, { "epoch": 0.5333911535125759, "grad_norm": 4.131594323553145e-05, "learning_rate": 0.00010581448289104758, "loss": 0.0, "step": 4305 }, { "epoch": 0.5340106554330318, "grad_norm": 4.30790641985368e-05, "learning_rate": 0.00010559856160082283, "loss": 0.0, "step": 4310 }, { "epoch": 0.5346301573534878, "grad_norm": 6.162387580843642e-05, "learning_rate": 0.00010538261412370602, "loss": 0.0, "step": 4315 }, { "epoch": 0.5352496592739437, "grad_norm": 3.478436337900348e-05, "learning_rate": 0.00010516664146977686, "loss": 0.0, "step": 4320 }, { "epoch": 0.5358691611943998, "grad_norm": 2.554922320996411e-05, "learning_rate": 0.00010495064464923282, "loss": 0.0, "step": 4325 }, { "epoch": 0.5364886631148557, "grad_norm": 1.9790757505688816e-05, "learning_rate": 0.00010473462467238431, "loss": 0.0, "step": 4330 }, { "epoch": 0.5371081650353116, "grad_norm": 3.475523044471629e-05, "learning_rate": 0.0001045185825496501, "loss": 0.0, "step": 4335 }, { "epoch": 0.5377276669557676, "grad_norm": 4.6152996219461784e-05, "learning_rate": 0.00010430251929155264, "loss": 0.0, "step": 4340 }, { "epoch": 0.5383471688762235, "grad_norm": 3.145092341583222e-05, "learning_rate": 0.00010408643590871312, "loss": 0.0, "step": 4345 }, { "epoch": 0.5389666707966795, "grad_norm": 2.5642913897172548e-05, "learning_rate": 0.00010387033341184687, "loss": 0.0, "step": 4350 }, { "epoch": 0.5395861727171354, "grad_norm": 3.310311149107292e-05, "learning_rate": 0.00010365421281175871, "loss": 0.0, "step": 4355 }, { "epoch": 0.5402056746375914, "grad_norm": 1.7756987290340476e-05, "learning_rate": 0.00010343807511933803, "loss": 0.0, "step": 4360 }, { "epoch": 0.5408251765580473, "grad_norm": 6.978329474804923e-05, "learning_rate": 0.00010322192134555429, "loss": 0.0, "step": 4365 }, { "epoch": 0.5414446784785033, "grad_norm": 3.050067061849404e-05, "learning_rate": 0.00010300575250145205, "loss": 0.0, "step": 4370 }, { "epoch": 0.5420641803989592, "grad_norm": 2.2259253455558792e-05, "learning_rate": 0.00010278956959814641, "loss": 0.0, "step": 4375 }, { "epoch": 0.5426836823194152, "grad_norm": 2.437258262943942e-05, "learning_rate": 0.00010257337364681827, "loss": 0.0, "step": 4380 }, { "epoch": 0.5433031842398711, "grad_norm": 3.643068703240715e-05, "learning_rate": 0.0001023571656587095, "loss": 0.0, "step": 4385 }, { "epoch": 0.5439226861603271, "grad_norm": 3.5682005545822904e-05, "learning_rate": 0.00010214094664511826, "loss": 0.0, "step": 4390 }, { "epoch": 0.544542188080783, "grad_norm": 5.700965630239807e-05, "learning_rate": 0.00010192471761739438, "loss": 0.0, "step": 4395 }, { "epoch": 0.545161690001239, "grad_norm": 3.905803896486759e-05, "learning_rate": 0.00010170847958693441, "loss": 0.0, "step": 4400 }, { "epoch": 0.5457811919216949, "grad_norm": 3.833401569863781e-05, "learning_rate": 0.00010149223356517712, "loss": 0.0, "step": 4405 }, { "epoch": 0.546400693842151, "grad_norm": 2.6655694455257617e-05, "learning_rate": 0.00010127598056359858, "loss": 0.0, "step": 4410 }, { "epoch": 0.5470201957626069, "grad_norm": 2.25376570597291e-05, "learning_rate": 0.00010105972159370748, "loss": 0.0, "step": 4415 }, { "epoch": 0.5476396976830629, "grad_norm": 6.224387470865622e-05, "learning_rate": 0.00010084345766704057, "loss": 0.0, "step": 4420 }, { "epoch": 0.5482591996035188, "grad_norm": 5.755699748988263e-05, "learning_rate": 0.00010062718979515764, "loss": 0.0, "step": 4425 }, { "epoch": 0.5488787015239748, "grad_norm": 1.8654092855285853e-05, "learning_rate": 0.00010041091898963699, "loss": 0.0, "step": 4430 }, { "epoch": 0.5494982034444307, "grad_norm": 8.854925545165315e-05, "learning_rate": 0.00010019464626207067, "loss": 0.0, "step": 4435 }, { "epoch": 0.5501177053648866, "grad_norm": 4.123678809264675e-05, "learning_rate": 9.997837262405963e-05, "loss": 0.0, "step": 4440 }, { "epoch": 0.5507372072853426, "grad_norm": 2.9076021746732295e-05, "learning_rate": 9.97620990872092e-05, "loss": 0.0, "step": 4445 }, { "epoch": 0.5513567092057985, "grad_norm": 2.1776711946586147e-05, "learning_rate": 9.954582666312415e-05, "loss": 0.0, "step": 4450 }, { "epoch": 0.5519762111262545, "grad_norm": 0.0001497041230322793, "learning_rate": 9.932955636340405e-05, "loss": 0.0, "step": 4455 }, { "epoch": 0.5525957130467104, "grad_norm": 4.056060060975142e-05, "learning_rate": 9.911328919963859e-05, "loss": 0.0, "step": 4460 }, { "epoch": 0.5532152149671664, "grad_norm": 4.316146078053862e-05, "learning_rate": 9.889702618340273e-05, "loss": 0.0, "step": 4465 }, { "epoch": 0.5538347168876223, "grad_norm": 8.655834244564176e-05, "learning_rate": 9.868076832625205e-05, "loss": 0.0, "step": 4470 }, { "epoch": 0.5544542188080783, "grad_norm": 2.588258939795196e-05, "learning_rate": 9.846451663971802e-05, "loss": 0.0, "step": 4475 }, { "epoch": 0.5550737207285342, "grad_norm": 3.0479772249236703e-05, "learning_rate": 9.824827213530322e-05, "loss": 0.0, "step": 4480 }, { "epoch": 0.5556932226489902, "grad_norm": 2.4817833036649972e-05, "learning_rate": 9.803203582447667e-05, "loss": 0.0, "step": 4485 }, { "epoch": 0.5563127245694461, "grad_norm": 8.99506121641025e-05, "learning_rate": 9.781580871866902e-05, "loss": 0.0, "step": 4490 }, { "epoch": 0.5569322264899021, "grad_norm": 4.3884010665351525e-05, "learning_rate": 9.759959182926788e-05, "loss": 0.0, "step": 4495 }, { "epoch": 0.557551728410358, "grad_norm": 6.079499871702865e-05, "learning_rate": 9.738338616761312e-05, "loss": 0.0, "step": 4500 }, { "epoch": 0.5581712303308141, "grad_norm": 6.665864930255339e-05, "learning_rate": 9.716719274499199e-05, "loss": 0.0, "step": 4505 }, { "epoch": 0.55879073225127, "grad_norm": 2.7024989321944304e-05, "learning_rate": 9.69510125726346e-05, "loss": 0.0, "step": 4510 }, { "epoch": 0.559410234171726, "grad_norm": 4.83097683172673e-05, "learning_rate": 9.673484666170904e-05, "loss": 0.0, "step": 4515 }, { "epoch": 0.5600297360921819, "grad_norm": 3.0619456083513796e-05, "learning_rate": 9.651869602331669e-05, "loss": 0.0, "step": 4520 }, { "epoch": 0.5606492380126379, "grad_norm": 3.632020525401458e-05, "learning_rate": 9.630256166848745e-05, "loss": 0.0, "step": 4525 }, { "epoch": 0.5612687399330938, "grad_norm": 2.334518649149686e-05, "learning_rate": 9.608644460817512e-05, "loss": 0.0, "step": 4530 }, { "epoch": 0.5618882418535498, "grad_norm": 2.2813013856648467e-05, "learning_rate": 9.587034585325265e-05, "loss": 0.0, "step": 4535 }, { "epoch": 0.5625077437740057, "grad_norm": 5.5647171393502504e-05, "learning_rate": 9.565426641450724e-05, "loss": 0.0, "step": 4540 }, { "epoch": 0.5631272456944616, "grad_norm": 3.877573908539489e-05, "learning_rate": 9.543820730263582e-05, "loss": 0.0, "step": 4545 }, { "epoch": 0.5637467476149176, "grad_norm": 2.1799072783323936e-05, "learning_rate": 9.522216952824023e-05, "loss": 0.0, "step": 4550 }, { "epoch": 0.5643662495353735, "grad_norm": 4.537841959972866e-05, "learning_rate": 9.50061541018225e-05, "loss": 0.0, "step": 4555 }, { "epoch": 0.5649857514558295, "grad_norm": 3.0272765798144974e-05, "learning_rate": 9.479016203378013e-05, "loss": 0.0, "step": 4560 }, { "epoch": 0.5656052533762854, "grad_norm": 2.028520066232886e-05, "learning_rate": 9.457419433440136e-05, "loss": 0.0, "step": 4565 }, { "epoch": 0.5662247552967414, "grad_norm": 2.1410764020401984e-05, "learning_rate": 9.435825201386044e-05, "loss": 0.0, "step": 4570 }, { "epoch": 0.5668442572171973, "grad_norm": 2.7849668185808696e-05, "learning_rate": 9.414233608221296e-05, "loss": 0.0, "step": 4575 }, { "epoch": 0.5674637591376533, "grad_norm": 4.10931279475335e-05, "learning_rate": 9.392644754939097e-05, "loss": 0.0, "step": 4580 }, { "epoch": 0.5680832610581092, "grad_norm": 0.0001944853866007179, "learning_rate": 9.371058742519853e-05, "loss": 0.0, "step": 4585 }, { "epoch": 0.5687027629785653, "grad_norm": 0.00010126090637641028, "learning_rate": 9.349475671930665e-05, "loss": 0.0, "step": 4590 }, { "epoch": 0.5693222648990212, "grad_norm": 2.262040834466461e-05, "learning_rate": 9.327895644124883e-05, "loss": 0.0, "step": 4595 }, { "epoch": 0.5699417668194772, "grad_norm": 6.350939656840637e-05, "learning_rate": 9.306318760041625e-05, "loss": 0.0, "step": 4600 }, { "epoch": 0.5705612687399331, "grad_norm": 2.1397831005742773e-05, "learning_rate": 9.284745120605303e-05, "loss": 0.0, "step": 4605 }, { "epoch": 0.5711807706603891, "grad_norm": 2.799762660288252e-05, "learning_rate": 9.263174826725145e-05, "loss": 0.0, "step": 4610 }, { "epoch": 0.571800272580845, "grad_norm": 2.1157993614906445e-05, "learning_rate": 9.241607979294745e-05, "loss": 0.0, "step": 4615 }, { "epoch": 0.572419774501301, "grad_norm": 4.520654692896642e-05, "learning_rate": 9.220044679191563e-05, "loss": 0.0, "step": 4620 }, { "epoch": 0.5730392764217569, "grad_norm": 1.635063199501019e-05, "learning_rate": 9.198485027276476e-05, "loss": 0.0, "step": 4625 }, { "epoch": 0.5736587783422129, "grad_norm": 1.7665701307123527e-05, "learning_rate": 9.176929124393292e-05, "loss": 0.0, "step": 4630 }, { "epoch": 0.5742782802626688, "grad_norm": 2.6733794584288262e-05, "learning_rate": 9.155377071368281e-05, "loss": 0.0, "step": 4635 }, { "epoch": 0.5748977821831248, "grad_norm": 2.3726704966975376e-05, "learning_rate": 9.133828969009713e-05, "loss": 0.0, "step": 4640 }, { "epoch": 0.5755172841035807, "grad_norm": 1.472628446208546e-05, "learning_rate": 9.112284918107374e-05, "loss": 0.0, "step": 4645 }, { "epoch": 0.5761367860240367, "grad_norm": 3.294445195933804e-05, "learning_rate": 9.090745019432098e-05, "loss": 0.0, "step": 4650 }, { "epoch": 0.5767562879444926, "grad_norm": 6.366107845678926e-05, "learning_rate": 9.069209373735302e-05, "loss": 0.0, "step": 4655 }, { "epoch": 0.5773757898649485, "grad_norm": 2.306202077306807e-05, "learning_rate": 9.047678081748506e-05, "loss": 0.0, "step": 4660 }, { "epoch": 0.5779952917854045, "grad_norm": 5.742096982430667e-05, "learning_rate": 9.026151244182867e-05, "loss": 0.0, "step": 4665 }, { "epoch": 0.5786147937058604, "grad_norm": 3.441128137637861e-05, "learning_rate": 9.00462896172871e-05, "loss": 0.0, "step": 4670 }, { "epoch": 0.5792342956263165, "grad_norm": 8.708019595360383e-05, "learning_rate": 8.983111335055046e-05, "loss": 0.0, "step": 4675 }, { "epoch": 0.5798537975467724, "grad_norm": 1.6010872059268877e-05, "learning_rate": 8.961598464809121e-05, "loss": 0.0, "step": 4680 }, { "epoch": 0.5804732994672284, "grad_norm": 2.57802676060237e-05, "learning_rate": 8.94009045161592e-05, "loss": 0.0, "step": 4685 }, { "epoch": 0.5810928013876843, "grad_norm": 2.0934197891619988e-05, "learning_rate": 8.918587396077716e-05, "loss": 0.0, "step": 4690 }, { "epoch": 0.5817123033081403, "grad_norm": 9.115342982113361e-05, "learning_rate": 8.897089398773595e-05, "loss": 0.0, "step": 4695 }, { "epoch": 0.5823318052285962, "grad_norm": 2.3816579414415173e-05, "learning_rate": 8.875596560258976e-05, "loss": 0.0, "step": 4700 }, { "epoch": 0.5829513071490522, "grad_norm": 1.8688055206439458e-05, "learning_rate": 8.85410898106516e-05, "loss": 0.0, "step": 4705 }, { "epoch": 0.5835708090695081, "grad_norm": 1.9487793906591833e-05, "learning_rate": 8.832626761698835e-05, "loss": 0.0, "step": 4710 }, { "epoch": 0.5841903109899641, "grad_norm": 4.994100163457915e-05, "learning_rate": 8.811150002641627e-05, "loss": 0.0, "step": 4715 }, { "epoch": 0.58480981291042, "grad_norm": 3.820952770183794e-05, "learning_rate": 8.789678804349617e-05, "loss": 0.0, "step": 4720 }, { "epoch": 0.585429314830876, "grad_norm": 2.8248798116692342e-05, "learning_rate": 8.768213267252882e-05, "loss": 0.0, "step": 4725 }, { "epoch": 0.5860488167513319, "grad_norm": 8.127695764414966e-05, "learning_rate": 8.746753491755011e-05, "loss": 0.0, "step": 4730 }, { "epoch": 0.5866683186717879, "grad_norm": 2.532659527787473e-05, "learning_rate": 8.725299578232652e-05, "loss": 0.0, "step": 4735 }, { "epoch": 0.5872878205922438, "grad_norm": 2.1624024157063104e-05, "learning_rate": 8.703851627035028e-05, "loss": 0.0, "step": 4740 }, { "epoch": 0.5879073225126998, "grad_norm": 4.522816016105935e-05, "learning_rate": 8.682409738483473e-05, "loss": 0.0, "step": 4745 }, { "epoch": 0.5885268244331557, "grad_norm": 3.7423822504933923e-05, "learning_rate": 8.66097401287097e-05, "loss": 0.0, "step": 4750 }, { "epoch": 0.5891463263536117, "grad_norm": 2.1225565433269367e-05, "learning_rate": 8.639544550461668e-05, "loss": 0.0, "step": 4755 }, { "epoch": 0.5897658282740676, "grad_norm": 3.722541805473156e-05, "learning_rate": 8.618121451490427e-05, "loss": 0.0, "step": 4760 }, { "epoch": 0.5903853301945235, "grad_norm": 1.6819964002934285e-05, "learning_rate": 8.596704816162337e-05, "loss": 0.0, "step": 4765 }, { "epoch": 0.5910048321149796, "grad_norm": 1.9097911717835814e-05, "learning_rate": 8.575294744652254e-05, "loss": 0.0, "step": 4770 }, { "epoch": 0.5916243340354355, "grad_norm": 3.8917958590900525e-05, "learning_rate": 8.553891337104338e-05, "loss": 0.0, "step": 4775 }, { "epoch": 0.5922438359558915, "grad_norm": 3.851741712423973e-05, "learning_rate": 8.532494693631576e-05, "loss": 0.0, "step": 4780 }, { "epoch": 0.5928633378763474, "grad_norm": 1.76676367118489e-05, "learning_rate": 8.511104914315313e-05, "loss": 0.0, "step": 4785 }, { "epoch": 0.5934828397968034, "grad_norm": 2.2717445972375572e-05, "learning_rate": 8.489722099204794e-05, "loss": 0.0, "step": 4790 }, { "epoch": 0.5941023417172593, "grad_norm": 2.773566848190967e-05, "learning_rate": 8.468346348316682e-05, "loss": 0.0, "step": 4795 }, { "epoch": 0.5947218436377153, "grad_norm": 2.2611611711909063e-05, "learning_rate": 8.446977761634605e-05, "loss": 0.0, "step": 4800 }, { "epoch": 0.5953413455581712, "grad_norm": 2.0034345652675256e-05, "learning_rate": 8.425616439108678e-05, "loss": 0.0, "step": 4805 }, { "epoch": 0.5959608474786272, "grad_norm": 8.130742935463786e-05, "learning_rate": 8.404262480655032e-05, "loss": 0.0, "step": 4810 }, { "epoch": 0.5965803493990831, "grad_norm": 3.13762211590074e-05, "learning_rate": 8.382915986155367e-05, "loss": 0.0, "step": 4815 }, { "epoch": 0.5971998513195391, "grad_norm": 2.2441889086621813e-05, "learning_rate": 8.361577055456455e-05, "loss": 0.0, "step": 4820 }, { "epoch": 0.597819353239995, "grad_norm": 1.7559967091074213e-05, "learning_rate": 8.340245788369697e-05, "loss": 0.0, "step": 4825 }, { "epoch": 0.598438855160451, "grad_norm": 4.6539171307813376e-05, "learning_rate": 8.318922284670652e-05, "loss": 0.0, "step": 4830 }, { "epoch": 0.5990583570809069, "grad_norm": 2.9008893761783838e-05, "learning_rate": 8.297606644098563e-05, "loss": 0.0, "step": 4835 }, { "epoch": 0.599677859001363, "grad_norm": 1.4970169104344677e-05, "learning_rate": 8.276298966355887e-05, "loss": 0.0, "step": 4840 }, { "epoch": 0.6002973609218188, "grad_norm": 1.6208659872063436e-05, "learning_rate": 8.254999351107845e-05, "loss": 0.0, "step": 4845 }, { "epoch": 0.6009168628422749, "grad_norm": 2.8873602786916308e-05, "learning_rate": 8.233707897981941e-05, "loss": 0.0, "step": 4850 }, { "epoch": 0.6015363647627308, "grad_norm": 2.295142985531129e-05, "learning_rate": 8.212424706567501e-05, "loss": 0.0, "step": 4855 }, { "epoch": 0.6021558666831868, "grad_norm": 5.128447082825005e-05, "learning_rate": 8.19114987641521e-05, "loss": 0.0, "step": 4860 }, { "epoch": 0.6027753686036427, "grad_norm": 2.4551838578190655e-05, "learning_rate": 8.169883507036643e-05, "loss": 0.0, "step": 4865 }, { "epoch": 0.6033948705240986, "grad_norm": 2.460844916640781e-05, "learning_rate": 8.148625697903797e-05, "loss": 0.0, "step": 4870 }, { "epoch": 0.6040143724445546, "grad_norm": 1.975913437490817e-05, "learning_rate": 8.127376548448636e-05, "loss": 0.0, "step": 4875 }, { "epoch": 0.6046338743650105, "grad_norm": 1.6298903574352153e-05, "learning_rate": 8.106136158062612e-05, "loss": 0.0, "step": 4880 }, { "epoch": 0.6052533762854665, "grad_norm": 3.814012961811386e-05, "learning_rate": 8.08490462609621e-05, "loss": 0.0, "step": 4885 }, { "epoch": 0.6058728782059224, "grad_norm": 4.431715569808148e-05, "learning_rate": 8.063682051858483e-05, "loss": 0.0, "step": 4890 }, { "epoch": 0.6064923801263784, "grad_norm": 1.6090447388705797e-05, "learning_rate": 8.042468534616578e-05, "loss": 0.0, "step": 4895 }, { "epoch": 0.6071118820468343, "grad_norm": 2.5211544198100455e-05, "learning_rate": 8.021264173595287e-05, "loss": 0.0, "step": 4900 }, { "epoch": 0.6077313839672903, "grad_norm": 2.568330819485709e-05, "learning_rate": 8.000069067976567e-05, "loss": 0.0, "step": 4905 }, { "epoch": 0.6083508858877462, "grad_norm": 7.015263690846041e-05, "learning_rate": 7.978883316899085e-05, "loss": 0.0, "step": 4910 }, { "epoch": 0.6089703878082022, "grad_norm": 3.798906254814938e-05, "learning_rate": 7.957707019457757e-05, "loss": 0.0, "step": 4915 }, { "epoch": 0.6095898897286581, "grad_norm": 2.7672513169818558e-05, "learning_rate": 7.936540274703277e-05, "loss": 0.0, "step": 4920 }, { "epoch": 0.6102093916491141, "grad_norm": 2.9601456844829954e-05, "learning_rate": 7.915383181641657e-05, "loss": 0.0, "step": 4925 }, { "epoch": 0.61082889356957, "grad_norm": 4.726227780338377e-05, "learning_rate": 7.894235839233766e-05, "loss": 0.0, "step": 4930 }, { "epoch": 0.611448395490026, "grad_norm": 1.5505715055041946e-05, "learning_rate": 7.873098346394858e-05, "loss": 0.0, "step": 4935 }, { "epoch": 0.612067897410482, "grad_norm": 1.590794272487983e-05, "learning_rate": 7.85197080199413e-05, "loss": 0.0, "step": 4940 }, { "epoch": 0.612687399330938, "grad_norm": 6.30927097517997e-05, "learning_rate": 7.830853304854231e-05, "loss": 0.0, "step": 4945 }, { "epoch": 0.6133069012513939, "grad_norm": 4.6852823288645595e-05, "learning_rate": 7.80974595375082e-05, "loss": 0.0, "step": 4950 }, { "epoch": 0.6139264031718499, "grad_norm": 5.8120836911257356e-05, "learning_rate": 7.788648847412106e-05, "loss": 0.0, "step": 4955 }, { "epoch": 0.6145459050923058, "grad_norm": 1.7644651961745694e-05, "learning_rate": 7.767562084518368e-05, "loss": 0.0, "step": 4960 }, { "epoch": 0.6151654070127618, "grad_norm": 0.00015122753393370658, "learning_rate": 7.746485763701508e-05, "loss": 0.0, "step": 4965 }, { "epoch": 0.6157849089332177, "grad_norm": 0.00017790192214306444, "learning_rate": 7.725419983544591e-05, "loss": 0.0, "step": 4970 }, { "epoch": 0.6164044108536737, "grad_norm": 4.4161773985251784e-05, "learning_rate": 7.704364842581368e-05, "loss": 0.0, "step": 4975 }, { "epoch": 0.6170239127741296, "grad_norm": 2.427816070849076e-05, "learning_rate": 7.68332043929584e-05, "loss": 0.0, "step": 4980 }, { "epoch": 0.6176434146945855, "grad_norm": 2.15327618207084e-05, "learning_rate": 7.662286872121768e-05, "loss": 0.0, "step": 4985 }, { "epoch": 0.6182629166150415, "grad_norm": 3.844231105176732e-05, "learning_rate": 7.641264239442239e-05, "loss": 0.0, "step": 4990 }, { "epoch": 0.6188824185354974, "grad_norm": 1.5090513443283271e-05, "learning_rate": 7.620252639589194e-05, "loss": 0.0, "step": 4995 }, { "epoch": 0.6195019204559534, "grad_norm": 2.1894662495469674e-05, "learning_rate": 7.59925217084296e-05, "loss": 0.0, "step": 5000 }, { "epoch": 0.6201214223764093, "grad_norm": 1.6281534044537693e-05, "learning_rate": 7.578262931431805e-05, "loss": 0.0, "step": 5005 }, { "epoch": 0.6207409242968653, "grad_norm": 1.305163914366858e-05, "learning_rate": 7.557285019531479e-05, "loss": 0.0, "step": 5010 }, { "epoch": 0.6213604262173212, "grad_norm": 3.0064267775742337e-05, "learning_rate": 7.536318533264732e-05, "loss": 0.0, "step": 5015 }, { "epoch": 0.6219799281377772, "grad_norm": 2.5210109015461057e-05, "learning_rate": 7.51536357070089e-05, "loss": 0.0, "step": 5020 }, { "epoch": 0.6225994300582331, "grad_norm": 3.949186429963447e-05, "learning_rate": 7.494420229855363e-05, "loss": 0.0, "step": 5025 }, { "epoch": 0.6232189319786892, "grad_norm": 1.8304635887034237e-05, "learning_rate": 7.473488608689206e-05, "loss": 0.0, "step": 5030 }, { "epoch": 0.6238384338991451, "grad_norm": 1.7849299183581024e-05, "learning_rate": 7.452568805108661e-05, "loss": 0.0, "step": 5035 }, { "epoch": 0.6244579358196011, "grad_norm": 5.022202822146937e-05, "learning_rate": 7.431660916964691e-05, "loss": 0.0, "step": 5040 }, { "epoch": 0.625077437740057, "grad_norm": 5.302611316437833e-05, "learning_rate": 7.410765042052517e-05, "loss": 0.0, "step": 5045 }, { "epoch": 0.625696939660513, "grad_norm": 2.573677738837432e-05, "learning_rate": 7.389881278111181e-05, "loss": 0.0, "step": 5050 }, { "epoch": 0.6263164415809689, "grad_norm": 2.4502909582224675e-05, "learning_rate": 7.369009722823071e-05, "loss": 0.0, "step": 5055 }, { "epoch": 0.6269359435014249, "grad_norm": 1.5648043699911796e-05, "learning_rate": 7.348150473813471e-05, "loss": 0.0, "step": 5060 }, { "epoch": 0.6275554454218808, "grad_norm": 1.631654413358774e-05, "learning_rate": 7.327303628650104e-05, "loss": 0.0, "step": 5065 }, { "epoch": 0.6281749473423368, "grad_norm": 1.9882971173501574e-05, "learning_rate": 7.306469284842669e-05, "loss": 0.0, "step": 5070 }, { "epoch": 0.6287944492627927, "grad_norm": 3.006104270752985e-05, "learning_rate": 7.285647539842403e-05, "loss": 0.0, "step": 5075 }, { "epoch": 0.6294139511832487, "grad_norm": 2.433635927445721e-05, "learning_rate": 7.264838491041601e-05, "loss": 0.0, "step": 5080 }, { "epoch": 0.6300334531037046, "grad_norm": 1.4783213373448234e-05, "learning_rate": 7.244042235773176e-05, "loss": 0.0, "step": 5085 }, { "epoch": 0.6306529550241605, "grad_norm": 7.115118933143094e-05, "learning_rate": 7.223258871310204e-05, "loss": 0.0, "step": 5090 }, { "epoch": 0.6312724569446165, "grad_norm": 1.3765422409051098e-05, "learning_rate": 7.202488494865459e-05, "loss": 0.0, "step": 5095 }, { "epoch": 0.6318919588650724, "grad_norm": 1.1565353815967683e-05, "learning_rate": 7.181731203590972e-05, "loss": 0.0, "step": 5100 }, { "epoch": 0.6325114607855284, "grad_norm": 4.458922194316983e-05, "learning_rate": 7.16098709457756e-05, "loss": 0.0, "step": 5105 }, { "epoch": 0.6331309627059843, "grad_norm": 1.8716898921411484e-05, "learning_rate": 7.140256264854386e-05, "loss": 0.0, "step": 5110 }, { "epoch": 0.6337504646264404, "grad_norm": 2.9838854970876127e-05, "learning_rate": 7.119538811388497e-05, "loss": 0.0, "step": 5115 }, { "epoch": 0.6343699665468963, "grad_norm": 8.412452007178217e-05, "learning_rate": 7.098834831084385e-05, "loss": 0.0, "step": 5120 }, { "epoch": 0.6349894684673523, "grad_norm": 4.87436554976739e-05, "learning_rate": 7.078144420783508e-05, "loss": 0.0, "step": 5125 }, { "epoch": 0.6356089703878082, "grad_norm": 2.619936640257947e-05, "learning_rate": 7.057467677263853e-05, "loss": 0.0, "step": 5130 }, { "epoch": 0.6362284723082642, "grad_norm": 5.217998113948852e-05, "learning_rate": 7.036804697239491e-05, "loss": 0.0, "step": 5135 }, { "epoch": 0.6368479742287201, "grad_norm": 3.804305015364662e-05, "learning_rate": 7.016155577360107e-05, "loss": 0.0, "step": 5140 }, { "epoch": 0.6374674761491761, "grad_norm": 2.7268941266811453e-05, "learning_rate": 6.995520414210554e-05, "loss": 0.0, "step": 5145 }, { "epoch": 0.638086978069632, "grad_norm": 1.412182336935075e-05, "learning_rate": 6.974899304310414e-05, "loss": 0.0, "step": 5150 }, { "epoch": 0.638706479990088, "grad_norm": 2.3733120542601682e-05, "learning_rate": 6.954292344113522e-05, "loss": 0.0, "step": 5155 }, { "epoch": 0.6393259819105439, "grad_norm": 1.289436113438569e-05, "learning_rate": 6.933699630007544e-05, "loss": 0.0, "step": 5160 }, { "epoch": 0.6399454838309999, "grad_norm": 2.1861729692318477e-05, "learning_rate": 6.913121258313497e-05, "loss": 0.0, "step": 5165 }, { "epoch": 0.6405649857514558, "grad_norm": 3.505976201267913e-05, "learning_rate": 6.892557325285322e-05, "loss": 0.0, "step": 5170 }, { "epoch": 0.6411844876719118, "grad_norm": 1.7817852494772524e-05, "learning_rate": 6.872007927109418e-05, "loss": 0.0, "step": 5175 }, { "epoch": 0.6418039895923677, "grad_norm": 1.8512973838369362e-05, "learning_rate": 6.851473159904205e-05, "loss": 0.0, "step": 5180 }, { "epoch": 0.6424234915128237, "grad_norm": 2.638439036672935e-05, "learning_rate": 6.830953119719657e-05, "loss": 0.0, "step": 5185 }, { "epoch": 0.6430429934332796, "grad_norm": 4.453474684851244e-05, "learning_rate": 6.810447902536874e-05, "loss": 0.0, "step": 5190 }, { "epoch": 0.6436624953537355, "grad_norm": 1.321329727943521e-05, "learning_rate": 6.789957604267616e-05, "loss": 0.0, "step": 5195 }, { "epoch": 0.6442819972741916, "grad_norm": 3.259681034251116e-05, "learning_rate": 6.769482320753865e-05, "loss": 0.0, "step": 5200 }, { "epoch": 0.6449014991946475, "grad_norm": 2.3009086362435482e-05, "learning_rate": 6.74902214776737e-05, "loss": 0.0, "step": 5205 }, { "epoch": 0.6455210011151035, "grad_norm": 3.909249426214956e-05, "learning_rate": 6.7285771810092e-05, "loss": 0.0, "step": 5210 }, { "epoch": 0.6461405030355594, "grad_norm": 1.7565918824402615e-05, "learning_rate": 6.708147516109303e-05, "loss": 0.0, "step": 5215 }, { "epoch": 0.6467600049560154, "grad_norm": 1.6449575923616067e-05, "learning_rate": 6.687733248626046e-05, "loss": 0.0, "step": 5220 }, { "epoch": 0.6473795068764713, "grad_norm": 3.828321132459678e-05, "learning_rate": 6.667334474045784e-05, "loss": 0.0, "step": 5225 }, { "epoch": 0.6479990087969273, "grad_norm": 2.2801419618190266e-05, "learning_rate": 6.6469512877824e-05, "loss": 0.0, "step": 5230 }, { "epoch": 0.6486185107173832, "grad_norm": 1.53738019434968e-05, "learning_rate": 6.626583785176861e-05, "loss": 0.0, "step": 5235 }, { "epoch": 0.6492380126378392, "grad_norm": 4.638611790142022e-05, "learning_rate": 6.606232061496785e-05, "loss": 0.0, "step": 5240 }, { "epoch": 0.6498575145582951, "grad_norm": 5.008360312785953e-05, "learning_rate": 6.585896211935976e-05, "loss": 0.0, "step": 5245 }, { "epoch": 0.6504770164787511, "grad_norm": 1.568288280395791e-05, "learning_rate": 6.565576331613988e-05, "loss": 0.0, "step": 5250 }, { "epoch": 0.651096518399207, "grad_norm": 1.528701432107482e-05, "learning_rate": 6.545272515575686e-05, "loss": 0.0, "step": 5255 }, { "epoch": 0.651716020319663, "grad_norm": 2.159470386686735e-05, "learning_rate": 6.524984858790791e-05, "loss": 0.0, "step": 5260 }, { "epoch": 0.6523355222401189, "grad_norm": 2.3058144506649114e-05, "learning_rate": 6.504713456153439e-05, "loss": 0.0, "step": 5265 }, { "epoch": 0.6529550241605749, "grad_norm": 1.796290962374769e-05, "learning_rate": 6.484458402481743e-05, "loss": 0.0, "step": 5270 }, { "epoch": 0.6535745260810308, "grad_norm": 1.730887743178755e-05, "learning_rate": 6.46421979251734e-05, "loss": 0.0, "step": 5275 }, { "epoch": 0.6541940280014868, "grad_norm": 7.750130316708237e-05, "learning_rate": 6.443997720924957e-05, "loss": 0.0, "step": 5280 }, { "epoch": 0.6548135299219427, "grad_norm": 2.10803555091843e-05, "learning_rate": 6.423792282291963e-05, "loss": 0.0, "step": 5285 }, { "epoch": 0.6554330318423988, "grad_norm": 1.5896222976152785e-05, "learning_rate": 6.403603571127921e-05, "loss": 0.0, "step": 5290 }, { "epoch": 0.6560525337628547, "grad_norm": 2.5902440029312856e-05, "learning_rate": 6.383431681864163e-05, "loss": 0.0, "step": 5295 }, { "epoch": 0.6566720356833107, "grad_norm": 1.923191848618444e-05, "learning_rate": 6.363276708853331e-05, "loss": 0.0, "step": 5300 }, { "epoch": 0.6572915376037666, "grad_norm": 1.6589938240940683e-05, "learning_rate": 6.34313874636894e-05, "loss": 0.0, "step": 5305 }, { "epoch": 0.6579110395242225, "grad_norm": 9.021166624734178e-05, "learning_rate": 6.323017888604949e-05, "loss": 0.0, "step": 5310 }, { "epoch": 0.6585305414446785, "grad_norm": 1.87494824785972e-05, "learning_rate": 6.3029142296753e-05, "loss": 0.0, "step": 5315 }, { "epoch": 0.6591500433651344, "grad_norm": 2.7435238735051826e-05, "learning_rate": 6.282827863613499e-05, "loss": 0.0, "step": 5320 }, { "epoch": 0.6597695452855904, "grad_norm": 2.4243854568339884e-05, "learning_rate": 6.262758884372155e-05, "loss": 0.0, "step": 5325 }, { "epoch": 0.6603890472060463, "grad_norm": 3.0757742933928967e-05, "learning_rate": 6.24270738582256e-05, "loss": 0.0, "step": 5330 }, { "epoch": 0.6610085491265023, "grad_norm": 0.00041466180118732154, "learning_rate": 6.222673461754236e-05, "loss": 0.0, "step": 5335 }, { "epoch": 0.6616280510469582, "grad_norm": 1.814751340134535e-05, "learning_rate": 6.202657205874505e-05, "loss": 0.0, "step": 5340 }, { "epoch": 0.6622475529674142, "grad_norm": 4.896555037703365e-05, "learning_rate": 6.182658711808044e-05, "loss": 0.0, "step": 5345 }, { "epoch": 0.6628670548878701, "grad_norm": 2.8569784262799658e-05, "learning_rate": 6.162678073096456e-05, "loss": 0.0, "step": 5350 }, { "epoch": 0.6634865568083261, "grad_norm": 2.3902726752567105e-05, "learning_rate": 6.142715383197818e-05, "loss": 0.0, "step": 5355 }, { "epoch": 0.664106058728782, "grad_norm": 3.2394811569247395e-05, "learning_rate": 6.122770735486262e-05, "loss": 0.0, "step": 5360 }, { "epoch": 0.664725560649238, "grad_norm": 2.724842488532886e-05, "learning_rate": 6.1028442232515214e-05, "loss": 0.0, "step": 5365 }, { "epoch": 0.665345062569694, "grad_norm": 1.589201929164119e-05, "learning_rate": 6.082935939698504e-05, "loss": 0.0, "step": 5370 }, { "epoch": 0.66596456449015, "grad_norm": 1.3321290680323727e-05, "learning_rate": 6.063045977946861e-05, "loss": 0.0, "step": 5375 }, { "epoch": 0.6665840664106059, "grad_norm": 2.5624643967603333e-05, "learning_rate": 6.0431744310305336e-05, "loss": 0.0, "step": 5380 }, { "epoch": 0.6672035683310619, "grad_norm": 1.9941642676712945e-05, "learning_rate": 6.023321391897335e-05, "loss": 0.0, "step": 5385 }, { "epoch": 0.6678230702515178, "grad_norm": 2.964078521472402e-05, "learning_rate": 6.003486953408512e-05, "loss": 0.0, "step": 5390 }, { "epoch": 0.6684425721719738, "grad_norm": 4.425967563292943e-05, "learning_rate": 5.9836712083382996e-05, "loss": 0.0, "step": 5395 }, { "epoch": 0.6690620740924297, "grad_norm": 1.7877397112897597e-05, "learning_rate": 5.963874249373507e-05, "loss": 0.0, "step": 5400 }, { "epoch": 0.6696815760128857, "grad_norm": 2.824926377797965e-05, "learning_rate": 5.944096169113063e-05, "loss": 0.0, "step": 5405 }, { "epoch": 0.6703010779333416, "grad_norm": 6.005081377224997e-05, "learning_rate": 5.924337060067591e-05, "loss": 0.0, "step": 5410 }, { "epoch": 0.6709205798537975, "grad_norm": 2.1254591047181748e-05, "learning_rate": 5.904597014658992e-05, "loss": 0.0, "step": 5415 }, { "epoch": 0.6715400817742535, "grad_norm": 2.9084798370604403e-05, "learning_rate": 5.884876125219987e-05, "loss": 0.0, "step": 5420 }, { "epoch": 0.6721595836947094, "grad_norm": 1.9424251149757765e-05, "learning_rate": 5.865174483993696e-05, "loss": 0.0, "step": 5425 }, { "epoch": 0.6727790856151654, "grad_norm": 1.8962846297654323e-05, "learning_rate": 5.8454921831332086e-05, "loss": 0.0, "step": 5430 }, { "epoch": 0.6733985875356213, "grad_norm": 2.9507298677344806e-05, "learning_rate": 5.825829314701156e-05, "loss": 0.0, "step": 5435 }, { "epoch": 0.6740180894560773, "grad_norm": 1.6617495930404402e-05, "learning_rate": 5.806185970669267e-05, "loss": 0.0, "step": 5440 }, { "epoch": 0.6746375913765332, "grad_norm": 2.645435233716853e-05, "learning_rate": 5.78656224291795e-05, "loss": 0.0, "step": 5445 }, { "epoch": 0.6752570932969892, "grad_norm": 1.6482068531331606e-05, "learning_rate": 5.76695822323586e-05, "loss": 0.0, "step": 5450 }, { "epoch": 0.6758765952174451, "grad_norm": 6.572165148099884e-05, "learning_rate": 5.7473740033194734e-05, "loss": 0.0, "step": 5455 }, { "epoch": 0.6764960971379012, "grad_norm": 1.2028600394842215e-05, "learning_rate": 5.7278096747726485e-05, "loss": 0.0, "step": 5460 }, { "epoch": 0.677115599058357, "grad_norm": 5.625227277050726e-05, "learning_rate": 5.7082653291062015e-05, "loss": 0.0, "step": 5465 }, { "epoch": 0.6777351009788131, "grad_norm": 2.0043795302626677e-05, "learning_rate": 5.688741057737485e-05, "loss": 0.0, "step": 5470 }, { "epoch": 0.678354602899269, "grad_norm": 2.964249324577395e-05, "learning_rate": 5.6692369519899514e-05, "loss": 0.0, "step": 5475 }, { "epoch": 0.678974104819725, "grad_norm": 7.613992056576535e-05, "learning_rate": 5.6497531030927386e-05, "loss": 0.0, "step": 5480 }, { "epoch": 0.6795936067401809, "grad_norm": 1.6747364497859962e-05, "learning_rate": 5.630289602180222e-05, "loss": 0.0, "step": 5485 }, { "epoch": 0.6802131086606369, "grad_norm": 3.6422276025405154e-05, "learning_rate": 5.610846540291609e-05, "loss": 0.0, "step": 5490 }, { "epoch": 0.6808326105810928, "grad_norm": 9.778475941857323e-05, "learning_rate": 5.5914240083705006e-05, "loss": 0.0, "step": 5495 }, { "epoch": 0.6814521125015488, "grad_norm": 2.018391569436062e-05, "learning_rate": 5.572022097264473e-05, "loss": 0.0, "step": 5500 }, { "epoch": 0.6820716144220047, "grad_norm": 1.5029848327685613e-05, "learning_rate": 5.552640897724646e-05, "loss": 0.0, "step": 5505 }, { "epoch": 0.6826911163424607, "grad_norm": 2.665965075721033e-05, "learning_rate": 5.5332805004052714e-05, "loss": 0.0, "step": 5510 }, { "epoch": 0.6833106182629166, "grad_norm": 4.424316284712404e-05, "learning_rate": 5.513940995863291e-05, "loss": 0.0, "step": 5515 }, { "epoch": 0.6839301201833725, "grad_norm": 3.862302401103079e-05, "learning_rate": 5.494622474557927e-05, "loss": 0.0, "step": 5520 }, { "epoch": 0.6845496221038285, "grad_norm": 2.7808278900920413e-05, "learning_rate": 5.475325026850248e-05, "loss": 0.0, "step": 5525 }, { "epoch": 0.6851691240242844, "grad_norm": 2.1462585209519602e-05, "learning_rate": 5.456048743002755e-05, "loss": 0.0, "step": 5530 }, { "epoch": 0.6857886259447404, "grad_norm": 1.6656968000461347e-05, "learning_rate": 5.4367937131789646e-05, "loss": 0.0, "step": 5535 }, { "epoch": 0.6864081278651963, "grad_norm": 1.9248189346399158e-05, "learning_rate": 5.417560027442969e-05, "loss": 0.0, "step": 5540 }, { "epoch": 0.6870276297856523, "grad_norm": 4.226673627272248e-05, "learning_rate": 5.398347775759029e-05, "loss": 0.0, "step": 5545 }, { "epoch": 0.6876471317061082, "grad_norm": 2.1627163732773624e-05, "learning_rate": 5.379157047991148e-05, "loss": 0.0, "step": 5550 }, { "epoch": 0.6882666336265643, "grad_norm": 1.9537486878107302e-05, "learning_rate": 5.359987933902648e-05, "loss": 0.0, "step": 5555 }, { "epoch": 0.6888861355470202, "grad_norm": 1.3555930308939423e-05, "learning_rate": 5.340840523155769e-05, "loss": 0.0, "step": 5560 }, { "epoch": 0.6895056374674762, "grad_norm": 2.7592664991971105e-05, "learning_rate": 5.3217149053112194e-05, "loss": 0.0, "step": 5565 }, { "epoch": 0.6901251393879321, "grad_norm": 2.1695079340133816e-05, "learning_rate": 5.302611169827783e-05, "loss": 0.0, "step": 5570 }, { "epoch": 0.6907446413083881, "grad_norm": 1.786207576515153e-05, "learning_rate": 5.28352940606188e-05, "loss": 0.0, "step": 5575 }, { "epoch": 0.691364143228844, "grad_norm": 3.0048186090425588e-05, "learning_rate": 5.264469703267172e-05, "loss": 0.0, "step": 5580 }, { "epoch": 0.6919836451493, "grad_norm": 1.3460218724503648e-05, "learning_rate": 5.245432150594118e-05, "loss": 0.0, "step": 5585 }, { "epoch": 0.6926031470697559, "grad_norm": 2.2985715986578725e-05, "learning_rate": 5.226416837089589e-05, "loss": 0.0, "step": 5590 }, { "epoch": 0.6932226489902119, "grad_norm": 1.7614551325095817e-05, "learning_rate": 5.207423851696418e-05, "loss": 0.0, "step": 5595 }, { "epoch": 0.6938421509106678, "grad_norm": 1.1636286217253655e-05, "learning_rate": 5.188453283253008e-05, "loss": 0.0, "step": 5600 }, { "epoch": 0.6944616528311238, "grad_norm": 4.415443254401907e-05, "learning_rate": 5.1695052204929004e-05, "loss": 0.0, "step": 5605 }, { "epoch": 0.6950811547515797, "grad_norm": 2.7673100703395903e-05, "learning_rate": 5.150579752044376e-05, "loss": 0.0, "step": 5610 }, { "epoch": 0.6957006566720357, "grad_norm": 5.483908535097726e-05, "learning_rate": 5.1316769664300325e-05, "loss": 0.0, "step": 5615 }, { "epoch": 0.6963201585924916, "grad_norm": 1.9681188859976828e-05, "learning_rate": 5.1127969520663656e-05, "loss": 0.0, "step": 5620 }, { "epoch": 0.6969396605129476, "grad_norm": 1.7341284547001123e-05, "learning_rate": 5.093939797263361e-05, "loss": 0.0, "step": 5625 }, { "epoch": 0.6975591624334035, "grad_norm": 1.69951072166441e-05, "learning_rate": 5.0751055902240786e-05, "loss": 0.0, "step": 5630 }, { "epoch": 0.6981786643538594, "grad_norm": 5.110988422529772e-05, "learning_rate": 5.056294419044243e-05, "loss": 0.0, "step": 5635 }, { "epoch": 0.6987981662743155, "grad_norm": 1.956735286512412e-05, "learning_rate": 5.037506371711838e-05, "loss": 0.0, "step": 5640 }, { "epoch": 0.6994176681947714, "grad_norm": 2.0030485757160932e-05, "learning_rate": 5.018741536106676e-05, "loss": 0.0, "step": 5645 }, { "epoch": 0.7000371701152274, "grad_norm": 3.7795463867951185e-05, "learning_rate": 5.000000000000002e-05, "loss": 0.0, "step": 5650 }, { "epoch": 0.7006566720356833, "grad_norm": 7.28815357433632e-05, "learning_rate": 4.98128185105408e-05, "loss": 0.0, "step": 5655 }, { "epoch": 0.7012761739561393, "grad_norm": 1.596094989508856e-05, "learning_rate": 4.962587176821782e-05, "loss": 0.0, "step": 5660 }, { "epoch": 0.7018956758765952, "grad_norm": 1.443778455723077e-05, "learning_rate": 4.943916064746175e-05, "loss": 0.0, "step": 5665 }, { "epoch": 0.7025151777970512, "grad_norm": 1.9767168851103634e-05, "learning_rate": 4.9252686021601236e-05, "loss": 0.0, "step": 5670 }, { "epoch": 0.7031346797175071, "grad_norm": 2.4066939658951014e-05, "learning_rate": 4.906644876285869e-05, "loss": 0.0, "step": 5675 }, { "epoch": 0.7037541816379631, "grad_norm": 2.9560080292867497e-05, "learning_rate": 4.8880449742346215e-05, "loss": 0.0, "step": 5680 }, { "epoch": 0.704373683558419, "grad_norm": 2.436608338030055e-05, "learning_rate": 4.8694689830061636e-05, "loss": 0.0, "step": 5685 }, { "epoch": 0.704993185478875, "grad_norm": 1.9833974874927662e-05, "learning_rate": 4.85091698948843e-05, "loss": 0.0, "step": 5690 }, { "epoch": 0.7056126873993309, "grad_norm": 3.6132278182776645e-05, "learning_rate": 4.832389080457118e-05, "loss": 0.0, "step": 5695 }, { "epoch": 0.7062321893197869, "grad_norm": 5.256558142718859e-05, "learning_rate": 4.8138853425752585e-05, "loss": 0.0, "step": 5700 }, { "epoch": 0.7068516912402428, "grad_norm": 2.6447787604411133e-05, "learning_rate": 4.7954058623928366e-05, "loss": 0.0, "step": 5705 }, { "epoch": 0.7074711931606988, "grad_norm": 5.034935747971758e-05, "learning_rate": 4.7769507263463645e-05, "loss": 0.0, "step": 5710 }, { "epoch": 0.7080906950811547, "grad_norm": 1.1244621418882161e-05, "learning_rate": 4.758520020758487e-05, "loss": 0.0, "step": 5715 }, { "epoch": 0.7087101970016108, "grad_norm": 2.425115235382691e-05, "learning_rate": 4.7401138318375805e-05, "loss": 0.0, "step": 5720 }, { "epoch": 0.7093296989220667, "grad_norm": 3.455072874203324e-05, "learning_rate": 4.7217322456773406e-05, "loss": 0.0, "step": 5725 }, { "epoch": 0.7099492008425227, "grad_norm": 2.6769484975375235e-05, "learning_rate": 4.703375348256397e-05, "loss": 0.0, "step": 5730 }, { "epoch": 0.7105687027629786, "grad_norm": 1.408438038197346e-05, "learning_rate": 4.685043225437887e-05, "loss": 0.0, "step": 5735 }, { "epoch": 0.7111882046834345, "grad_norm": 2.884298919525463e-05, "learning_rate": 4.666735962969073e-05, "loss": 0.0, "step": 5740 }, { "epoch": 0.7118077066038905, "grad_norm": 2.6016841729870066e-05, "learning_rate": 4.648453646480933e-05, "loss": 0.0, "step": 5745 }, { "epoch": 0.7124272085243464, "grad_norm": 1.4960092812543735e-05, "learning_rate": 4.630196361487758e-05, "loss": 0.0, "step": 5750 }, { "epoch": 0.7130467104448024, "grad_norm": 1.7150698113255203e-05, "learning_rate": 4.611964193386765e-05, "loss": 0.0, "step": 5755 }, { "epoch": 0.7136662123652583, "grad_norm": 1.2638804037123919e-05, "learning_rate": 4.5937572274576825e-05, "loss": 0.0, "step": 5760 }, { "epoch": 0.7142857142857143, "grad_norm": 1.3143064279574901e-05, "learning_rate": 4.575575548862356e-05, "loss": 0.0, "step": 5765 }, { "epoch": 0.7149052162061702, "grad_norm": 3.2505689887329936e-05, "learning_rate": 4.5574192426443555e-05, "loss": 0.0, "step": 5770 }, { "epoch": 0.7155247181266262, "grad_norm": 2.0111252524657175e-05, "learning_rate": 4.539288393728567e-05, "loss": 0.0, "step": 5775 }, { "epoch": 0.7161442200470821, "grad_norm": 1.2322168913669884e-05, "learning_rate": 4.521183086920813e-05, "loss": 0.0, "step": 5780 }, { "epoch": 0.7167637219675381, "grad_norm": 1.5946972780511715e-05, "learning_rate": 4.503103406907434e-05, "loss": 0.0, "step": 5785 }, { "epoch": 0.717383223887994, "grad_norm": 2.4248549379990436e-05, "learning_rate": 4.485049438254908e-05, "loss": 0.0, "step": 5790 }, { "epoch": 0.71800272580845, "grad_norm": 2.5677923986222595e-05, "learning_rate": 4.4670212654094465e-05, "loss": 0.0, "step": 5795 }, { "epoch": 0.7186222277289059, "grad_norm": 1.3196062354836613e-05, "learning_rate": 4.449018972696606e-05, "loss": 0.0, "step": 5800 }, { "epoch": 0.719241729649362, "grad_norm": 4.7831767005845904e-05, "learning_rate": 4.431042644320885e-05, "loss": 0.0, "step": 5805 }, { "epoch": 0.7198612315698178, "grad_norm": 1.1922708836209495e-05, "learning_rate": 4.413092364365346e-05, "loss": 0.0, "step": 5810 }, { "epoch": 0.7204807334902739, "grad_norm": 1.7749000107869506e-05, "learning_rate": 4.3951682167912056e-05, "loss": 0.0, "step": 5815 }, { "epoch": 0.7211002354107298, "grad_norm": 2.1261575966491364e-05, "learning_rate": 4.377270285437445e-05, "loss": 0.0, "step": 5820 }, { "epoch": 0.7217197373311858, "grad_norm": 1.6872894775588065e-05, "learning_rate": 4.3593986540204225e-05, "loss": 0.0, "step": 5825 }, { "epoch": 0.7223392392516417, "grad_norm": 0.00011370722495485097, "learning_rate": 4.3415534061334805e-05, "loss": 0.0, "step": 5830 }, { "epoch": 0.7229587411720977, "grad_norm": 2.1559371816692874e-05, "learning_rate": 4.323734625246563e-05, "loss": 0.0, "step": 5835 }, { "epoch": 0.7235782430925536, "grad_norm": 2.3347423848463222e-05, "learning_rate": 4.305942394705802e-05, "loss": 0.0, "step": 5840 }, { "epoch": 0.7241977450130095, "grad_norm": 1.8243754311697558e-05, "learning_rate": 4.28817679773315e-05, "loss": 0.0, "step": 5845 }, { "epoch": 0.7248172469334655, "grad_norm": 3.490842937026173e-05, "learning_rate": 4.2704379174259824e-05, "loss": 0.0, "step": 5850 }, { "epoch": 0.7254367488539214, "grad_norm": 1.763487671269104e-05, "learning_rate": 4.2527258367567034e-05, "loss": 0.0, "step": 5855 }, { "epoch": 0.7260562507743774, "grad_norm": 1.3970754480396863e-05, "learning_rate": 4.2350406385723764e-05, "loss": 0.0, "step": 5860 }, { "epoch": 0.7266757526948333, "grad_norm": 1.6644751667627133e-05, "learning_rate": 4.217382405594312e-05, "loss": 0.0, "step": 5865 }, { "epoch": 0.7272952546152893, "grad_norm": 1.3323816347110551e-05, "learning_rate": 4.199751220417698e-05, "loss": 0.0, "step": 5870 }, { "epoch": 0.7279147565357452, "grad_norm": 2.0221381419105455e-05, "learning_rate": 4.182147165511205e-05, "loss": 0.0, "step": 5875 }, { "epoch": 0.7285342584562012, "grad_norm": 2.6441668524057604e-05, "learning_rate": 4.164570323216607e-05, "loss": 0.0, "step": 5880 }, { "epoch": 0.7291537603766571, "grad_norm": 2.598913124529645e-05, "learning_rate": 4.147020775748387e-05, "loss": 0.0, "step": 5885 }, { "epoch": 0.7297732622971131, "grad_norm": 2.1700567231164314e-05, "learning_rate": 4.1294986051933696e-05, "loss": 0.0, "step": 5890 }, { "epoch": 0.730392764217569, "grad_norm": 4.6660974476253614e-05, "learning_rate": 4.112003893510315e-05, "loss": 0.0, "step": 5895 }, { "epoch": 0.7310122661380251, "grad_norm": 3.3409178286092356e-05, "learning_rate": 4.0945367225295496e-05, "loss": 0.0, "step": 5900 }, { "epoch": 0.731631768058481, "grad_norm": 0.00013187967124395072, "learning_rate": 4.077097173952581e-05, "loss": 0.0, "step": 5905 }, { "epoch": 0.732251269978937, "grad_norm": 1.984833215828985e-05, "learning_rate": 4.0596853293517125e-05, "loss": 0.0, "step": 5910 }, { "epoch": 0.7328707718993929, "grad_norm": 2.4290558940265328e-05, "learning_rate": 4.0423012701696705e-05, "loss": 0.0, "step": 5915 }, { "epoch": 0.7334902738198489, "grad_norm": 1.030064686347032e-05, "learning_rate": 4.02494507771921e-05, "loss": 0.0, "step": 5920 }, { "epoch": 0.7341097757403048, "grad_norm": 3.500220554997213e-05, "learning_rate": 4.0076168331827434e-05, "loss": 0.0, "step": 5925 }, { "epoch": 0.7347292776607608, "grad_norm": 4.16638795286417e-05, "learning_rate": 3.990316617611959e-05, "loss": 0.0, "step": 5930 }, { "epoch": 0.7353487795812167, "grad_norm": 3.0173272534739226e-05, "learning_rate": 3.973044511927441e-05, "loss": 0.0, "step": 5935 }, { "epoch": 0.7359682815016727, "grad_norm": 1.5511430319747888e-05, "learning_rate": 3.9558005969182874e-05, "loss": 0.0, "step": 5940 }, { "epoch": 0.7365877834221286, "grad_norm": 2.5033592464751564e-05, "learning_rate": 3.938584953241749e-05, "loss": 0.0, "step": 5945 }, { "epoch": 0.7372072853425846, "grad_norm": 1.856298877100926e-05, "learning_rate": 3.9213976614228275e-05, "loss": 0.0, "step": 5950 }, { "epoch": 0.7378267872630405, "grad_norm": 1.6000503819668666e-05, "learning_rate": 3.9042388018539144e-05, "loss": 0.0, "step": 5955 }, { "epoch": 0.7384462891834964, "grad_norm": 2.3428914573742077e-05, "learning_rate": 3.88710845479441e-05, "loss": 0.0, "step": 5960 }, { "epoch": 0.7390657911039524, "grad_norm": 1.1896031537617091e-05, "learning_rate": 3.8700067003703474e-05, "loss": 0.0, "step": 5965 }, { "epoch": 0.7396852930244083, "grad_norm": 2.569893877080176e-05, "learning_rate": 3.852933618574031e-05, "loss": 0.0, "step": 5970 }, { "epoch": 0.7403047949448643, "grad_norm": 1.640944174141623e-05, "learning_rate": 3.8358892892636355e-05, "loss": 0.0, "step": 5975 }, { "epoch": 0.7409242968653202, "grad_norm": 1.264644288312411e-05, "learning_rate": 3.818873792162858e-05, "loss": 0.0, "step": 5980 }, { "epoch": 0.7415437987857763, "grad_norm": 3.89708875445649e-05, "learning_rate": 3.801887206860532e-05, "loss": 0.0, "step": 5985 }, { "epoch": 0.7421633007062322, "grad_norm": 2.5439192540943623e-05, "learning_rate": 3.7849296128102504e-05, "loss": 0.0, "step": 5990 }, { "epoch": 0.7427828026266882, "grad_norm": 2.5525247110635974e-05, "learning_rate": 3.768001089330016e-05, "loss": 0.0, "step": 5995 }, { "epoch": 0.7434023045471441, "grad_norm": 1.6645943105686456e-05, "learning_rate": 3.751101715601852e-05, "loss": 0.0, "step": 6000 }, { "epoch": 0.7440218064676001, "grad_norm": 2.1535006453632377e-05, "learning_rate": 3.734231570671427e-05, "loss": 0.0, "step": 6005 }, { "epoch": 0.744641308388056, "grad_norm": 3.9755188481649384e-05, "learning_rate": 3.7173907334477e-05, "loss": 0.0, "step": 6010 }, { "epoch": 0.745260810308512, "grad_norm": 2.3519085516454652e-05, "learning_rate": 3.7005792827025455e-05, "loss": 0.0, "step": 6015 }, { "epoch": 0.7458803122289679, "grad_norm": 2.5366882255184464e-05, "learning_rate": 3.683797297070383e-05, "loss": 0.0, "step": 6020 }, { "epoch": 0.7464998141494239, "grad_norm": 4.986883868696168e-05, "learning_rate": 3.667044855047808e-05, "loss": 0.0, "step": 6025 }, { "epoch": 0.7471193160698798, "grad_norm": 2.4789995222818106e-05, "learning_rate": 3.650322034993238e-05, "loss": 0.0, "step": 6030 }, { "epoch": 0.7477388179903358, "grad_norm": 2.552688602008857e-05, "learning_rate": 3.6336289151265254e-05, "loss": 0.0, "step": 6035 }, { "epoch": 0.7483583199107917, "grad_norm": 1.330025952483993e-05, "learning_rate": 3.6169655735286076e-05, "loss": 0.0, "step": 6040 }, { "epoch": 0.7489778218312477, "grad_norm": 4.299165084376e-05, "learning_rate": 3.600332088141133e-05, "loss": 0.0, "step": 6045 }, { "epoch": 0.7495973237517036, "grad_norm": 2.0221181330271065e-05, "learning_rate": 3.5837285367660975e-05, "loss": 0.0, "step": 6050 }, { "epoch": 0.7502168256721596, "grad_norm": 2.9246053600218147e-05, "learning_rate": 3.567154997065494e-05, "loss": 0.0, "step": 6055 }, { "epoch": 0.7508363275926155, "grad_norm": 3.461592859821394e-05, "learning_rate": 3.5506115465609244e-05, "loss": 0.0, "step": 6060 }, { "epoch": 0.7514558295130714, "grad_norm": 1.7670767192612402e-05, "learning_rate": 3.534098262633259e-05, "loss": 0.0, "step": 6065 }, { "epoch": 0.7520753314335274, "grad_norm": 1.9524228264344856e-05, "learning_rate": 3.517615222522259e-05, "loss": 0.0, "step": 6070 }, { "epoch": 0.7526948333539834, "grad_norm": 2.225159369118046e-05, "learning_rate": 3.501162503326226e-05, "loss": 0.0, "step": 6075 }, { "epoch": 0.7533143352744394, "grad_norm": 1.8668615666683763e-05, "learning_rate": 3.484740182001644e-05, "loss": 0.0, "step": 6080 }, { "epoch": 0.7539338371948953, "grad_norm": 3.177594771841541e-05, "learning_rate": 3.468348335362803e-05, "loss": 0.0, "step": 6085 }, { "epoch": 0.7545533391153513, "grad_norm": 2.497501736797858e-05, "learning_rate": 3.451987040081453e-05, "loss": 0.0, "step": 6090 }, { "epoch": 0.7551728410358072, "grad_norm": 1.79350081452867e-05, "learning_rate": 3.435656372686443e-05, "loss": 0.0, "step": 6095 }, { "epoch": 0.7557923429562632, "grad_norm": 1.408641128364252e-05, "learning_rate": 3.419356409563361e-05, "loss": 0.0, "step": 6100 }, { "epoch": 0.7564118448767191, "grad_norm": 1.328645976172993e-05, "learning_rate": 3.403087226954177e-05, "loss": 0.0, "step": 6105 }, { "epoch": 0.7570313467971751, "grad_norm": 2.429859341646079e-05, "learning_rate": 3.386848900956894e-05, "loss": 0.0, "step": 6110 }, { "epoch": 0.757650848717631, "grad_norm": 2.459171628288459e-05, "learning_rate": 3.370641507525176e-05, "loss": 0.0, "step": 6115 }, { "epoch": 0.758270350638087, "grad_norm": 1.4037535038369242e-05, "learning_rate": 3.354465122468008e-05, "loss": 0.0, "step": 6120 }, { "epoch": 0.7588898525585429, "grad_norm": 2.36577197938459e-05, "learning_rate": 3.338319821449333e-05, "loss": 0.0, "step": 6125 }, { "epoch": 0.7595093544789989, "grad_norm": 2.781433067866601e-05, "learning_rate": 3.3222056799876964e-05, "loss": 0.0, "step": 6130 }, { "epoch": 0.7601288563994548, "grad_norm": 1.1206011549802497e-05, "learning_rate": 3.3061227734559096e-05, "loss": 0.0, "step": 6135 }, { "epoch": 0.7607483583199108, "grad_norm": 1.5770770914969034e-05, "learning_rate": 3.2900711770806736e-05, "loss": 0.0, "step": 6140 }, { "epoch": 0.7613678602403667, "grad_norm": 9.911016604746692e-06, "learning_rate": 3.27405096594224e-05, "loss": 0.0, "step": 6145 }, { "epoch": 0.7619873621608227, "grad_norm": 3.7788122426718473e-05, "learning_rate": 3.258062214974062e-05, "loss": 0.0, "step": 6150 }, { "epoch": 0.7626068640812786, "grad_norm": 1.6305322787957266e-05, "learning_rate": 3.2421049989624345e-05, "loss": 0.0, "step": 6155 }, { "epoch": 0.7632263660017347, "grad_norm": 1.628719655855093e-05, "learning_rate": 3.2261793925461494e-05, "loss": 0.0, "step": 6160 }, { "epoch": 0.7638458679221906, "grad_norm": 2.308249349880498e-05, "learning_rate": 3.210285470216159e-05, "loss": 0.0, "step": 6165 }, { "epoch": 0.7644653698426465, "grad_norm": 1.6870817489689216e-05, "learning_rate": 3.194423306315202e-05, "loss": 0.0, "step": 6170 }, { "epoch": 0.7650848717631025, "grad_norm": 2.7414851501816884e-05, "learning_rate": 3.178592975037474e-05, "loss": 0.0, "step": 6175 }, { "epoch": 0.7657043736835584, "grad_norm": 1.751149284245912e-05, "learning_rate": 3.162794550428275e-05, "loss": 0.0, "step": 6180 }, { "epoch": 0.7663238756040144, "grad_norm": 3.6596113204723224e-05, "learning_rate": 3.147028106383663e-05, "loss": 0.0, "step": 6185 }, { "epoch": 0.7669433775244703, "grad_norm": 1.653031176829245e-05, "learning_rate": 3.1312937166501135e-05, "loss": 0.0, "step": 6190 }, { "epoch": 0.7675628794449263, "grad_norm": 1.4197270502336323e-05, "learning_rate": 3.115591454824166e-05, "loss": 0.0, "step": 6195 }, { "epoch": 0.7681823813653822, "grad_norm": 1.940778201969806e-05, "learning_rate": 3.099921394352083e-05, "loss": 0.0, "step": 6200 }, { "epoch": 0.7688018832858382, "grad_norm": 1.594780223967973e-05, "learning_rate": 3.084283608529512e-05, "loss": 0.0, "step": 6205 }, { "epoch": 0.7694213852062941, "grad_norm": 1.7185289834742434e-05, "learning_rate": 3.068678170501129e-05, "loss": 0.0, "step": 6210 }, { "epoch": 0.7700408871267501, "grad_norm": 2.0800114725716412e-05, "learning_rate": 3.053105153260321e-05, "loss": 0.0, "step": 6215 }, { "epoch": 0.770660389047206, "grad_norm": 1.2353677448118106e-05, "learning_rate": 3.0375646296488125e-05, "loss": 0.0, "step": 6220 }, { "epoch": 0.771279890967662, "grad_norm": 1.5871075447648764e-05, "learning_rate": 3.0220566723563516e-05, "loss": 0.0, "step": 6225 }, { "epoch": 0.7718993928881179, "grad_norm": 2.0202585801598616e-05, "learning_rate": 3.0065813539203547e-05, "loss": 0.0, "step": 6230 }, { "epoch": 0.7725188948085739, "grad_norm": 2.005276473937556e-05, "learning_rate": 2.9911387467255734e-05, "loss": 0.0, "step": 6235 }, { "epoch": 0.7731383967290298, "grad_norm": 2.1032012227806263e-05, "learning_rate": 2.9757289230037534e-05, "loss": 0.0, "step": 6240 }, { "epoch": 0.7737578986494859, "grad_norm": 1.4408114111574832e-05, "learning_rate": 2.9603519548333047e-05, "loss": 0.0, "step": 6245 }, { "epoch": 0.7743774005699418, "grad_norm": 6.642552034463733e-05, "learning_rate": 2.9450079141389508e-05, "loss": 0.0, "step": 6250 }, { "epoch": 0.7749969024903978, "grad_norm": 8.485853868478443e-06, "learning_rate": 2.9296968726914e-05, "loss": 0.0, "step": 6255 }, { "epoch": 0.7756164044108537, "grad_norm": 1.2380613952700514e-05, "learning_rate": 2.9144189021070088e-05, "loss": 0.0, "step": 6260 }, { "epoch": 0.7762359063313097, "grad_norm": 2.811436752381269e-05, "learning_rate": 2.899174073847446e-05, "loss": 0.0, "step": 6265 }, { "epoch": 0.7768554082517656, "grad_norm": 2.5710474801599048e-05, "learning_rate": 2.8839624592193693e-05, "loss": 0.0, "step": 6270 }, { "epoch": 0.7774749101722216, "grad_norm": 2.0674478946602903e-05, "learning_rate": 2.868784129374068e-05, "loss": 0.0, "step": 6275 }, { "epoch": 0.7780944120926775, "grad_norm": 1.2345736649876926e-05, "learning_rate": 2.8536391553071507e-05, "loss": 0.0, "step": 6280 }, { "epoch": 0.7787139140131334, "grad_norm": 1.3747967386734672e-05, "learning_rate": 2.8385276078582047e-05, "loss": 0.0, "step": 6285 }, { "epoch": 0.7793334159335894, "grad_norm": 2.0004017642349936e-05, "learning_rate": 2.8234495577104725e-05, "loss": 0.0, "step": 6290 }, { "epoch": 0.7799529178540453, "grad_norm": 6.693748582620174e-05, "learning_rate": 2.8084050753905057e-05, "loss": 0.0, "step": 6295 }, { "epoch": 0.7805724197745013, "grad_norm": 2.6092444386449642e-05, "learning_rate": 2.793394231267854e-05, "loss": 0.0, "step": 6300 }, { "epoch": 0.7811919216949572, "grad_norm": 3.922541145584546e-05, "learning_rate": 2.7784170955547194e-05, "loss": 0.0, "step": 6305 }, { "epoch": 0.7818114236154132, "grad_norm": 1.3000539183849469e-05, "learning_rate": 2.763473738305641e-05, "loss": 0.0, "step": 6310 }, { "epoch": 0.7824309255358691, "grad_norm": 2.9469289074768312e-05, "learning_rate": 2.7485642294171542e-05, "loss": 0.0, "step": 6315 }, { "epoch": 0.7830504274563251, "grad_norm": 4.842556154471822e-05, "learning_rate": 2.7336886386274784e-05, "loss": 0.0, "step": 6320 }, { "epoch": 0.783669929376781, "grad_norm": 1.0508871127967723e-05, "learning_rate": 2.7188470355161755e-05, "loss": 0.0, "step": 6325 }, { "epoch": 0.784289431297237, "grad_norm": 4.7115423512877896e-05, "learning_rate": 2.7040394895038434e-05, "loss": 0.0, "step": 6330 }, { "epoch": 0.784908933217693, "grad_norm": 1.5159424947341904e-05, "learning_rate": 2.6892660698517712e-05, "loss": 0.0, "step": 6335 }, { "epoch": 0.785528435138149, "grad_norm": 2.4471853976137936e-05, "learning_rate": 2.674526845661628e-05, "loss": 0.0, "step": 6340 }, { "epoch": 0.7861479370586049, "grad_norm": 2.458971539454069e-05, "learning_rate": 2.659821885875132e-05, "loss": 0.0, "step": 6345 }, { "epoch": 0.7867674389790609, "grad_norm": 1.4144274246064015e-05, "learning_rate": 2.6451512592737348e-05, "loss": 0.0, "step": 6350 }, { "epoch": 0.7873869408995168, "grad_norm": 1.7276377548114397e-05, "learning_rate": 2.6305150344783013e-05, "loss": 0.0, "step": 6355 }, { "epoch": 0.7880064428199728, "grad_norm": 2.8547334295581095e-05, "learning_rate": 2.6159132799487774e-05, "loss": 0.0, "step": 6360 }, { "epoch": 0.7886259447404287, "grad_norm": 1.4751542948943097e-05, "learning_rate": 2.6013460639838793e-05, "loss": 0.0, "step": 6365 }, { "epoch": 0.7892454466608847, "grad_norm": 9.410257007402834e-06, "learning_rate": 2.5868134547207713e-05, "loss": 0.0, "step": 6370 }, { "epoch": 0.7898649485813406, "grad_norm": 1.1432239261921495e-05, "learning_rate": 2.5723155201347458e-05, "loss": 0.0, "step": 6375 }, { "epoch": 0.7904844505017966, "grad_norm": 1.6911257262108847e-05, "learning_rate": 2.5578523280389077e-05, "loss": 0.0, "step": 6380 }, { "epoch": 0.7911039524222525, "grad_norm": 3.643070158432238e-05, "learning_rate": 2.5434239460838617e-05, "loss": 0.0, "step": 6385 }, { "epoch": 0.7917234543427084, "grad_norm": 2.6388999685877934e-05, "learning_rate": 2.5290304417573807e-05, "loss": 0.0, "step": 6390 }, { "epoch": 0.7923429562631644, "grad_norm": 3.1028477678773925e-05, "learning_rate": 2.5146718823841077e-05, "loss": 0.0, "step": 6395 }, { "epoch": 0.7929624581836203, "grad_norm": 5.162321758689359e-05, "learning_rate": 2.5003483351252267e-05, "loss": 0.0, "step": 6400 }, { "epoch": 0.7935819601040763, "grad_norm": 0.00010153974290005863, "learning_rate": 2.4860598669781577e-05, "loss": 0.0, "step": 6405 }, { "epoch": 0.7942014620245322, "grad_norm": 1.6839298041304573e-05, "learning_rate": 2.471806544776246e-05, "loss": 0.0, "step": 6410 }, { "epoch": 0.7948209639449882, "grad_norm": 1.962680289580021e-05, "learning_rate": 2.457588435188436e-05, "loss": 0.0, "step": 6415 }, { "epoch": 0.7954404658654441, "grad_norm": 0.00014576059766113758, "learning_rate": 2.4434056047189703e-05, "loss": 0.0, "step": 6420 }, { "epoch": 0.7960599677859002, "grad_norm": 1.808621709642466e-05, "learning_rate": 2.4292581197070763e-05, "loss": 0.0, "step": 6425 }, { "epoch": 0.7966794697063561, "grad_norm": 1.1732083294191398e-05, "learning_rate": 2.415146046326654e-05, "loss": 0.0, "step": 6430 }, { "epoch": 0.7972989716268121, "grad_norm": 9.997383131121751e-06, "learning_rate": 2.4010694505859722e-05, "loss": 0.0, "step": 6435 }, { "epoch": 0.797918473547268, "grad_norm": 1.9503107978380285e-05, "learning_rate": 2.3870283983273512e-05, "loss": 0.0, "step": 6440 }, { "epoch": 0.798537975467724, "grad_norm": 1.747414717101492e-05, "learning_rate": 2.37302295522686e-05, "loss": 0.0, "step": 6445 }, { "epoch": 0.7991574773881799, "grad_norm": 1.9351025912328623e-05, "learning_rate": 2.359053186794008e-05, "loss": 0.0, "step": 6450 }, { "epoch": 0.7997769793086359, "grad_norm": 1.3271847819851246e-05, "learning_rate": 2.3451191583714404e-05, "loss": 0.0, "step": 6455 }, { "epoch": 0.8003964812290918, "grad_norm": 1.529365181340836e-05, "learning_rate": 2.331220935134625e-05, "loss": 0.0, "step": 6460 }, { "epoch": 0.8010159831495478, "grad_norm": 2.5430967070860788e-05, "learning_rate": 2.3173585820915655e-05, "loss": 0.0, "step": 6465 }, { "epoch": 0.8016354850700037, "grad_norm": 1.982057437999174e-05, "learning_rate": 2.3035321640824735e-05, "loss": 0.0, "step": 6470 }, { "epoch": 0.8022549869904597, "grad_norm": 3.606089376262389e-05, "learning_rate": 2.289741745779482e-05, "loss": 0.0, "step": 6475 }, { "epoch": 0.8028744889109156, "grad_norm": 4.043238368467428e-05, "learning_rate": 2.275987391686336e-05, "loss": 0.0, "step": 6480 }, { "epoch": 0.8034939908313716, "grad_norm": 4.3776857637567446e-05, "learning_rate": 2.2622691661380925e-05, "loss": 0.0, "step": 6485 }, { "epoch": 0.8041134927518275, "grad_norm": 2.1455885871546343e-05, "learning_rate": 2.2485871333008247e-05, "loss": 0.0, "step": 6490 }, { "epoch": 0.8047329946722834, "grad_norm": 2.32715410675155e-05, "learning_rate": 2.2349413571713096e-05, "loss": 0.0, "step": 6495 }, { "epoch": 0.8053524965927394, "grad_norm": 2.9718476071138866e-05, "learning_rate": 2.2213319015767408e-05, "loss": 0.0, "step": 6500 }, { "epoch": 0.8059719985131953, "grad_norm": 3.374613879714161e-05, "learning_rate": 2.2077588301744233e-05, "loss": 0.0, "step": 6505 }, { "epoch": 0.8065915004336514, "grad_norm": 1.945515577972401e-05, "learning_rate": 2.194222206451474e-05, "loss": 0.0, "step": 6510 }, { "epoch": 0.8072110023541073, "grad_norm": 4.601416003424674e-05, "learning_rate": 2.1807220937245376e-05, "loss": 0.0, "step": 6515 }, { "epoch": 0.8078305042745633, "grad_norm": 1.3127208148944192e-05, "learning_rate": 2.167258555139473e-05, "loss": 0.0, "step": 6520 }, { "epoch": 0.8084500061950192, "grad_norm": 2.844023401848972e-05, "learning_rate": 2.153831653671069e-05, "loss": 0.0, "step": 6525 }, { "epoch": 0.8090695081154752, "grad_norm": 1.5930434528854676e-05, "learning_rate": 2.1404414521227446e-05, "loss": 0.0, "step": 6530 }, { "epoch": 0.8096890100359311, "grad_norm": 1.1981824172835331e-05, "learning_rate": 2.1270880131262604e-05, "loss": 0.0, "step": 6535 }, { "epoch": 0.8103085119563871, "grad_norm": 1.1925362741749268e-05, "learning_rate": 2.1137713991414177e-05, "loss": 0.0, "step": 6540 }, { "epoch": 0.810928013876843, "grad_norm": 2.9348229872994125e-05, "learning_rate": 2.100491672455781e-05, "loss": 0.0, "step": 6545 }, { "epoch": 0.811547515797299, "grad_norm": 2.6582503778627142e-05, "learning_rate": 2.0872488951843684e-05, "loss": 0.0, "step": 6550 }, { "epoch": 0.8121670177177549, "grad_norm": 1.1188927601324394e-05, "learning_rate": 2.0740431292693706e-05, "loss": 0.0, "step": 6555 }, { "epoch": 0.8127865196382109, "grad_norm": 5.817688725073822e-05, "learning_rate": 2.0608744364798627e-05, "loss": 0.0, "step": 6560 }, { "epoch": 0.8134060215586668, "grad_norm": 1.8270680811838247e-05, "learning_rate": 2.0477428784115094e-05, "loss": 0.0, "step": 6565 }, { "epoch": 0.8140255234791228, "grad_norm": 3.236956399632618e-05, "learning_rate": 2.0346485164862872e-05, "loss": 0.0, "step": 6570 }, { "epoch": 0.8146450253995787, "grad_norm": 1.4748973626410589e-05, "learning_rate": 2.021591411952183e-05, "loss": 0.0, "step": 6575 }, { "epoch": 0.8152645273200347, "grad_norm": 1.862555836851243e-05, "learning_rate": 2.0085716258829145e-05, "loss": 0.0, "step": 6580 }, { "epoch": 0.8158840292404906, "grad_norm": 2.197445064666681e-05, "learning_rate": 1.9955892191776538e-05, "loss": 0.0, "step": 6585 }, { "epoch": 0.8165035311609466, "grad_norm": 1.3464506992022507e-05, "learning_rate": 1.9826442525607246e-05, "loss": 0.0, "step": 6590 }, { "epoch": 0.8171230330814025, "grad_norm": 1.5122725017135963e-05, "learning_rate": 1.96973678658133e-05, "loss": 0.0, "step": 6595 }, { "epoch": 0.8177425350018585, "grad_norm": 1.6291398424073122e-05, "learning_rate": 1.956866881613262e-05, "loss": 0.0, "step": 6600 }, { "epoch": 0.8183620369223145, "grad_norm": 4.197514499537647e-05, "learning_rate": 1.944034597854635e-05, "loss": 0.0, "step": 6605 }, { "epoch": 0.8189815388427704, "grad_norm": 9.260013939638156e-06, "learning_rate": 1.9312399953275828e-05, "loss": 0.0, "step": 6610 }, { "epoch": 0.8196010407632264, "grad_norm": 1.7516265870654024e-05, "learning_rate": 1.9184831338779895e-05, "loss": 0.0, "step": 6615 }, { "epoch": 0.8202205426836823, "grad_norm": 1.411936591466656e-05, "learning_rate": 1.9057640731752103e-05, "loss": 0.0, "step": 6620 }, { "epoch": 0.8208400446041383, "grad_norm": 1.9056351447943598e-05, "learning_rate": 1.8930828727117854e-05, "loss": 0.0, "step": 6625 }, { "epoch": 0.8214595465245942, "grad_norm": 1.6040998161770403e-05, "learning_rate": 1.880439591803175e-05, "loss": 0.0, "step": 6630 }, { "epoch": 0.8220790484450502, "grad_norm": 1.8521535821491852e-05, "learning_rate": 1.8678342895874644e-05, "loss": 0.0, "step": 6635 }, { "epoch": 0.8226985503655061, "grad_norm": 1.4082383131608367e-05, "learning_rate": 1.8552670250251003e-05, "loss": 0.0, "step": 6640 }, { "epoch": 0.8233180522859621, "grad_norm": 2.090873385895975e-05, "learning_rate": 1.8427378568986097e-05, "loss": 0.0, "step": 6645 }, { "epoch": 0.823937554206418, "grad_norm": 7.868772081565112e-05, "learning_rate": 1.8302468438123244e-05, "loss": 0.0, "step": 6650 }, { "epoch": 0.824557056126874, "grad_norm": 1.2922583664476406e-05, "learning_rate": 1.8177940441921138e-05, "loss": 0.0, "step": 6655 }, { "epoch": 0.8251765580473299, "grad_norm": 3.0692302971147e-05, "learning_rate": 1.805379516285104e-05, "loss": 0.0, "step": 6660 }, { "epoch": 0.8257960599677859, "grad_norm": 1.993810474232305e-05, "learning_rate": 1.793003318159403e-05, "loss": 0.0, "step": 6665 }, { "epoch": 0.8264155618882418, "grad_norm": 1.7213864339282736e-05, "learning_rate": 1.7806655077038416e-05, "loss": 0.0, "step": 6670 }, { "epoch": 0.8270350638086978, "grad_norm": 1.9202643670723774e-05, "learning_rate": 1.7683661426276878e-05, "loss": 0.0, "step": 6675 }, { "epoch": 0.8276545657291537, "grad_norm": 2.1244128220132552e-05, "learning_rate": 1.7561052804603873e-05, "loss": 0.0, "step": 6680 }, { "epoch": 0.8282740676496098, "grad_norm": 1.1887999789905734e-05, "learning_rate": 1.7438829785512933e-05, "loss": 0.0, "step": 6685 }, { "epoch": 0.8288935695700657, "grad_norm": 1.2584196156240068e-05, "learning_rate": 1.7316992940693943e-05, "loss": 0.0, "step": 6690 }, { "epoch": 0.8295130714905217, "grad_norm": 7.570809884782648e-06, "learning_rate": 1.7195542840030465e-05, "loss": 0.0, "step": 6695 }, { "epoch": 0.8301325734109776, "grad_norm": 1.1055812137783505e-05, "learning_rate": 1.7074480051597096e-05, "loss": 0.0, "step": 6700 }, { "epoch": 0.8307520753314336, "grad_norm": 1.3164172742108349e-05, "learning_rate": 1.6953805141656798e-05, "loss": 0.0, "step": 6705 }, { "epoch": 0.8313715772518895, "grad_norm": 7.636749069206417e-05, "learning_rate": 1.683351867465832e-05, "loss": 0.0, "step": 6710 }, { "epoch": 0.8319910791723454, "grad_norm": 2.1622399799525738e-05, "learning_rate": 1.6713621213233432e-05, "loss": 0.0, "step": 6715 }, { "epoch": 0.8326105810928014, "grad_norm": 1.1925879334739875e-05, "learning_rate": 1.659411331819437e-05, "loss": 0.0, "step": 6720 }, { "epoch": 0.8332300830132573, "grad_norm": 1.3386555110628251e-05, "learning_rate": 1.6474995548531215e-05, "loss": 0.0, "step": 6725 }, { "epoch": 0.8338495849337133, "grad_norm": 2.141722507076338e-05, "learning_rate": 1.6356268461409208e-05, "loss": 0.0, "step": 6730 }, { "epoch": 0.8344690868541692, "grad_norm": 1.897481342894025e-05, "learning_rate": 1.6237932612166296e-05, "loss": 0.0, "step": 6735 }, { "epoch": 0.8350885887746252, "grad_norm": 3.8364683859981596e-05, "learning_rate": 1.6119988554310384e-05, "loss": 0.0, "step": 6740 }, { "epoch": 0.8357080906950811, "grad_norm": 2.5895327780744992e-05, "learning_rate": 1.6002436839516766e-05, "loss": 0.0, "step": 6745 }, { "epoch": 0.8363275926155371, "grad_norm": 1.47181999636814e-05, "learning_rate": 1.588527801762564e-05, "loss": 0.0, "step": 6750 }, { "epoch": 0.836947094535993, "grad_norm": 1.2013568266411312e-05, "learning_rate": 1.5768512636639432e-05, "loss": 0.0, "step": 6755 }, { "epoch": 0.837566596456449, "grad_norm": 3.109215685981326e-05, "learning_rate": 1.5652141242720274e-05, "loss": 0.0, "step": 6760 }, { "epoch": 0.8381860983769049, "grad_norm": 7.566995191155002e-05, "learning_rate": 1.55361643801875e-05, "loss": 0.0, "step": 6765 }, { "epoch": 0.838805600297361, "grad_norm": 1.658032851992175e-05, "learning_rate": 1.542058259151501e-05, "loss": 0.0, "step": 6770 }, { "epoch": 0.8394251022178169, "grad_norm": 2.7613246857072227e-05, "learning_rate": 1.5305396417328756e-05, "loss": 0.0, "step": 6775 }, { "epoch": 0.8400446041382729, "grad_norm": 2.9427139452309348e-05, "learning_rate": 1.5190606396404272e-05, "loss": 0.0, "step": 6780 }, { "epoch": 0.8406641060587288, "grad_norm": 1.985735070775263e-05, "learning_rate": 1.5076213065664058e-05, "loss": 0.0, "step": 6785 }, { "epoch": 0.8412836079791848, "grad_norm": 1.2723244253720623e-05, "learning_rate": 1.4962216960175213e-05, "loss": 0.0, "step": 6790 }, { "epoch": 0.8419031098996407, "grad_norm": 1.0825544450199232e-05, "learning_rate": 1.4848618613146747e-05, "loss": 0.0, "step": 6795 }, { "epoch": 0.8425226118200967, "grad_norm": 1.7413185560144484e-05, "learning_rate": 1.4735418555927238e-05, "loss": 0.0, "step": 6800 }, { "epoch": 0.8431421137405526, "grad_norm": 1.4474117961071897e-05, "learning_rate": 1.4622617318002263e-05, "loss": 0.0, "step": 6805 }, { "epoch": 0.8437616156610086, "grad_norm": 1.5103494661161676e-05, "learning_rate": 1.4510215426991936e-05, "loss": 0.0, "step": 6810 }, { "epoch": 0.8443811175814645, "grad_norm": 1.3918818694946822e-05, "learning_rate": 1.439821340864852e-05, "loss": 0.0, "step": 6815 }, { "epoch": 0.8450006195019204, "grad_norm": 1.2741608770738821e-05, "learning_rate": 1.4286611786853843e-05, "loss": 0.0, "step": 6820 }, { "epoch": 0.8456201214223764, "grad_norm": 3.831136928056367e-05, "learning_rate": 1.4175411083616919e-05, "loss": 0.0, "step": 6825 }, { "epoch": 0.8462396233428323, "grad_norm": 2.4360950192203745e-05, "learning_rate": 1.4064611819071483e-05, "loss": 0.0, "step": 6830 }, { "epoch": 0.8468591252632883, "grad_norm": 1.5076285308168735e-05, "learning_rate": 1.3954214511473574e-05, "loss": 0.0, "step": 6835 }, { "epoch": 0.8474786271837442, "grad_norm": 1.0759928045445122e-05, "learning_rate": 1.38442196771991e-05, "loss": 0.0, "step": 6840 }, { "epoch": 0.8480981291042002, "grad_norm": 2.367113484069705e-05, "learning_rate": 1.3734627830741464e-05, "loss": 0.0, "step": 6845 }, { "epoch": 0.8487176310246561, "grad_norm": 3.4356489777565e-05, "learning_rate": 1.362543948470909e-05, "loss": 0.0, "step": 6850 }, { "epoch": 0.8493371329451121, "grad_norm": 2.67359682766255e-05, "learning_rate": 1.3516655149823033e-05, "loss": 0.0, "step": 6855 }, { "epoch": 0.849956634865568, "grad_norm": 0.000291889940854162, "learning_rate": 1.3408275334914656e-05, "loss": 0.0, "step": 6860 }, { "epoch": 0.8505761367860241, "grad_norm": 2.0533305360004306e-05, "learning_rate": 1.3300300546923172e-05, "loss": 0.0, "step": 6865 }, { "epoch": 0.85119563870648, "grad_norm": 9.812847565626726e-06, "learning_rate": 1.3192731290893357e-05, "loss": 0.0, "step": 6870 }, { "epoch": 0.851815140626936, "grad_norm": 1.3135982953826897e-05, "learning_rate": 1.3085568069973064e-05, "loss": 0.0, "step": 6875 }, { "epoch": 0.8524346425473919, "grad_norm": 2.193776163039729e-05, "learning_rate": 1.2978811385411026e-05, "loss": 0.0, "step": 6880 }, { "epoch": 0.8530541444678479, "grad_norm": 1.2074950973328669e-05, "learning_rate": 1.2872461736554398e-05, "loss": 0.0, "step": 6885 }, { "epoch": 0.8536736463883038, "grad_norm": 1.322369553236058e-05, "learning_rate": 1.2766519620846418e-05, "loss": 0.0, "step": 6890 }, { "epoch": 0.8542931483087598, "grad_norm": 4.831133264815435e-05, "learning_rate": 1.2660985533824155e-05, "loss": 0.0, "step": 6895 }, { "epoch": 0.8549126502292157, "grad_norm": 1.6663949281792156e-05, "learning_rate": 1.2555859969116123e-05, "loss": 0.0, "step": 6900 }, { "epoch": 0.8555321521496717, "grad_norm": 5.2078012231504545e-05, "learning_rate": 1.2451143418440047e-05, "loss": 0.0, "step": 6905 }, { "epoch": 0.8561516540701276, "grad_norm": 3.6786237615160644e-05, "learning_rate": 1.234683637160048e-05, "loss": 0.0, "step": 6910 }, { "epoch": 0.8567711559905836, "grad_norm": 7.876505696913227e-05, "learning_rate": 1.2242939316486557e-05, "loss": 0.0, "step": 6915 }, { "epoch": 0.8573906579110395, "grad_norm": 1.3520474567485508e-05, "learning_rate": 1.213945273906969e-05, "loss": 0.0, "step": 6920 }, { "epoch": 0.8580101598314954, "grad_norm": 1.8507518689148128e-05, "learning_rate": 1.203637712340132e-05, "loss": 0.0, "step": 6925 }, { "epoch": 0.8586296617519514, "grad_norm": 1.34787132992642e-05, "learning_rate": 1.1933712951610676e-05, "loss": 0.0, "step": 6930 }, { "epoch": 0.8592491636724073, "grad_norm": 1.8340282622375526e-05, "learning_rate": 1.1831460703902442e-05, "loss": 0.0, "step": 6935 }, { "epoch": 0.8598686655928633, "grad_norm": 1.8783681298373267e-05, "learning_rate": 1.1729620858554557e-05, "loss": 0.0, "step": 6940 }, { "epoch": 0.8604881675133192, "grad_norm": 1.3193873201089446e-05, "learning_rate": 1.1628193891915996e-05, "loss": 0.0, "step": 6945 }, { "epoch": 0.8611076694337753, "grad_norm": 3.9144259062595665e-05, "learning_rate": 1.1527180278404492e-05, "loss": 0.0, "step": 6950 }, { "epoch": 0.8617271713542312, "grad_norm": 2.9871567676309496e-05, "learning_rate": 1.1426580490504413e-05, "loss": 0.0, "step": 6955 }, { "epoch": 0.8623466732746872, "grad_norm": 1.1404430551920086e-05, "learning_rate": 1.1326394998764423e-05, "loss": 0.0, "step": 6960 }, { "epoch": 0.8629661751951431, "grad_norm": 1.748653630784247e-05, "learning_rate": 1.122662427179535e-05, "loss": 0.0, "step": 6965 }, { "epoch": 0.8635856771155991, "grad_norm": 4.28209277743008e-05, "learning_rate": 1.1127268776268007e-05, "loss": 0.0, "step": 6970 }, { "epoch": 0.864205179036055, "grad_norm": 1.5479788999073207e-05, "learning_rate": 1.1028328976910985e-05, "loss": 0.0, "step": 6975 }, { "epoch": 0.864824680956511, "grad_norm": 0.00013594584015663713, "learning_rate": 1.0929805336508458e-05, "loss": 0.0, "step": 6980 }, { "epoch": 0.8654441828769669, "grad_norm": 1.396751122229034e-05, "learning_rate": 1.083169831589812e-05, "loss": 0.0, "step": 6985 }, { "epoch": 0.8660636847974229, "grad_norm": 1.621201772650238e-05, "learning_rate": 1.0734008373968862e-05, "loss": 0.0, "step": 6990 }, { "epoch": 0.8666831867178788, "grad_norm": 5.3716230468126014e-05, "learning_rate": 1.0636735967658784e-05, "loss": 0.0, "step": 6995 }, { "epoch": 0.8673026886383348, "grad_norm": 2.255929575767368e-05, "learning_rate": 1.0539881551952945e-05, "loss": 0.0, "step": 7000 }, { "epoch": 0.8679221905587907, "grad_norm": 3.407151598366909e-05, "learning_rate": 1.0443445579881306e-05, "loss": 0.0, "step": 7005 }, { "epoch": 0.8685416924792467, "grad_norm": 1.596362199052237e-05, "learning_rate": 1.0347428502516599e-05, "loss": 0.0, "step": 7010 }, { "epoch": 0.8691611943997026, "grad_norm": 1.572237306390889e-05, "learning_rate": 1.0251830768972181e-05, "loss": 0.0, "step": 7015 }, { "epoch": 0.8697806963201586, "grad_norm": 1.9655644791782834e-05, "learning_rate": 1.0156652826399959e-05, "loss": 0.0, "step": 7020 }, { "epoch": 0.8704001982406145, "grad_norm": 2.2492818970931694e-05, "learning_rate": 1.0061895119988318e-05, "loss": 0.0, "step": 7025 }, { "epoch": 0.8710197001610706, "grad_norm": 2.2541578800883144e-05, "learning_rate": 9.967558092959961e-06, "loss": 0.0, "step": 7030 }, { "epoch": 0.8716392020815265, "grad_norm": 1.82479307113681e-05, "learning_rate": 9.873642186569975e-06, "loss": 0.0, "step": 7035 }, { "epoch": 0.8722587040019824, "grad_norm": 2.0989456970710307e-05, "learning_rate": 9.780147840103627e-06, "loss": 0.0, "step": 7040 }, { "epoch": 0.8728782059224384, "grad_norm": 2.010043135669548e-05, "learning_rate": 9.687075490874376e-06, "loss": 0.0, "step": 7045 }, { "epoch": 0.8734977078428943, "grad_norm": 1.2836719179176725e-05, "learning_rate": 9.594425574221822e-06, "loss": 0.0, "step": 7050 }, { "epoch": 0.8741172097633503, "grad_norm": 1.8718739738687873e-05, "learning_rate": 9.502198523509653e-06, "loss": 0.0, "step": 7055 }, { "epoch": 0.8747367116838062, "grad_norm": 3.294113412266597e-05, "learning_rate": 9.410394770123642e-06, "loss": 0.0, "step": 7060 }, { "epoch": 0.8753562136042622, "grad_norm": 1.419221098331036e-05, "learning_rate": 9.319014743469634e-06, "loss": 0.0, "step": 7065 }, { "epoch": 0.8759757155247181, "grad_norm": 1.0328141797799617e-05, "learning_rate": 9.228058870971502e-06, "loss": 0.0, "step": 7070 }, { "epoch": 0.8765952174451741, "grad_norm": 1.5302061001420952e-05, "learning_rate": 9.13752757806916e-06, "loss": 0.0, "step": 7075 }, { "epoch": 0.87721471936563, "grad_norm": 4.893111690762453e-05, "learning_rate": 9.047421288216584e-06, "loss": 0.0, "step": 7080 }, { "epoch": 0.877834221286086, "grad_norm": 1.538233846076764e-05, "learning_rate": 8.957740422879812e-06, "loss": 0.0, "step": 7085 }, { "epoch": 0.8784537232065419, "grad_norm": 7.612728222738951e-05, "learning_rate": 8.868485401535054e-06, "loss": 0.0, "step": 7090 }, { "epoch": 0.8790732251269979, "grad_norm": 1.072336635843385e-05, "learning_rate": 8.7796566416666e-06, "loss": 0.0, "step": 7095 }, { "epoch": 0.8796927270474538, "grad_norm": 2.2037012968212366e-05, "learning_rate": 8.691254558764928e-06, "loss": 0.0, "step": 7100 }, { "epoch": 0.8803122289679098, "grad_norm": 1.5995523426681757e-05, "learning_rate": 8.603279566324806e-06, "loss": 0.0, "step": 7105 }, { "epoch": 0.8809317308883657, "grad_norm": 7.886389357736334e-05, "learning_rate": 8.515732075843274e-06, "loss": 0.0, "step": 7110 }, { "epoch": 0.8815512328088217, "grad_norm": 3.173883305862546e-05, "learning_rate": 8.428612496817767e-06, "loss": 0.0, "step": 7115 }, { "epoch": 0.8821707347292777, "grad_norm": 1.5020637874840759e-05, "learning_rate": 8.341921236744243e-06, "loss": 0.0, "step": 7120 }, { "epoch": 0.8827902366497337, "grad_norm": 2.9183574952185154e-05, "learning_rate": 8.255658701115176e-06, "loss": 0.0, "step": 7125 }, { "epoch": 0.8834097385701896, "grad_norm": 2.006657450692728e-05, "learning_rate": 8.16982529341771e-06, "loss": 0.0, "step": 7130 }, { "epoch": 0.8840292404906456, "grad_norm": 2.4589358872617595e-05, "learning_rate": 8.084421415131794e-06, "loss": 0.0, "step": 7135 }, { "epoch": 0.8846487424111015, "grad_norm": 1.4448784895648714e-05, "learning_rate": 7.999447465728249e-06, "loss": 0.0, "step": 7140 }, { "epoch": 0.8852682443315574, "grad_norm": 1.5997164155123755e-05, "learning_rate": 7.91490384266701e-06, "loss": 0.0, "step": 7145 }, { "epoch": 0.8858877462520134, "grad_norm": 1.538196738692932e-05, "learning_rate": 7.830790941395105e-06, "loss": 0.0, "step": 7150 }, { "epoch": 0.8865072481724693, "grad_norm": 1.40582378662657e-05, "learning_rate": 7.747109155344923e-06, "loss": 0.0, "step": 7155 }, { "epoch": 0.8871267500929253, "grad_norm": 3.2710264349589124e-05, "learning_rate": 7.66385887593235e-06, "loss": 0.0, "step": 7160 }, { "epoch": 0.8877462520133812, "grad_norm": 1.1049595741496887e-05, "learning_rate": 7.581040492554892e-06, "loss": 0.0, "step": 7165 }, { "epoch": 0.8883657539338372, "grad_norm": 1.230348880199017e-05, "learning_rate": 7.498654392589944e-06, "loss": 0.0, "step": 7170 }, { "epoch": 0.8889852558542931, "grad_norm": 3.353979263920337e-05, "learning_rate": 7.416700961392908e-06, "loss": 0.0, "step": 7175 }, { "epoch": 0.8896047577747491, "grad_norm": 2.7195450456929393e-05, "learning_rate": 7.335180582295386e-06, "loss": 0.0, "step": 7180 }, { "epoch": 0.890224259695205, "grad_norm": 3.3053598599508405e-05, "learning_rate": 7.25409363660342e-06, "loss": 0.0, "step": 7185 }, { "epoch": 0.890843761615661, "grad_norm": 3.394218219909817e-05, "learning_rate": 7.173440503595685e-06, "loss": 0.0, "step": 7190 }, { "epoch": 0.8914632635361169, "grad_norm": 2.8316790121607482e-05, "learning_rate": 7.093221560521768e-06, "loss": 0.0, "step": 7195 }, { "epoch": 0.892082765456573, "grad_norm": 1.5196836102404632e-05, "learning_rate": 7.01343718260028e-06, "loss": 0.0, "step": 7200 }, { "epoch": 0.8927022673770288, "grad_norm": 2.4619954274385236e-05, "learning_rate": 6.9340877430172925e-06, "loss": 0.0, "step": 7205 }, { "epoch": 0.8933217692974849, "grad_norm": 1.653758044994902e-05, "learning_rate": 6.855173612924404e-06, "loss": 0.0, "step": 7210 }, { "epoch": 0.8939412712179408, "grad_norm": 2.1499723516171798e-05, "learning_rate": 6.7766951614370965e-06, "loss": 0.0, "step": 7215 }, { "epoch": 0.8945607731383968, "grad_norm": 2.367531124036759e-05, "learning_rate": 6.698652755633006e-06, "loss": 0.0, "step": 7220 }, { "epoch": 0.8951802750588527, "grad_norm": 2.0414643586263992e-05, "learning_rate": 6.621046760550176e-06, "loss": 0.0, "step": 7225 }, { "epoch": 0.8957997769793087, "grad_norm": 1.560066084493883e-05, "learning_rate": 6.5438775391854164e-06, "loss": 0.0, "step": 7230 }, { "epoch": 0.8964192788997646, "grad_norm": 1.6813059119158424e-05, "learning_rate": 6.467145452492507e-06, "loss": 0.0, "step": 7235 }, { "epoch": 0.8970387808202206, "grad_norm": 1.643185896682553e-05, "learning_rate": 6.390850859380571e-06, "loss": 0.0, "step": 7240 }, { "epoch": 0.8976582827406765, "grad_norm": 1.967262323887553e-05, "learning_rate": 6.314994116712403e-06, "loss": 0.0, "step": 7245 }, { "epoch": 0.8982777846611324, "grad_norm": 1.5629608242306858e-05, "learning_rate": 6.239575579302736e-06, "loss": 0.0, "step": 7250 }, { "epoch": 0.8988972865815884, "grad_norm": 2.059936014120467e-05, "learning_rate": 6.164595599916712e-06, "loss": 0.0, "step": 7255 }, { "epoch": 0.8995167885020443, "grad_norm": 1.2677743143285625e-05, "learning_rate": 6.090054529268074e-06, "loss": 0.0, "step": 7260 }, { "epoch": 0.9001362904225003, "grad_norm": 0.00024229065456893295, "learning_rate": 6.0159527160176256e-06, "loss": 0.0, "step": 7265 }, { "epoch": 0.9007557923429562, "grad_norm": 1.5036132936074864e-05, "learning_rate": 5.942290506771564e-06, "loss": 0.0, "step": 7270 }, { "epoch": 0.9013752942634122, "grad_norm": 1.742842505336739e-05, "learning_rate": 5.869068246079878e-06, "loss": 0.0, "step": 7275 }, { "epoch": 0.9019947961838681, "grad_norm": 1.2098686966055539e-05, "learning_rate": 5.796286276434704e-06, "loss": 0.0, "step": 7280 }, { "epoch": 0.9026142981043241, "grad_norm": 1.952095772139728e-05, "learning_rate": 5.723944938268811e-06, "loss": 0.0, "step": 7285 }, { "epoch": 0.90323380002478, "grad_norm": 1.9280856577097438e-05, "learning_rate": 5.652044569953874e-06, "loss": 0.0, "step": 7290 }, { "epoch": 0.903853301945236, "grad_norm": 1.9867486116709188e-05, "learning_rate": 5.5805855077989855e-06, "loss": 0.0, "step": 7295 }, { "epoch": 0.904472803865692, "grad_norm": 1.751690433593467e-05, "learning_rate": 5.509568086049066e-06, "loss": 0.0, "step": 7300 }, { "epoch": 0.905092305786148, "grad_norm": 2.6974543288815767e-05, "learning_rate": 5.438992636883266e-06, "loss": 0.0, "step": 7305 }, { "epoch": 0.9057118077066039, "grad_norm": 1.1795488717325497e-05, "learning_rate": 5.368859490413503e-06, "loss": 0.0, "step": 7310 }, { "epoch": 0.9063313096270599, "grad_norm": 1.3424416465568356e-05, "learning_rate": 5.2991689746827885e-06, "loss": 0.0, "step": 7315 }, { "epoch": 0.9069508115475158, "grad_norm": 1.5938099750201218e-05, "learning_rate": 5.229921415663774e-06, "loss": 0.0, "step": 7320 }, { "epoch": 0.9075703134679718, "grad_norm": 3.7522389902733266e-05, "learning_rate": 5.161117137257221e-06, "loss": 0.0, "step": 7325 }, { "epoch": 0.9081898153884277, "grad_norm": 3.0215720471460372e-05, "learning_rate": 5.0927564612904824e-06, "loss": 0.0, "step": 7330 }, { "epoch": 0.9088093173088837, "grad_norm": 2.6825335226021707e-05, "learning_rate": 5.024839707515972e-06, "loss": 0.0, "step": 7335 }, { "epoch": 0.9094288192293396, "grad_norm": 1.9117160263704136e-05, "learning_rate": 4.957367193609708e-06, "loss": 0.0, "step": 7340 }, { "epoch": 0.9100483211497956, "grad_norm": 1.3385351849137805e-05, "learning_rate": 4.890339235169783e-06, "loss": 0.0, "step": 7345 }, { "epoch": 0.9106678230702515, "grad_norm": 1.64795510499971e-05, "learning_rate": 4.8237561457149415e-06, "loss": 0.0, "step": 7350 }, { "epoch": 0.9112873249907075, "grad_norm": 1.557102041260805e-05, "learning_rate": 4.757618236683059e-06, "loss": 0.0, "step": 7355 }, { "epoch": 0.9119068269111634, "grad_norm": 1.738732316880487e-05, "learning_rate": 4.691925817429699e-06, "loss": 0.0, "step": 7360 }, { "epoch": 0.9125263288316193, "grad_norm": 1.4602952433051541e-05, "learning_rate": 4.626679195226724e-06, "loss": 0.0, "step": 7365 }, { "epoch": 0.9131458307520753, "grad_norm": 1.1885648746101651e-05, "learning_rate": 4.561878675260767e-06, "loss": 0.0, "step": 7370 }, { "epoch": 0.9137653326725312, "grad_norm": 1.782663275662344e-05, "learning_rate": 4.497524560631883e-06, "loss": 0.0, "step": 7375 }, { "epoch": 0.9143848345929873, "grad_norm": 3.269453372922726e-05, "learning_rate": 4.433617152352043e-06, "loss": 0.0, "step": 7380 }, { "epoch": 0.9150043365134432, "grad_norm": 1.289015472138999e-05, "learning_rate": 4.370156749343834e-06, "loss": 0.0, "step": 7385 }, { "epoch": 0.9156238384338992, "grad_norm": 1.0937179467873648e-05, "learning_rate": 4.307143648438983e-06, "loss": 0.0, "step": 7390 }, { "epoch": 0.9162433403543551, "grad_norm": 4.9988520913757384e-05, "learning_rate": 4.244578144376999e-06, "loss": 0.0, "step": 7395 }, { "epoch": 0.9168628422748111, "grad_norm": 0.00013568642316386104, "learning_rate": 4.182460529803778e-06, "loss": 0.0, "step": 7400 }, { "epoch": 0.917482344195267, "grad_norm": 1.685322422417812e-05, "learning_rate": 4.120791095270249e-06, "loss": 0.0, "step": 7405 }, { "epoch": 0.918101846115723, "grad_norm": 2.0997145838919096e-05, "learning_rate": 4.059570129230994e-06, "loss": 0.0, "step": 7410 }, { "epoch": 0.9187213480361789, "grad_norm": 1.1377662303857505e-05, "learning_rate": 3.99879791804294e-06, "loss": 0.0, "step": 7415 }, { "epoch": 0.9193408499566349, "grad_norm": 2.591474913060665e-05, "learning_rate": 3.938474745963983e-06, "loss": 0.0, "step": 7420 }, { "epoch": 0.9199603518770908, "grad_norm": 4.5707125536864623e-05, "learning_rate": 3.878600895151674e-06, "loss": 0.0, "step": 7425 }, { "epoch": 0.9205798537975468, "grad_norm": 1.553055517433677e-05, "learning_rate": 3.81917664566187e-06, "loss": 0.0, "step": 7430 }, { "epoch": 0.9211993557180027, "grad_norm": 1.4369362361321691e-05, "learning_rate": 3.7602022754474777e-06, "loss": 0.0, "step": 7435 }, { "epoch": 0.9218188576384587, "grad_norm": 3.0761573725612834e-05, "learning_rate": 3.7016780603570944e-06, "loss": 0.0, "step": 7440 }, { "epoch": 0.9224383595589146, "grad_norm": 0.00021068814385216683, "learning_rate": 3.6436042741337937e-06, "loss": 0.0, "step": 7445 }, { "epoch": 0.9230578614793706, "grad_norm": 2.362461236771196e-05, "learning_rate": 3.585981188413767e-06, "loss": 0.0, "step": 7450 }, { "epoch": 0.9236773633998265, "grad_norm": 1.4773780094401445e-05, "learning_rate": 3.52880907272507e-06, "loss": 0.0, "step": 7455 }, { "epoch": 0.9242968653202825, "grad_norm": 1.5393856301670894e-05, "learning_rate": 3.472088194486389e-06, "loss": 0.0, "step": 7460 }, { "epoch": 0.9249163672407384, "grad_norm": 3.613448643591255e-05, "learning_rate": 3.4158188190058117e-06, "loss": 0.0, "step": 7465 }, { "epoch": 0.9255358691611943, "grad_norm": 4.2731942812679335e-05, "learning_rate": 3.360001209479502e-06, "loss": 0.0, "step": 7470 }, { "epoch": 0.9261553710816504, "grad_norm": 1.0626364201016258e-05, "learning_rate": 3.3046356269905486e-06, "loss": 0.0, "step": 7475 }, { "epoch": 0.9267748730021063, "grad_norm": 8.180577424354851e-06, "learning_rate": 3.2497223305077208e-06, "loss": 0.0, "step": 7480 }, { "epoch": 0.9273943749225623, "grad_norm": 1.239361608895706e-05, "learning_rate": 3.195261576884212e-06, "loss": 0.0, "step": 7485 }, { "epoch": 0.9280138768430182, "grad_norm": 1.4830884538241662e-05, "learning_rate": 3.141253620856521e-06, "loss": 0.0, "step": 7490 }, { "epoch": 0.9286333787634742, "grad_norm": 1.9064626030740328e-05, "learning_rate": 3.0876987150431858e-06, "loss": 0.0, "step": 7495 }, { "epoch": 0.9292528806839301, "grad_norm": 9.959793715097476e-06, "learning_rate": 3.0345971099436486e-06, "loss": 0.0, "step": 7500 }, { "epoch": 0.9298723826043861, "grad_norm": 1.7425496480427682e-05, "learning_rate": 2.981949053937072e-06, "loss": 0.0, "step": 7505 }, { "epoch": 0.930491884524842, "grad_norm": 1.7770293197827414e-05, "learning_rate": 2.9297547932811477e-06, "loss": 0.0, "step": 7510 }, { "epoch": 0.931111386445298, "grad_norm": 2.174586734327022e-05, "learning_rate": 2.8780145721110097e-06, "loss": 0.0, "step": 7515 }, { "epoch": 0.9317308883657539, "grad_norm": 1.2456471267796587e-05, "learning_rate": 2.826728632438025e-06, "loss": 0.0, "step": 7520 }, { "epoch": 0.9323503902862099, "grad_norm": 1.4962816749175545e-05, "learning_rate": 2.7758972141486706e-06, "loss": 0.0, "step": 7525 }, { "epoch": 0.9329698922066658, "grad_norm": 1.4517198906105477e-05, "learning_rate": 2.725520555003502e-06, "loss": 0.0, "step": 7530 }, { "epoch": 0.9335893941271218, "grad_norm": 1.7709196981741115e-05, "learning_rate": 2.6755988906358997e-06, "loss": 0.0, "step": 7535 }, { "epoch": 0.9342088960475777, "grad_norm": 1.240099300048314e-05, "learning_rate": 2.626132454551067e-06, "loss": 0.0, "step": 7540 }, { "epoch": 0.9348283979680337, "grad_norm": 4.078456913703121e-05, "learning_rate": 2.5771214781248887e-06, "loss": 0.0, "step": 7545 }, { "epoch": 0.9354478998884896, "grad_norm": 2.1800753529532813e-05, "learning_rate": 2.528566190602899e-06, "loss": 0.0, "step": 7550 }, { "epoch": 0.9360674018089457, "grad_norm": 2.4177201339625753e-05, "learning_rate": 2.4804668190991476e-06, "loss": 0.0, "step": 7555 }, { "epoch": 0.9366869037294016, "grad_norm": 1.5613381037837826e-05, "learning_rate": 2.43282358859519e-06, "loss": 0.0, "step": 7560 }, { "epoch": 0.9373064056498576, "grad_norm": 3.978759195888415e-05, "learning_rate": 2.3856367219390108e-06, "loss": 0.0, "step": 7565 }, { "epoch": 0.9379259075703135, "grad_norm": 1.5061892554513179e-05, "learning_rate": 2.3389064398439577e-06, "loss": 0.0, "step": 7570 }, { "epoch": 0.9385454094907694, "grad_norm": 2.331317591597326e-05, "learning_rate": 2.292632960887775e-06, "loss": 0.0, "step": 7575 }, { "epoch": 0.9391649114112254, "grad_norm": 3.2341584301320836e-05, "learning_rate": 2.246816501511495e-06, "loss": 0.0, "step": 7580 }, { "epoch": 0.9397844133316813, "grad_norm": 1.6652284102747217e-05, "learning_rate": 2.201457276018526e-06, "loss": 0.0, "step": 7585 }, { "epoch": 0.9404039152521373, "grad_norm": 2.3358388716587797e-05, "learning_rate": 2.1565554965735537e-06, "loss": 0.0, "step": 7590 }, { "epoch": 0.9410234171725932, "grad_norm": 1.1500771506689489e-05, "learning_rate": 2.1121113732016084e-06, "loss": 0.0, "step": 7595 }, { "epoch": 0.9416429190930492, "grad_norm": 4.1334551497129723e-05, "learning_rate": 2.0681251137870673e-06, "loss": 0.0, "step": 7600 }, { "epoch": 0.9422624210135051, "grad_norm": 9.406086974195205e-06, "learning_rate": 2.0245969240726525e-06, "loss": 0.0, "step": 7605 }, { "epoch": 0.9428819229339611, "grad_norm": 1.632814201002475e-05, "learning_rate": 1.9815270076585345e-06, "loss": 0.0, "step": 7610 }, { "epoch": 0.943501424854417, "grad_norm": 5.412558311945759e-05, "learning_rate": 1.9389155660013312e-06, "loss": 0.0, "step": 7615 }, { "epoch": 0.944120926774873, "grad_norm": 1.5063080354593694e-05, "learning_rate": 1.8967627984131652e-06, "loss": 0.0, "step": 7620 }, { "epoch": 0.9447404286953289, "grad_norm": 1.2432846233423334e-05, "learning_rate": 1.8550689020607305e-06, "loss": 0.0, "step": 7625 }, { "epoch": 0.9453599306157849, "grad_norm": 1.087307191482978e-05, "learning_rate": 1.8138340719644263e-06, "loss": 0.0, "step": 7630 }, { "epoch": 0.9459794325362408, "grad_norm": 1.2929647709825076e-05, "learning_rate": 1.7730585009973377e-06, "loss": 0.0, "step": 7635 }, { "epoch": 0.9465989344566968, "grad_norm": 1.1567200090212282e-05, "learning_rate": 1.7327423798844666e-06, "loss": 0.0, "step": 7640 }, { "epoch": 0.9472184363771528, "grad_norm": 2.047250200121198e-05, "learning_rate": 1.6928858972017125e-06, "loss": 0.0, "step": 7645 }, { "epoch": 0.9478379382976088, "grad_norm": 1.5213406186376233e-05, "learning_rate": 1.6534892393750833e-06, "loss": 0.0, "step": 7650 }, { "epoch": 0.9484574402180647, "grad_norm": 1.5688718121964484e-05, "learning_rate": 1.6145525906797521e-06, "loss": 0.0, "step": 7655 }, { "epoch": 0.9490769421385207, "grad_norm": 1.3124458746460732e-05, "learning_rate": 1.5760761332392681e-06, "loss": 0.0, "step": 7660 }, { "epoch": 0.9496964440589766, "grad_norm": 1.7506878066342324e-05, "learning_rate": 1.5380600470246476e-06, "loss": 0.0, "step": 7665 }, { "epoch": 0.9503159459794326, "grad_norm": 0.00019138396601192653, "learning_rate": 1.500504509853562e-06, "loss": 0.0, "step": 7670 }, { "epoch": 0.9509354478998885, "grad_norm": 1.2044631148455665e-05, "learning_rate": 1.463409697389473e-06, "loss": 0.0, "step": 7675 }, { "epoch": 0.9515549498203445, "grad_norm": 1.1821906809927896e-05, "learning_rate": 1.4267757831408546e-06, "loss": 0.0, "step": 7680 }, { "epoch": 0.9521744517408004, "grad_norm": 1.2987155059818178e-05, "learning_rate": 1.3906029384603393e-06, "loss": 0.0, "step": 7685 }, { "epoch": 0.9527939536612563, "grad_norm": 1.2113920092815533e-05, "learning_rate": 1.3548913325439949e-06, "loss": 0.0, "step": 7690 }, { "epoch": 0.9534134555817123, "grad_norm": 1.5708454156992957e-05, "learning_rate": 1.3196411324304047e-06, "loss": 0.0, "step": 7695 }, { "epoch": 0.9540329575021682, "grad_norm": 8.612557394371834e-06, "learning_rate": 1.2848525029999891e-06, "loss": 0.0, "step": 7700 }, { "epoch": 0.9546524594226242, "grad_norm": 3.200208084308542e-05, "learning_rate": 1.2505256069742065e-06, "loss": 0.0, "step": 7705 }, { "epoch": 0.9552719613430801, "grad_norm": 2.178146860387642e-05, "learning_rate": 1.2166606049147877e-06, "loss": 0.0, "step": 7710 }, { "epoch": 0.9558914632635361, "grad_norm": 1.3898727047489956e-05, "learning_rate": 1.1832576552229691e-06, "loss": 0.0, "step": 7715 }, { "epoch": 0.956510965183992, "grad_norm": 1.1332055692037102e-05, "learning_rate": 1.1503169141388047e-06, "loss": 0.0, "step": 7720 }, { "epoch": 0.957130467104448, "grad_norm": 1.6206869986490346e-05, "learning_rate": 1.1178385357403564e-06, "loss": 0.0, "step": 7725 }, { "epoch": 0.957749969024904, "grad_norm": 4.0796585381031036e-05, "learning_rate": 1.0858226719430486e-06, "loss": 0.0, "step": 7730 }, { "epoch": 0.95836947094536, "grad_norm": 1.9753004380618222e-05, "learning_rate": 1.0542694724988923e-06, "loss": 0.0, "step": 7735 }, { "epoch": 0.9589889728658159, "grad_norm": 1.6900154150789604e-05, "learning_rate": 1.023179084995851e-06, "loss": 0.0, "step": 7740 }, { "epoch": 0.9596084747862719, "grad_norm": 3.521572216413915e-05, "learning_rate": 9.9255165485711e-07, "loss": 0.0, "step": 7745 }, { "epoch": 0.9602279767067278, "grad_norm": 1.656348285905551e-05, "learning_rate": 9.623873253403749e-07, "loss": 0.0, "step": 7750 }, { "epoch": 0.9608474786271838, "grad_norm": 1.2179970326542389e-05, "learning_rate": 9.326862375372725e-07, "loss": 0.0, "step": 7755 }, { "epoch": 0.9614669805476397, "grad_norm": 1.674490158620756e-05, "learning_rate": 9.034485303726082e-07, "loss": 0.0, "step": 7760 }, { "epoch": 0.9620864824680957, "grad_norm": 3.836236646748148e-05, "learning_rate": 8.746743406037872e-07, "loss": 0.0, "step": 7765 }, { "epoch": 0.9627059843885516, "grad_norm": 1.317010901402682e-05, "learning_rate": 8.463638028201271e-07, "loss": 0.0, "step": 7770 }, { "epoch": 0.9633254863090076, "grad_norm": 2.333957309019752e-05, "learning_rate": 8.185170494422246e-07, "loss": 0.0, "step": 7775 }, { "epoch": 0.9639449882294635, "grad_norm": 1.1039879609597847e-05, "learning_rate": 7.911342107214226e-07, "loss": 0.0, "step": 7780 }, { "epoch": 0.9645644901499195, "grad_norm": 1.2601029993675184e-05, "learning_rate": 7.642154147390557e-07, "loss": 0.0, "step": 7785 }, { "epoch": 0.9651839920703754, "grad_norm": 1.2647015864786226e-05, "learning_rate": 7.377607874059722e-07, "loss": 0.0, "step": 7790 }, { "epoch": 0.9658034939908313, "grad_norm": 3.5365912481211126e-05, "learning_rate": 7.117704524619129e-07, "loss": 0.0, "step": 7795 }, { "epoch": 0.9664229959112873, "grad_norm": 1.7322068742942065e-05, "learning_rate": 6.862445314748889e-07, "loss": 0.0, "step": 7800 }, { "epoch": 0.9670424978317432, "grad_norm": 2.3366548703052104e-05, "learning_rate": 6.611831438406713e-07, "loss": 0.0, "step": 7805 }, { "epoch": 0.9676619997521992, "grad_norm": 1.0968607057293411e-05, "learning_rate": 6.365864067821914e-07, "loss": 0.0, "step": 7810 }, { "epoch": 0.9682815016726551, "grad_norm": 2.7040619897888973e-05, "learning_rate": 6.124544353490303e-07, "loss": 0.0, "step": 7815 }, { "epoch": 0.9689010035931112, "grad_norm": 1.2410858289513271e-05, "learning_rate": 5.887873424168521e-07, "loss": 0.0, "step": 7820 }, { "epoch": 0.969520505513567, "grad_norm": 1.7025924535118975e-05, "learning_rate": 5.655852386868499e-07, "loss": 0.0, "step": 7825 }, { "epoch": 0.9701400074340231, "grad_norm": 1.0113310963788535e-05, "learning_rate": 5.428482326853224e-07, "loss": 0.0, "step": 7830 }, { "epoch": 0.970759509354479, "grad_norm": 1.1934193935303483e-05, "learning_rate": 5.205764307630534e-07, "loss": 0.0, "step": 7835 }, { "epoch": 0.971379011274935, "grad_norm": 4.547064600046724e-05, "learning_rate": 4.987699370948895e-07, "loss": 0.0, "step": 7840 }, { "epoch": 0.9719985131953909, "grad_norm": 1.7326912711723708e-05, "learning_rate": 4.774288536792182e-07, "loss": 0.0, "step": 7845 }, { "epoch": 0.9726180151158469, "grad_norm": 1.4628738426836208e-05, "learning_rate": 4.5655328033752387e-07, "loss": 0.0, "step": 7850 }, { "epoch": 0.9732375170363028, "grad_norm": 2.835465238604229e-05, "learning_rate": 4.3614331471387714e-07, "loss": 0.0, "step": 7855 }, { "epoch": 0.9738570189567588, "grad_norm": 1.4035920685273595e-05, "learning_rate": 4.1619905227450187e-07, "loss": 0.0, "step": 7860 }, { "epoch": 0.9744765208772147, "grad_norm": 1.2818127288483083e-05, "learning_rate": 3.9672058630734196e-07, "loss": 0.0, "step": 7865 }, { "epoch": 0.9750960227976707, "grad_norm": 1.994063495658338e-05, "learning_rate": 3.7770800792159555e-07, "loss": 0.0, "step": 7870 }, { "epoch": 0.9757155247181266, "grad_norm": 1.9438450181041844e-05, "learning_rate": 3.5916140604731474e-07, "loss": 0.0, "step": 7875 }, { "epoch": 0.9763350266385826, "grad_norm": 1.881604293885175e-05, "learning_rate": 3.4108086743495084e-07, "loss": 0.0, "step": 7880 }, { "epoch": 0.9769545285590385, "grad_norm": 1.6535568647668697e-05, "learning_rate": 3.2346647665502106e-07, "loss": 0.0, "step": 7885 }, { "epoch": 0.9775740304794945, "grad_norm": 1.3070878594589885e-05, "learning_rate": 3.063183160976313e-07, "loss": 0.0, "step": 7890 }, { "epoch": 0.9781935323999504, "grad_norm": 1.974356382561382e-05, "learning_rate": 2.8963646597214287e-07, "loss": 0.0, "step": 7895 }, { "epoch": 0.9788130343204063, "grad_norm": 3.253783143009059e-05, "learning_rate": 2.734210043067731e-07, "loss": 0.0, "step": 7900 }, { "epoch": 0.9794325362408624, "grad_norm": 1.190515195048647e-05, "learning_rate": 2.576720069482397e-07, "loss": 0.0, "step": 7905 }, { "epoch": 0.9800520381613183, "grad_norm": 2.878196755773388e-05, "learning_rate": 2.4238954756142793e-07, "loss": 0.0, "step": 7910 }, { "epoch": 0.9806715400817743, "grad_norm": 1.5060357327456586e-05, "learning_rate": 2.2757369762899106e-07, "loss": 0.0, "step": 7915 }, { "epoch": 0.9812910420022302, "grad_norm": 2.230923746537883e-05, "learning_rate": 2.1322452645106128e-07, "loss": 0.0, "step": 7920 }, { "epoch": 0.9819105439226862, "grad_norm": 1.2020076610497199e-05, "learning_rate": 1.9934210114490593e-07, "loss": 0.0, "step": 7925 }, { "epoch": 0.9825300458431421, "grad_norm": 1.557873656565789e-05, "learning_rate": 1.8592648664464973e-07, "loss": 0.0, "step": 7930 }, { "epoch": 0.9831495477635981, "grad_norm": 1.4026416465640068e-05, "learning_rate": 1.7297774570089743e-07, "loss": 0.0, "step": 7935 }, { "epoch": 0.983769049684054, "grad_norm": 3.2637559343129396e-05, "learning_rate": 1.6049593888052272e-07, "loss": 0.0, "step": 7940 }, { "epoch": 0.98438855160451, "grad_norm": 4.813147825188935e-05, "learning_rate": 1.4848112456632423e-07, "loss": 0.0, "step": 7945 }, { "epoch": 0.9850080535249659, "grad_norm": 1.5387495295726694e-05, "learning_rate": 1.3693335895677006e-07, "loss": 0.0, "step": 7950 }, { "epoch": 0.9856275554454219, "grad_norm": 1.9888593669747934e-05, "learning_rate": 1.2585269606576466e-07, "loss": 0.0, "step": 7955 }, { "epoch": 0.9862470573658778, "grad_norm": 1.5588269889121875e-05, "learning_rate": 1.152391877223491e-07, "loss": 0.0, "step": 7960 }, { "epoch": 0.9868665592863338, "grad_norm": 3.954503335990012e-05, "learning_rate": 1.0509288357050117e-07, "loss": 0.0, "step": 7965 }, { "epoch": 0.9874860612067897, "grad_norm": 7.765534064674284e-06, "learning_rate": 9.541383106884683e-08, "loss": 0.0, "step": 7970 }, { "epoch": 0.9881055631272457, "grad_norm": 8.243837328336667e-06, "learning_rate": 8.620207549051573e-08, "loss": 0.0, "step": 7975 }, { "epoch": 0.9887250650477016, "grad_norm": 1.7084112187149003e-05, "learning_rate": 7.74576599228638e-08, "loss": 0.0, "step": 7980 }, { "epoch": 0.9893445669681576, "grad_norm": 2.6098810849362053e-05, "learning_rate": 6.918062526730662e-08, "loss": 0.0, "step": 7985 }, { "epoch": 0.9899640688886135, "grad_norm": 1.324158893112326e-05, "learning_rate": 6.137101023910852e-08, "loss": 0.0, "step": 7990 }, { "epoch": 0.9905835708090696, "grad_norm": 1.3696650057681836e-05, "learning_rate": 5.4028851367204925e-08, "loss": 0.0, "step": 7995 }, { "epoch": 0.9912030727295255, "grad_norm": 5.182771565159783e-05, "learning_rate": 4.7154182994058046e-08, "loss": 0.0, "step": 8000 }, { "epoch": 0.9918225746499815, "grad_norm": 1.6978799976641312e-05, "learning_rate": 4.0747037275457036e-08, "loss": 0.0, "step": 8005 }, { "epoch": 0.9924420765704374, "grad_norm": 2.1746163838542998e-05, "learning_rate": 3.4807444180395834e-08, "loss": 0.0, "step": 8010 }, { "epoch": 0.9930615784908933, "grad_norm": 1.4939757420506794e-05, "learning_rate": 2.9335431490917776e-08, "loss": 0.0, "step": 8015 }, { "epoch": 0.9936810804113493, "grad_norm": 5.799506470793858e-05, "learning_rate": 2.433102480198235e-08, "loss": 0.0, "step": 8020 }, { "epoch": 0.9943005823318052, "grad_norm": 1.7270534954150207e-05, "learning_rate": 1.9794247521376375e-08, "loss": 0.0, "step": 8025 }, { "epoch": 0.9949200842522612, "grad_norm": 2.6063697077916004e-05, "learning_rate": 1.5725120869547472e-08, "loss": 0.0, "step": 8030 }, { "epoch": 0.9955395861727171, "grad_norm": 1.1660726158879697e-05, "learning_rate": 1.2123663879581859e-08, "loss": 0.0, "step": 8035 }, { "epoch": 0.9961590880931731, "grad_norm": 1.1441545211710036e-05, "learning_rate": 8.989893397037819e-09, "loss": 0.0, "step": 8040 }, { "epoch": 0.996778590013629, "grad_norm": 1.054462518368382e-05, "learning_rate": 6.3238240799234905e-09, "loss": 0.0, "step": 8045 }, { "epoch": 0.997398091934085, "grad_norm": 1.5436646208399907e-05, "learning_rate": 4.12546839857475e-09, "loss": 0.0, "step": 8050 }, { "epoch": 0.9980175938545409, "grad_norm": 1.496723143645795e-05, "learning_rate": 2.394836635677411e-09, "loss": 0.0, "step": 8055 }, { "epoch": 0.9986370957749969, "grad_norm": 1.3885872249375097e-05, "learning_rate": 1.1319368861339997e-09, "loss": 0.0, "step": 8060 }, { "epoch": 0.9992565976954528, "grad_norm": 1.791957583918702e-05, "learning_rate": 3.3677505707485626e-10, "loss": 0.0, "step": 8065 }, { "epoch": 0.9998760996159088, "grad_norm": 1.0679298611648846e-05, "learning_rate": 9.354867802624512e-12, "loss": 0.0, "step": 8070 }, { "epoch": 1.0, "step": 8071, "total_flos": 7.174616766788665e+18, "train_loss": 0.0014582729011872687, "train_runtime": 42532.2079, "train_samples_per_second": 3.036, "train_steps_per_second": 0.19 } ], "logging_steps": 5, "max_steps": 8071, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.174616766788665e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }