{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 19532, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005119803399549457, "grad_norm": 1.4939812421798706, "learning_rate": 7.679705099324186e-05, "loss": 8.8823, "step": 10 }, { "epoch": 0.0010239606799098914, "grad_norm": 1.7099491357803345, "learning_rate": 0.00015359410198648372, "loss": 8.4098, "step": 20 }, { "epoch": 0.0015359410198648373, "grad_norm": 1.341354489326477, "learning_rate": 0.00023039115297972558, "loss": 7.809, "step": 30 }, { "epoch": 0.0020479213598197828, "grad_norm": 1.2555238008499146, "learning_rate": 0.00030718820397296744, "loss": 7.2814, "step": 40 }, { "epoch": 0.0025599016997747285, "grad_norm": 1.1430288553237915, "learning_rate": 0.0003839852549662093, "loss": 6.8009, "step": 50 }, { "epoch": 0.0030718820397296746, "grad_norm": 0.7848866581916809, "learning_rate": 0.00046078230595945115, "loss": 6.4164, "step": 60 }, { "epoch": 0.0035838623796846203, "grad_norm": 1.1270220279693604, "learning_rate": 0.000537579356952693, "loss": 6.1553, "step": 70 }, { "epoch": 0.0040958427196395655, "grad_norm": 0.5496548414230347, "learning_rate": 0.0006143764079459349, "loss": 5.9572, "step": 80 }, { "epoch": 0.004607823059594511, "grad_norm": 0.8258134126663208, "learning_rate": 0.0006911734589391768, "loss": 5.7536, "step": 90 }, { "epoch": 0.005119803399549457, "grad_norm": 0.544425368309021, "learning_rate": 0.0007679705099324186, "loss": 5.6043, "step": 100 }, { "epoch": 0.005631783739504403, "grad_norm": 1.129820466041565, "learning_rate": 0.0008447675609256605, "loss": 5.3984, "step": 110 }, { "epoch": 0.006143764079459349, "grad_norm": 1.6234118938446045, "learning_rate": 0.0009215646119189023, "loss": 5.2392, "step": 120 }, { "epoch": 0.006655744419414295, "grad_norm": 0.7183708548545837, "learning_rate": 0.0009983616629121441, "loss": 5.107, "step": 130 }, { "epoch": 0.0071677247593692405, "grad_norm": 1.0296348333358765, "learning_rate": 0.001075158713905386, "loss": 4.9662, "step": 140 }, { "epoch": 0.007679705099324186, "grad_norm": 1.6978133916854858, "learning_rate": 0.001151955764898628, "loss": 4.8161, "step": 150 }, { "epoch": 0.008191685439279131, "grad_norm": 0.8946409821510315, "learning_rate": 0.0012287528158918697, "loss": 4.7119, "step": 160 }, { "epoch": 0.008703665779234078, "grad_norm": 1.0135765075683594, "learning_rate": 0.0013055498668851117, "loss": 4.6082, "step": 170 }, { "epoch": 0.009215646119189022, "grad_norm": 0.8236331343650818, "learning_rate": 0.0013823469178783536, "loss": 4.496, "step": 180 }, { "epoch": 0.009727626459143969, "grad_norm": 1.161008596420288, "learning_rate": 0.0014591439688715956, "loss": 4.4071, "step": 190 }, { "epoch": 0.010239606799098914, "grad_norm": 1.3253235816955566, "learning_rate": 0.0015, "loss": 4.3286, "step": 200 }, { "epoch": 0.01075158713905386, "grad_norm": 1.6026867628097534, "learning_rate": 0.0015, "loss": 4.2374, "step": 210 }, { "epoch": 0.011263567479008805, "grad_norm": 1.0043503046035767, "learning_rate": 0.0015, "loss": 4.1526, "step": 220 }, { "epoch": 0.011775547818963752, "grad_norm": 0.963283121585846, "learning_rate": 0.0015, "loss": 4.0709, "step": 230 }, { "epoch": 0.012287528158918698, "grad_norm": 0.8025517463684082, "learning_rate": 0.0015, "loss": 3.9997, "step": 240 }, { "epoch": 0.012799508498873643, "grad_norm": 0.7000623345375061, "learning_rate": 0.0015, "loss": 3.91, "step": 250 }, { "epoch": 0.01331148883882859, "grad_norm": 0.8964600563049316, "learning_rate": 0.0015, "loss": 3.8844, "step": 260 }, { "epoch": 0.013823469178783535, "grad_norm": 0.7321097254753113, "learning_rate": 0.0015, "loss": 3.8324, "step": 270 }, { "epoch": 0.014335449518738481, "grad_norm": 0.8242825269699097, "learning_rate": 0.0015, "loss": 3.7653, "step": 280 }, { "epoch": 0.014847429858693426, "grad_norm": 1.045832633972168, "learning_rate": 0.0015, "loss": 3.7115, "step": 290 }, { "epoch": 0.015359410198648372, "grad_norm": 1.0511783361434937, "learning_rate": 0.0015, "loss": 3.6754, "step": 300 }, { "epoch": 0.015871390538603317, "grad_norm": 0.79283607006073, "learning_rate": 0.0015, "loss": 3.615, "step": 310 }, { "epoch": 0.016383370878558262, "grad_norm": 0.7592840194702148, "learning_rate": 0.0015, "loss": 3.5692, "step": 320 }, { "epoch": 0.01689535121851321, "grad_norm": 0.6317871809005737, "learning_rate": 0.0015, "loss": 3.5581, "step": 330 }, { "epoch": 0.017407331558468155, "grad_norm": 0.8634727597236633, "learning_rate": 0.0015, "loss": 3.5035, "step": 340 }, { "epoch": 0.0179193118984231, "grad_norm": 0.9801504611968994, "learning_rate": 0.0015, "loss": 3.4543, "step": 350 }, { "epoch": 0.018431292238378045, "grad_norm": 0.9941282868385315, "learning_rate": 0.0015, "loss": 3.4323, "step": 360 }, { "epoch": 0.018943272578332993, "grad_norm": 1.1075271368026733, "learning_rate": 0.0015, "loss": 3.3992, "step": 370 }, { "epoch": 0.019455252918287938, "grad_norm": 0.9263769388198853, "learning_rate": 0.0015, "loss": 3.3484, "step": 380 }, { "epoch": 0.019967233258242883, "grad_norm": 0.6879151463508606, "learning_rate": 0.0015, "loss": 3.3255, "step": 390 }, { "epoch": 0.020479213598197828, "grad_norm": 1.0170198678970337, "learning_rate": 0.0015, "loss": 3.2744, "step": 400 }, { "epoch": 0.020991193938152776, "grad_norm": 0.9534377455711365, "learning_rate": 0.0015, "loss": 3.2513, "step": 410 }, { "epoch": 0.02150317427810772, "grad_norm": 1.1487725973129272, "learning_rate": 0.0015, "loss": 3.2043, "step": 420 }, { "epoch": 0.022015154618062666, "grad_norm": 0.8081286549568176, "learning_rate": 0.0015, "loss": 3.1891, "step": 430 }, { "epoch": 0.02252713495801761, "grad_norm": 0.8324559926986694, "learning_rate": 0.0015, "loss": 3.1025, "step": 440 }, { "epoch": 0.02303911529797256, "grad_norm": 0.9536003470420837, "learning_rate": 0.0015, "loss": 3.1029, "step": 450 }, { "epoch": 0.023551095637927504, "grad_norm": 1.3307809829711914, "learning_rate": 0.0015, "loss": 3.0508, "step": 460 }, { "epoch": 0.02406307597788245, "grad_norm": 1.237606167793274, "learning_rate": 0.0015, "loss": 3.0528, "step": 470 }, { "epoch": 0.024575056317837397, "grad_norm": 0.9293427467346191, "learning_rate": 0.0015, "loss": 2.9933, "step": 480 }, { "epoch": 0.02508703665779234, "grad_norm": 0.8388038873672485, "learning_rate": 0.0015, "loss": 2.9593, "step": 490 }, { "epoch": 0.025599016997747286, "grad_norm": 0.7568584084510803, "learning_rate": 0.0015, "loss": 2.9442, "step": 500 }, { "epoch": 0.02611099733770223, "grad_norm": 0.7443001866340637, "learning_rate": 0.0015, "loss": 2.9138, "step": 510 }, { "epoch": 0.02662297767765718, "grad_norm": 0.9567376375198364, "learning_rate": 0.0015, "loss": 2.8952, "step": 520 }, { "epoch": 0.027134958017612124, "grad_norm": 0.7521085143089294, "learning_rate": 0.0015, "loss": 2.8719, "step": 530 }, { "epoch": 0.02764693835756707, "grad_norm": 1.0200743675231934, "learning_rate": 0.0015, "loss": 2.8533, "step": 540 }, { "epoch": 0.028158918697522014, "grad_norm": 0.8097197413444519, "learning_rate": 0.0015, "loss": 2.8476, "step": 550 }, { "epoch": 0.028670899037476962, "grad_norm": 0.7335869669914246, "learning_rate": 0.0015, "loss": 2.7611, "step": 560 }, { "epoch": 0.029182879377431907, "grad_norm": 0.7385020852088928, "learning_rate": 0.0015, "loss": 2.7824, "step": 570 }, { "epoch": 0.029694859717386852, "grad_norm": 0.8730366826057434, "learning_rate": 0.0015, "loss": 2.7236, "step": 580 }, { "epoch": 0.030206840057341797, "grad_norm": 0.8042418360710144, "learning_rate": 0.0015, "loss": 2.7331, "step": 590 }, { "epoch": 0.030718820397296745, "grad_norm": 0.7750236392021179, "learning_rate": 0.0015, "loss": 2.6946, "step": 600 }, { "epoch": 0.03123080073725169, "grad_norm": 1.130753755569458, "learning_rate": 0.0015, "loss": 2.7127, "step": 610 }, { "epoch": 0.031742781077206635, "grad_norm": 0.7699748277664185, "learning_rate": 0.0015, "loss": 2.665, "step": 620 }, { "epoch": 0.03225476141716158, "grad_norm": 0.7676917314529419, "learning_rate": 0.0015, "loss": 2.6516, "step": 630 }, { "epoch": 0.032766741757116524, "grad_norm": 0.9566435217857361, "learning_rate": 0.0015, "loss": 2.6311, "step": 640 }, { "epoch": 0.03327872209707147, "grad_norm": 0.9319092631340027, "learning_rate": 0.0015, "loss": 2.6062, "step": 650 }, { "epoch": 0.03379070243702642, "grad_norm": 0.7314916849136353, "learning_rate": 0.0015, "loss": 2.5822, "step": 660 }, { "epoch": 0.03430268277698136, "grad_norm": 0.765346109867096, "learning_rate": 0.0015, "loss": 2.587, "step": 670 }, { "epoch": 0.03481466311693631, "grad_norm": 0.8714979887008667, "learning_rate": 0.0015, "loss": 2.5479, "step": 680 }, { "epoch": 0.03532664345689126, "grad_norm": 0.7182953357696533, "learning_rate": 0.0015, "loss": 2.5388, "step": 690 }, { "epoch": 0.0358386237968462, "grad_norm": 0.71555095911026, "learning_rate": 0.0015, "loss": 2.5196, "step": 700 }, { "epoch": 0.03635060413680115, "grad_norm": 0.6901549696922302, "learning_rate": 0.0015, "loss": 2.4948, "step": 710 }, { "epoch": 0.03686258447675609, "grad_norm": 0.7073848247528076, "learning_rate": 0.0015, "loss": 2.4814, "step": 720 }, { "epoch": 0.03737456481671104, "grad_norm": 0.6590971350669861, "learning_rate": 0.0015, "loss": 2.4799, "step": 730 }, { "epoch": 0.037886545156665986, "grad_norm": 0.6124588251113892, "learning_rate": 0.0015, "loss": 2.4529, "step": 740 }, { "epoch": 0.03839852549662093, "grad_norm": 0.7170097231864929, "learning_rate": 0.0015, "loss": 2.4397, "step": 750 }, { "epoch": 0.038910505836575876, "grad_norm": 0.7509459853172302, "learning_rate": 0.0015, "loss": 2.433, "step": 760 }, { "epoch": 0.039422486176530824, "grad_norm": 0.8185219168663025, "learning_rate": 0.0015, "loss": 2.4364, "step": 770 }, { "epoch": 0.039934466516485766, "grad_norm": 0.6452121734619141, "learning_rate": 0.0015, "loss": 2.4375, "step": 780 }, { "epoch": 0.040446446856440714, "grad_norm": 0.7798700928688049, "learning_rate": 0.0015, "loss": 2.4082, "step": 790 }, { "epoch": 0.040958427196395655, "grad_norm": 0.905072808265686, "learning_rate": 0.0015, "loss": 2.3811, "step": 800 }, { "epoch": 0.041470407536350604, "grad_norm": 0.7047348618507385, "learning_rate": 0.0015, "loss": 2.3955, "step": 810 }, { "epoch": 0.04198238787630555, "grad_norm": 0.6472852230072021, "learning_rate": 0.0015, "loss": 2.3776, "step": 820 }, { "epoch": 0.04249436821626049, "grad_norm": 0.729308545589447, "learning_rate": 0.0015, "loss": 2.3465, "step": 830 }, { "epoch": 0.04300634855621544, "grad_norm": 0.8292624950408936, "learning_rate": 0.0015, "loss": 2.3578, "step": 840 }, { "epoch": 0.04351832889617039, "grad_norm": 0.6298139691352844, "learning_rate": 0.0015, "loss": 2.3349, "step": 850 }, { "epoch": 0.04403030923612533, "grad_norm": 0.647214949131012, "learning_rate": 0.0015, "loss": 2.299, "step": 860 }, { "epoch": 0.04454228957608028, "grad_norm": 0.7034851312637329, "learning_rate": 0.0015, "loss": 2.2927, "step": 870 }, { "epoch": 0.04505426991603522, "grad_norm": 0.6373961567878723, "learning_rate": 0.0015, "loss": 2.2776, "step": 880 }, { "epoch": 0.04556625025599017, "grad_norm": 0.8384701609611511, "learning_rate": 0.0015, "loss": 2.2948, "step": 890 }, { "epoch": 0.04607823059594512, "grad_norm": 0.7856025695800781, "learning_rate": 0.0015, "loss": 2.3034, "step": 900 }, { "epoch": 0.04659021093590006, "grad_norm": 0.6041284799575806, "learning_rate": 0.0015, "loss": 2.2773, "step": 910 }, { "epoch": 0.04710219127585501, "grad_norm": 0.8801588416099548, "learning_rate": 0.0015, "loss": 2.2706, "step": 920 }, { "epoch": 0.047614171615809955, "grad_norm": 0.7567424178123474, "learning_rate": 0.0015, "loss": 2.2754, "step": 930 }, { "epoch": 0.0481261519557649, "grad_norm": 0.6421610713005066, "learning_rate": 0.0015, "loss": 2.2514, "step": 940 }, { "epoch": 0.048638132295719845, "grad_norm": 0.7311142683029175, "learning_rate": 0.0015, "loss": 2.2005, "step": 950 }, { "epoch": 0.04915011263567479, "grad_norm": 0.7399065494537354, "learning_rate": 0.0015, "loss": 2.2038, "step": 960 }, { "epoch": 0.049662092975629735, "grad_norm": 0.708454430103302, "learning_rate": 0.0015, "loss": 2.1758, "step": 970 }, { "epoch": 0.05017407331558468, "grad_norm": 0.6199438571929932, "learning_rate": 0.0015, "loss": 2.227, "step": 980 }, { "epoch": 0.050686053655539624, "grad_norm": 0.6159200668334961, "learning_rate": 0.0015, "loss": 2.1547, "step": 990 }, { "epoch": 0.05119803399549457, "grad_norm": 0.6560512781143188, "learning_rate": 0.0015, "loss": 2.1787, "step": 1000 }, { "epoch": 0.05171001433544952, "grad_norm": 0.6151387691497803, "learning_rate": 0.0015, "loss": 2.1776, "step": 1010 }, { "epoch": 0.05222199467540446, "grad_norm": 0.6162774562835693, "learning_rate": 0.0015, "loss": 2.1604, "step": 1020 }, { "epoch": 0.05273397501535941, "grad_norm": 0.6564657092094421, "learning_rate": 0.0015, "loss": 2.1837, "step": 1030 }, { "epoch": 0.05324595535531436, "grad_norm": 0.5790508985519409, "learning_rate": 0.0015, "loss": 2.1561, "step": 1040 }, { "epoch": 0.0537579356952693, "grad_norm": 0.6484589576721191, "learning_rate": 0.0015, "loss": 2.1676, "step": 1050 }, { "epoch": 0.05426991603522425, "grad_norm": 0.6969457268714905, "learning_rate": 0.0015, "loss": 2.1462, "step": 1060 }, { "epoch": 0.05478189637517919, "grad_norm": 0.7145557403564453, "learning_rate": 0.0015, "loss": 2.13, "step": 1070 }, { "epoch": 0.05529387671513414, "grad_norm": 0.6353093981742859, "learning_rate": 0.0015, "loss": 2.1197, "step": 1080 }, { "epoch": 0.055805857055089086, "grad_norm": 0.5896279811859131, "learning_rate": 0.0015, "loss": 2.1177, "step": 1090 }, { "epoch": 0.05631783739504403, "grad_norm": 0.6247608661651611, "learning_rate": 0.0015, "loss": 2.1123, "step": 1100 }, { "epoch": 0.056829817734998976, "grad_norm": 0.6024080514907837, "learning_rate": 0.0015, "loss": 2.0949, "step": 1110 }, { "epoch": 0.057341798074953924, "grad_norm": 0.7400630116462708, "learning_rate": 0.0015, "loss": 2.0915, "step": 1120 }, { "epoch": 0.057853778414908866, "grad_norm": 0.6276081800460815, "learning_rate": 0.0015, "loss": 2.0916, "step": 1130 }, { "epoch": 0.058365758754863814, "grad_norm": 0.7214579582214355, "learning_rate": 0.0015, "loss": 2.1027, "step": 1140 }, { "epoch": 0.05887773909481876, "grad_norm": 0.7833266258239746, "learning_rate": 0.0015, "loss": 2.0884, "step": 1150 }, { "epoch": 0.059389719434773704, "grad_norm": 0.7453588247299194, "learning_rate": 0.0015, "loss": 2.0764, "step": 1160 }, { "epoch": 0.05990169977472865, "grad_norm": 0.5965461134910583, "learning_rate": 0.0015, "loss": 2.0941, "step": 1170 }, { "epoch": 0.06041368011468359, "grad_norm": 0.6565614938735962, "learning_rate": 0.0015, "loss": 2.0396, "step": 1180 }, { "epoch": 0.06092566045463854, "grad_norm": 0.670816957950592, "learning_rate": 0.0015, "loss": 2.0629, "step": 1190 }, { "epoch": 0.06143764079459349, "grad_norm": 0.6220470666885376, "learning_rate": 0.0015, "loss": 2.064, "step": 1200 }, { "epoch": 0.06194962113454843, "grad_norm": 0.5919376015663147, "learning_rate": 0.0015, "loss": 2.0385, "step": 1210 }, { "epoch": 0.06246160147450338, "grad_norm": 0.6242793202400208, "learning_rate": 0.0015, "loss": 2.0487, "step": 1220 }, { "epoch": 0.06297358181445832, "grad_norm": 0.5903810262680054, "learning_rate": 0.0015, "loss": 2.0348, "step": 1230 }, { "epoch": 0.06348556215441327, "grad_norm": 0.6573896408081055, "learning_rate": 0.0015, "loss": 2.0186, "step": 1240 }, { "epoch": 0.06399754249436822, "grad_norm": 0.6017488241195679, "learning_rate": 0.0015, "loss": 2.0126, "step": 1250 }, { "epoch": 0.06450952283432317, "grad_norm": 0.533431351184845, "learning_rate": 0.0015, "loss": 2.026, "step": 1260 }, { "epoch": 0.06502150317427811, "grad_norm": 0.5461450815200806, "learning_rate": 0.0015, "loss": 1.9961, "step": 1270 }, { "epoch": 0.06553348351423305, "grad_norm": 0.5579766035079956, "learning_rate": 0.0015, "loss": 2.0064, "step": 1280 }, { "epoch": 0.066045463854188, "grad_norm": 0.5514289736747742, "learning_rate": 0.0015, "loss": 2.0079, "step": 1290 }, { "epoch": 0.06655744419414295, "grad_norm": 0.5938010215759277, "learning_rate": 0.0015, "loss": 1.9811, "step": 1300 }, { "epoch": 0.0670694245340979, "grad_norm": 0.703124463558197, "learning_rate": 0.0015, "loss": 1.9634, "step": 1310 }, { "epoch": 0.06758140487405284, "grad_norm": 0.545432448387146, "learning_rate": 0.0015, "loss": 1.9927, "step": 1320 }, { "epoch": 0.06809338521400778, "grad_norm": 0.5673125386238098, "learning_rate": 0.0015, "loss": 1.9911, "step": 1330 }, { "epoch": 0.06860536555396272, "grad_norm": 0.5682245492935181, "learning_rate": 0.0015, "loss": 1.9733, "step": 1340 }, { "epoch": 0.06911734589391767, "grad_norm": 0.5960274934768677, "learning_rate": 0.0015, "loss": 1.9733, "step": 1350 }, { "epoch": 0.06962932623387262, "grad_norm": 0.6102215051651001, "learning_rate": 0.0015, "loss": 1.9559, "step": 1360 }, { "epoch": 0.07014130657382757, "grad_norm": 0.5990728735923767, "learning_rate": 0.0015, "loss": 1.9463, "step": 1370 }, { "epoch": 0.07065328691378252, "grad_norm": 0.6161502003669739, "learning_rate": 0.0015, "loss": 1.978, "step": 1380 }, { "epoch": 0.07116526725373745, "grad_norm": 0.5682898759841919, "learning_rate": 0.0015, "loss": 1.9558, "step": 1390 }, { "epoch": 0.0716772475936924, "grad_norm": 0.5973048210144043, "learning_rate": 0.0015, "loss": 1.9376, "step": 1400 }, { "epoch": 0.07218922793364735, "grad_norm": 0.5553535223007202, "learning_rate": 0.0015, "loss": 1.9468, "step": 1410 }, { "epoch": 0.0727012082736023, "grad_norm": 0.5181711912155151, "learning_rate": 0.0015, "loss": 1.9188, "step": 1420 }, { "epoch": 0.07321318861355725, "grad_norm": 0.6532855033874512, "learning_rate": 0.0015, "loss": 1.9069, "step": 1430 }, { "epoch": 0.07372516895351218, "grad_norm": 0.531043291091919, "learning_rate": 0.0015, "loss": 1.9319, "step": 1440 }, { "epoch": 0.07423714929346713, "grad_norm": 0.5700235962867737, "learning_rate": 0.0015, "loss": 1.8891, "step": 1450 }, { "epoch": 0.07474912963342208, "grad_norm": 0.523414134979248, "learning_rate": 0.0015, "loss": 1.9165, "step": 1460 }, { "epoch": 0.07526110997337702, "grad_norm": 0.5649904608726501, "learning_rate": 0.0015, "loss": 1.905, "step": 1470 }, { "epoch": 0.07577309031333197, "grad_norm": 0.5912672877311707, "learning_rate": 0.0015, "loss": 1.9162, "step": 1480 }, { "epoch": 0.07628507065328691, "grad_norm": 0.5597636699676514, "learning_rate": 0.0015, "loss": 1.9158, "step": 1490 }, { "epoch": 0.07679705099324186, "grad_norm": 0.553896963596344, "learning_rate": 0.0015, "loss": 1.871, "step": 1500 }, { "epoch": 0.0773090313331968, "grad_norm": 0.5018342137336731, "learning_rate": 0.0015, "loss": 1.9119, "step": 1510 }, { "epoch": 0.07782101167315175, "grad_norm": 0.5367796421051025, "learning_rate": 0.0015, "loss": 1.8706, "step": 1520 }, { "epoch": 0.0783329920131067, "grad_norm": 0.5023203492164612, "learning_rate": 0.0015, "loss": 1.8808, "step": 1530 }, { "epoch": 0.07884497235306165, "grad_norm": 0.5962059497833252, "learning_rate": 0.0015, "loss": 1.9022, "step": 1540 }, { "epoch": 0.07935695269301658, "grad_norm": 0.5200186967849731, "learning_rate": 0.0015, "loss": 1.8728, "step": 1550 }, { "epoch": 0.07986893303297153, "grad_norm": 0.5361810922622681, "learning_rate": 0.0015, "loss": 1.8462, "step": 1560 }, { "epoch": 0.08038091337292648, "grad_norm": 0.5771626830101013, "learning_rate": 0.0015, "loss": 1.873, "step": 1570 }, { "epoch": 0.08089289371288143, "grad_norm": 0.5451227426528931, "learning_rate": 0.0015, "loss": 1.8693, "step": 1580 }, { "epoch": 0.08140487405283638, "grad_norm": 0.5574854016304016, "learning_rate": 0.0015, "loss": 1.8615, "step": 1590 }, { "epoch": 0.08191685439279131, "grad_norm": 0.574317455291748, "learning_rate": 0.0015, "loss": 1.8424, "step": 1600 }, { "epoch": 0.08242883473274626, "grad_norm": 0.545906662940979, "learning_rate": 0.0015, "loss": 1.8572, "step": 1610 }, { "epoch": 0.08294081507270121, "grad_norm": 0.5127050280570984, "learning_rate": 0.0015, "loss": 1.8391, "step": 1620 }, { "epoch": 0.08345279541265616, "grad_norm": 0.5646129250526428, "learning_rate": 0.0015, "loss": 1.8316, "step": 1630 }, { "epoch": 0.0839647757526111, "grad_norm": 0.5549367666244507, "learning_rate": 0.0015, "loss": 1.8371, "step": 1640 }, { "epoch": 0.08447675609256605, "grad_norm": 0.5479699373245239, "learning_rate": 0.0015, "loss": 1.8378, "step": 1650 }, { "epoch": 0.08498873643252099, "grad_norm": 0.5359328985214233, "learning_rate": 0.0015, "loss": 1.8372, "step": 1660 }, { "epoch": 0.08550071677247593, "grad_norm": 0.5599870085716248, "learning_rate": 0.0015, "loss": 1.8499, "step": 1670 }, { "epoch": 0.08601269711243088, "grad_norm": 0.5272551774978638, "learning_rate": 0.0015, "loss": 1.8381, "step": 1680 }, { "epoch": 0.08652467745238583, "grad_norm": 0.534377932548523, "learning_rate": 0.0015, "loss": 1.8124, "step": 1690 }, { "epoch": 0.08703665779234078, "grad_norm": 0.6432906985282898, "learning_rate": 0.0015, "loss": 1.8354, "step": 1700 }, { "epoch": 0.08754863813229571, "grad_norm": 0.5227901935577393, "learning_rate": 0.0015, "loss": 1.8091, "step": 1710 }, { "epoch": 0.08806061847225066, "grad_norm": 0.48951131105422974, "learning_rate": 0.0015, "loss": 1.7854, "step": 1720 }, { "epoch": 0.08857259881220561, "grad_norm": 0.5127034783363342, "learning_rate": 0.0015, "loss": 1.8208, "step": 1730 }, { "epoch": 0.08908457915216056, "grad_norm": 0.5147260427474976, "learning_rate": 0.0015, "loss": 1.8289, "step": 1740 }, { "epoch": 0.08959655949211551, "grad_norm": 0.536268413066864, "learning_rate": 0.0015, "loss": 1.7894, "step": 1750 }, { "epoch": 0.09010853983207044, "grad_norm": 0.537369966506958, "learning_rate": 0.0015, "loss": 1.7985, "step": 1760 }, { "epoch": 0.09062052017202539, "grad_norm": 0.5217599868774414, "learning_rate": 0.0015, "loss": 1.8196, "step": 1770 }, { "epoch": 0.09113250051198034, "grad_norm": 0.47711503505706787, "learning_rate": 0.0015, "loss": 1.7931, "step": 1780 }, { "epoch": 0.09164448085193529, "grad_norm": 0.5544558763504028, "learning_rate": 0.0015, "loss": 1.8201, "step": 1790 }, { "epoch": 0.09215646119189023, "grad_norm": 0.5024393200874329, "learning_rate": 0.0015, "loss": 1.7974, "step": 1800 }, { "epoch": 0.09266844153184518, "grad_norm": 0.5126355290412903, "learning_rate": 0.0015, "loss": 1.7874, "step": 1810 }, { "epoch": 0.09318042187180012, "grad_norm": 0.5882781744003296, "learning_rate": 0.0015, "loss": 1.791, "step": 1820 }, { "epoch": 0.09369240221175507, "grad_norm": 0.508765697479248, "learning_rate": 0.0015, "loss": 1.7819, "step": 1830 }, { "epoch": 0.09420438255171001, "grad_norm": 0.5449949502944946, "learning_rate": 0.0015, "loss": 1.7838, "step": 1840 }, { "epoch": 0.09471636289166496, "grad_norm": 0.4996667802333832, "learning_rate": 0.0015, "loss": 1.7618, "step": 1850 }, { "epoch": 0.09522834323161991, "grad_norm": 0.5014889240264893, "learning_rate": 0.0015, "loss": 1.7752, "step": 1860 }, { "epoch": 0.09574032357157485, "grad_norm": 0.5011769533157349, "learning_rate": 0.0015, "loss": 1.7768, "step": 1870 }, { "epoch": 0.0962523039115298, "grad_norm": 0.49963292479515076, "learning_rate": 0.0015, "loss": 1.778, "step": 1880 }, { "epoch": 0.09676428425148474, "grad_norm": 0.46659213304519653, "learning_rate": 0.0015, "loss": 1.7668, "step": 1890 }, { "epoch": 0.09727626459143969, "grad_norm": 0.5140760540962219, "learning_rate": 0.0015, "loss": 1.7448, "step": 1900 }, { "epoch": 0.09778824493139464, "grad_norm": 0.49709445238113403, "learning_rate": 0.0015, "loss": 1.7573, "step": 1910 }, { "epoch": 0.09830022527134959, "grad_norm": 0.464329332113266, "learning_rate": 0.0015, "loss": 1.7435, "step": 1920 }, { "epoch": 0.09881220561130452, "grad_norm": 0.4815766215324402, "learning_rate": 0.0015, "loss": 1.7533, "step": 1930 }, { "epoch": 0.09932418595125947, "grad_norm": 0.4601441025733948, "learning_rate": 0.0015, "loss": 1.7339, "step": 1940 }, { "epoch": 0.09983616629121442, "grad_norm": 0.46905994415283203, "learning_rate": 0.0015, "loss": 1.7421, "step": 1950 }, { "epoch": 0.10034814663116937, "grad_norm": 0.4927903413772583, "learning_rate": 0.0015, "loss": 1.7259, "step": 1960 }, { "epoch": 0.10086012697112431, "grad_norm": 0.4930973947048187, "learning_rate": 0.0015, "loss": 1.735, "step": 1970 }, { "epoch": 0.10137210731107925, "grad_norm": 0.4698399305343628, "learning_rate": 0.0015, "loss": 1.7478, "step": 1980 }, { "epoch": 0.1018840876510342, "grad_norm": 0.5083284974098206, "learning_rate": 0.0015, "loss": 1.7491, "step": 1990 }, { "epoch": 0.10239606799098915, "grad_norm": 0.4888325035572052, "learning_rate": 0.0015, "loss": 1.7261, "step": 2000 }, { "epoch": 0.1029080483309441, "grad_norm": 0.524994432926178, "learning_rate": 0.0015, "loss": 1.7221, "step": 2010 }, { "epoch": 0.10342002867089904, "grad_norm": 0.49820294976234436, "learning_rate": 0.0015, "loss": 1.7279, "step": 2020 }, { "epoch": 0.10393200901085399, "grad_norm": 0.49288976192474365, "learning_rate": 0.0015, "loss": 1.746, "step": 2030 }, { "epoch": 0.10444398935080892, "grad_norm": 0.4776252806186676, "learning_rate": 0.0015, "loss": 1.7384, "step": 2040 }, { "epoch": 0.10495596969076387, "grad_norm": 0.46143004298210144, "learning_rate": 0.0015, "loss": 1.7037, "step": 2050 }, { "epoch": 0.10546795003071882, "grad_norm": 0.4855809211730957, "learning_rate": 0.0015, "loss": 1.7052, "step": 2060 }, { "epoch": 0.10597993037067377, "grad_norm": 0.491964727640152, "learning_rate": 0.0015, "loss": 1.7275, "step": 2070 }, { "epoch": 0.10649191071062872, "grad_norm": 0.5072810053825378, "learning_rate": 0.0015, "loss": 1.7262, "step": 2080 }, { "epoch": 0.10700389105058365, "grad_norm": 0.5020768642425537, "learning_rate": 0.0015, "loss": 1.7106, "step": 2090 }, { "epoch": 0.1075158713905386, "grad_norm": 0.4881630837917328, "learning_rate": 0.0015, "loss": 1.7411, "step": 2100 }, { "epoch": 0.10802785173049355, "grad_norm": 0.5104793906211853, "learning_rate": 0.0015, "loss": 1.7053, "step": 2110 }, { "epoch": 0.1085398320704485, "grad_norm": 0.4574519991874695, "learning_rate": 0.0015, "loss": 1.7219, "step": 2120 }, { "epoch": 0.10905181241040345, "grad_norm": 0.4427832365036011, "learning_rate": 0.0015, "loss": 1.6966, "step": 2130 }, { "epoch": 0.10956379275035838, "grad_norm": 0.46723929047584534, "learning_rate": 0.0015, "loss": 1.7106, "step": 2140 }, { "epoch": 0.11007577309031333, "grad_norm": 0.4710049629211426, "learning_rate": 0.0015, "loss": 1.7, "step": 2150 }, { "epoch": 0.11058775343026828, "grad_norm": 0.46849745512008667, "learning_rate": 0.0015, "loss": 1.7071, "step": 2160 }, { "epoch": 0.11109973377022322, "grad_norm": 0.4712335765361786, "learning_rate": 0.0015, "loss": 1.685, "step": 2170 }, { "epoch": 0.11161171411017817, "grad_norm": 0.45318537950515747, "learning_rate": 0.0015, "loss": 1.6996, "step": 2180 }, { "epoch": 0.11212369445013312, "grad_norm": 0.4772440791130066, "learning_rate": 0.0015, "loss": 1.705, "step": 2190 }, { "epoch": 0.11263567479008806, "grad_norm": 0.4854085147380829, "learning_rate": 0.0015, "loss": 1.691, "step": 2200 }, { "epoch": 0.113147655130043, "grad_norm": 0.4931398928165436, "learning_rate": 0.0015, "loss": 1.6979, "step": 2210 }, { "epoch": 0.11365963546999795, "grad_norm": 0.4212550222873688, "learning_rate": 0.0015, "loss": 1.6792, "step": 2220 }, { "epoch": 0.1141716158099529, "grad_norm": 0.4916476905345917, "learning_rate": 0.0015, "loss": 1.682, "step": 2230 }, { "epoch": 0.11468359614990785, "grad_norm": 0.44974076747894287, "learning_rate": 0.0015, "loss": 1.6734, "step": 2240 }, { "epoch": 0.11519557648986278, "grad_norm": 0.4464137554168701, "learning_rate": 0.0015, "loss": 1.7032, "step": 2250 }, { "epoch": 0.11570755682981773, "grad_norm": 0.4473714530467987, "learning_rate": 0.0015, "loss": 1.6868, "step": 2260 }, { "epoch": 0.11621953716977268, "grad_norm": 0.4802720844745636, "learning_rate": 0.0015, "loss": 1.6805, "step": 2270 }, { "epoch": 0.11673151750972763, "grad_norm": 0.45060625672340393, "learning_rate": 0.0015, "loss": 1.6716, "step": 2280 }, { "epoch": 0.11724349784968258, "grad_norm": 0.47407498955726624, "learning_rate": 0.0015, "loss": 1.6569, "step": 2290 }, { "epoch": 0.11775547818963752, "grad_norm": 0.45615556836128235, "learning_rate": 0.0015, "loss": 1.6682, "step": 2300 }, { "epoch": 0.11826745852959246, "grad_norm": 0.4670998156070709, "learning_rate": 0.0015, "loss": 1.6785, "step": 2310 }, { "epoch": 0.11877943886954741, "grad_norm": 0.45432570576667786, "learning_rate": 0.0015, "loss": 1.674, "step": 2320 }, { "epoch": 0.11929141920950236, "grad_norm": 0.44804081320762634, "learning_rate": 0.0015, "loss": 1.6619, "step": 2330 }, { "epoch": 0.1198033995494573, "grad_norm": 0.4523905813694, "learning_rate": 0.0015, "loss": 1.6652, "step": 2340 }, { "epoch": 0.12031537988941225, "grad_norm": 0.4514728784561157, "learning_rate": 0.0015, "loss": 1.6652, "step": 2350 }, { "epoch": 0.12082736022936719, "grad_norm": 0.41209134459495544, "learning_rate": 0.0015, "loss": 1.658, "step": 2360 }, { "epoch": 0.12133934056932213, "grad_norm": 0.4219752252101898, "learning_rate": 0.0015, "loss": 1.6379, "step": 2370 }, { "epoch": 0.12185132090927708, "grad_norm": 0.47252357006073, "learning_rate": 0.0015, "loss": 1.6636, "step": 2380 }, { "epoch": 0.12236330124923203, "grad_norm": 0.4292849004268646, "learning_rate": 0.0015, "loss": 1.6528, "step": 2390 }, { "epoch": 0.12287528158918698, "grad_norm": 0.4734489917755127, "learning_rate": 0.0015, "loss": 1.6297, "step": 2400 }, { "epoch": 0.12338726192914191, "grad_norm": 0.48543623089790344, "learning_rate": 0.0015, "loss": 1.6404, "step": 2410 }, { "epoch": 0.12389924226909686, "grad_norm": 0.4184911549091339, "learning_rate": 0.0015, "loss": 1.6315, "step": 2420 }, { "epoch": 0.12441122260905181, "grad_norm": 0.42600351572036743, "learning_rate": 0.0015, "loss": 1.6502, "step": 2430 }, { "epoch": 0.12492320294900676, "grad_norm": 0.4201619029045105, "learning_rate": 0.0015, "loss": 1.6372, "step": 2440 }, { "epoch": 0.1254351832889617, "grad_norm": 0.4165250360965729, "learning_rate": 0.0015, "loss": 1.6334, "step": 2450 }, { "epoch": 0.12594716362891664, "grad_norm": 0.4470268487930298, "learning_rate": 0.0015, "loss": 1.6359, "step": 2460 }, { "epoch": 0.1264591439688716, "grad_norm": 0.4310542941093445, "learning_rate": 0.0015, "loss": 1.6439, "step": 2470 }, { "epoch": 0.12697112430882654, "grad_norm": 0.4297926425933838, "learning_rate": 0.0015, "loss": 1.6222, "step": 2480 }, { "epoch": 0.1274831046487815, "grad_norm": 0.45335137844085693, "learning_rate": 0.0015, "loss": 1.6559, "step": 2490 }, { "epoch": 0.12799508498873644, "grad_norm": 0.4176558256149292, "learning_rate": 0.0015, "loss": 1.6561, "step": 2500 }, { "epoch": 0.12850706532869138, "grad_norm": 0.4358290433883667, "learning_rate": 0.0015, "loss": 1.6241, "step": 2510 }, { "epoch": 0.12901904566864633, "grad_norm": 0.44109201431274414, "learning_rate": 0.0015, "loss": 1.6022, "step": 2520 }, { "epoch": 0.12953102600860128, "grad_norm": 0.44387978315353394, "learning_rate": 0.0015, "loss": 1.6335, "step": 2530 }, { "epoch": 0.13004300634855623, "grad_norm": 0.434861421585083, "learning_rate": 0.0015, "loss": 1.6377, "step": 2540 }, { "epoch": 0.13055498668851115, "grad_norm": 0.419826865196228, "learning_rate": 0.0015, "loss": 1.6238, "step": 2550 }, { "epoch": 0.1310669670284661, "grad_norm": 0.471110463142395, "learning_rate": 0.0015, "loss": 1.6383, "step": 2560 }, { "epoch": 0.13157894736842105, "grad_norm": 0.44935643672943115, "learning_rate": 0.0015, "loss": 1.6006, "step": 2570 }, { "epoch": 0.132090927708376, "grad_norm": 0.4497852027416229, "learning_rate": 0.0015, "loss": 1.6115, "step": 2580 }, { "epoch": 0.13260290804833094, "grad_norm": 0.45850351452827454, "learning_rate": 0.0015, "loss": 1.6194, "step": 2590 }, { "epoch": 0.1331148883882859, "grad_norm": 0.40869665145874023, "learning_rate": 0.0015, "loss": 1.6159, "step": 2600 }, { "epoch": 0.13362686872824084, "grad_norm": 0.4347962737083435, "learning_rate": 0.0015, "loss": 1.6254, "step": 2610 }, { "epoch": 0.1341388490681958, "grad_norm": 0.4899897873401642, "learning_rate": 0.0015, "loss": 1.6296, "step": 2620 }, { "epoch": 0.13465082940815074, "grad_norm": 0.44309839606285095, "learning_rate": 0.0015, "loss": 1.6179, "step": 2630 }, { "epoch": 0.13516280974810568, "grad_norm": 0.3890606164932251, "learning_rate": 0.0015, "loss": 1.6044, "step": 2640 }, { "epoch": 0.13567479008806063, "grad_norm": 0.42358025908470154, "learning_rate": 0.0015, "loss": 1.619, "step": 2650 }, { "epoch": 0.13618677042801555, "grad_norm": 0.42111581563949585, "learning_rate": 0.0015, "loss": 1.6127, "step": 2660 }, { "epoch": 0.1366987507679705, "grad_norm": 0.4441932141780853, "learning_rate": 0.0015, "loss": 1.6224, "step": 2670 }, { "epoch": 0.13721073110792545, "grad_norm": 0.4351959228515625, "learning_rate": 0.0015, "loss": 1.5957, "step": 2680 }, { "epoch": 0.1377227114478804, "grad_norm": 0.43544304370880127, "learning_rate": 0.0015, "loss": 1.5925, "step": 2690 }, { "epoch": 0.13823469178783535, "grad_norm": 0.4298728406429291, "learning_rate": 0.0015, "loss": 1.5893, "step": 2700 }, { "epoch": 0.1387466721277903, "grad_norm": 0.4463229477405548, "learning_rate": 0.0015, "loss": 1.5881, "step": 2710 }, { "epoch": 0.13925865246774524, "grad_norm": 0.43847158551216125, "learning_rate": 0.0015, "loss": 1.5982, "step": 2720 }, { "epoch": 0.1397706328077002, "grad_norm": 0.44918614625930786, "learning_rate": 0.0015, "loss": 1.6095, "step": 2730 }, { "epoch": 0.14028261314765514, "grad_norm": 0.45398586988449097, "learning_rate": 0.0015, "loss": 1.5985, "step": 2740 }, { "epoch": 0.1407945934876101, "grad_norm": 0.41213494539260864, "learning_rate": 0.0015, "loss": 1.6153, "step": 2750 }, { "epoch": 0.14130657382756504, "grad_norm": 0.41266897320747375, "learning_rate": 0.0015, "loss": 1.5919, "step": 2760 }, { "epoch": 0.14181855416751996, "grad_norm": 0.42942896485328674, "learning_rate": 0.0015, "loss": 1.5793, "step": 2770 }, { "epoch": 0.1423305345074749, "grad_norm": 0.4180223047733307, "learning_rate": 0.0015, "loss": 1.5938, "step": 2780 }, { "epoch": 0.14284251484742985, "grad_norm": 0.4204559922218323, "learning_rate": 0.0015, "loss": 1.5927, "step": 2790 }, { "epoch": 0.1433544951873848, "grad_norm": 0.43727442622184753, "learning_rate": 0.0015, "loss": 1.6018, "step": 2800 }, { "epoch": 0.14386647552733975, "grad_norm": 0.4330785870552063, "learning_rate": 0.0015, "loss": 1.6004, "step": 2810 }, { "epoch": 0.1443784558672947, "grad_norm": 0.415101021528244, "learning_rate": 0.0015, "loss": 1.5708, "step": 2820 }, { "epoch": 0.14489043620724965, "grad_norm": 0.41477903723716736, "learning_rate": 0.0015, "loss": 1.5747, "step": 2830 }, { "epoch": 0.1454024165472046, "grad_norm": 0.4343889653682709, "learning_rate": 0.0015, "loss": 1.5958, "step": 2840 }, { "epoch": 0.14591439688715954, "grad_norm": 0.4018150866031647, "learning_rate": 0.0015, "loss": 1.5589, "step": 2850 }, { "epoch": 0.1464263772271145, "grad_norm": 0.4799724817276001, "learning_rate": 0.0015, "loss": 1.5745, "step": 2860 }, { "epoch": 0.1469383575670694, "grad_norm": 0.42355528473854065, "learning_rate": 0.0015, "loss": 1.5928, "step": 2870 }, { "epoch": 0.14745033790702436, "grad_norm": 0.40638747811317444, "learning_rate": 0.0015, "loss": 1.5623, "step": 2880 }, { "epoch": 0.1479623182469793, "grad_norm": 0.39846664667129517, "learning_rate": 0.0015, "loss": 1.577, "step": 2890 }, { "epoch": 0.14847429858693426, "grad_norm": 0.4010321795940399, "learning_rate": 0.0015, "loss": 1.5821, "step": 2900 }, { "epoch": 0.1489862789268892, "grad_norm": 0.42778313159942627, "learning_rate": 0.0015, "loss": 1.5623, "step": 2910 }, { "epoch": 0.14949825926684415, "grad_norm": 0.39266425371170044, "learning_rate": 0.0015, "loss": 1.5821, "step": 2920 }, { "epoch": 0.1500102396067991, "grad_norm": 0.40784794092178345, "learning_rate": 0.0015, "loss": 1.5664, "step": 2930 }, { "epoch": 0.15052221994675405, "grad_norm": 0.43437501788139343, "learning_rate": 0.0015, "loss": 1.5658, "step": 2940 }, { "epoch": 0.151034200286709, "grad_norm": 0.4373057186603546, "learning_rate": 0.0015, "loss": 1.5591, "step": 2950 }, { "epoch": 0.15154618062666395, "grad_norm": 0.40370023250579834, "learning_rate": 0.0015, "loss": 1.555, "step": 2960 }, { "epoch": 0.1520581609666189, "grad_norm": 0.4626748263835907, "learning_rate": 0.0015, "loss": 1.5808, "step": 2970 }, { "epoch": 0.15257014130657381, "grad_norm": 0.4095107614994049, "learning_rate": 0.0015, "loss": 1.5705, "step": 2980 }, { "epoch": 0.15308212164652876, "grad_norm": 0.4343841075897217, "learning_rate": 0.0015, "loss": 1.5738, "step": 2990 }, { "epoch": 0.1535941019864837, "grad_norm": 0.42325645685195923, "learning_rate": 0.0015, "loss": 1.567, "step": 3000 }, { "epoch": 0.15410608232643866, "grad_norm": 0.39237692952156067, "learning_rate": 0.0015, "loss": 1.5748, "step": 3010 }, { "epoch": 0.1546180626663936, "grad_norm": 0.39682793617248535, "learning_rate": 0.0015, "loss": 1.5711, "step": 3020 }, { "epoch": 0.15513004300634856, "grad_norm": 0.4060477614402771, "learning_rate": 0.0015, "loss": 1.5623, "step": 3030 }, { "epoch": 0.1556420233463035, "grad_norm": 0.4088119864463806, "learning_rate": 0.0015, "loss": 1.5532, "step": 3040 }, { "epoch": 0.15615400368625845, "grad_norm": 0.39976736903190613, "learning_rate": 0.0015, "loss": 1.5436, "step": 3050 }, { "epoch": 0.1566659840262134, "grad_norm": 0.42855167388916016, "learning_rate": 0.0015, "loss": 1.5577, "step": 3060 }, { "epoch": 0.15717796436616835, "grad_norm": 0.4451335072517395, "learning_rate": 0.0015, "loss": 1.5375, "step": 3070 }, { "epoch": 0.1576899447061233, "grad_norm": 0.3867264688014984, "learning_rate": 0.0015, "loss": 1.5418, "step": 3080 }, { "epoch": 0.15820192504607822, "grad_norm": 0.4165036976337433, "learning_rate": 0.0015, "loss": 1.564, "step": 3090 }, { "epoch": 0.15871390538603317, "grad_norm": 0.3978787958621979, "learning_rate": 0.0015, "loss": 1.5408, "step": 3100 }, { "epoch": 0.15922588572598811, "grad_norm": 0.37848272919654846, "learning_rate": 0.0015, "loss": 1.5477, "step": 3110 }, { "epoch": 0.15973786606594306, "grad_norm": 0.4218755066394806, "learning_rate": 0.0015, "loss": 1.5533, "step": 3120 }, { "epoch": 0.160249846405898, "grad_norm": 0.38090386986732483, "learning_rate": 0.0015, "loss": 1.5453, "step": 3130 }, { "epoch": 0.16076182674585296, "grad_norm": 0.39693617820739746, "learning_rate": 0.0015, "loss": 1.5633, "step": 3140 }, { "epoch": 0.1612738070858079, "grad_norm": 0.3855767250061035, "learning_rate": 0.0015, "loss": 1.5381, "step": 3150 }, { "epoch": 0.16178578742576286, "grad_norm": 0.3672980070114136, "learning_rate": 0.0015, "loss": 1.5458, "step": 3160 }, { "epoch": 0.1622977677657178, "grad_norm": 0.3810063302516937, "learning_rate": 0.0015, "loss": 1.559, "step": 3170 }, { "epoch": 0.16280974810567275, "grad_norm": 0.4658653140068054, "learning_rate": 0.0015, "loss": 1.5274, "step": 3180 }, { "epoch": 0.1633217284456277, "grad_norm": 0.40785935521125793, "learning_rate": 0.0015, "loss": 1.5279, "step": 3190 }, { "epoch": 0.16383370878558262, "grad_norm": 0.40147677063941956, "learning_rate": 0.0015, "loss": 1.542, "step": 3200 }, { "epoch": 0.16434568912553757, "grad_norm": 0.39116302132606506, "learning_rate": 0.0015, "loss": 1.5148, "step": 3210 }, { "epoch": 0.16485766946549252, "grad_norm": 0.3875216245651245, "learning_rate": 0.0015, "loss": 1.5289, "step": 3220 }, { "epoch": 0.16536964980544747, "grad_norm": 0.4106022119522095, "learning_rate": 0.0015, "loss": 1.5358, "step": 3230 }, { "epoch": 0.16588163014540241, "grad_norm": 0.393637090921402, "learning_rate": 0.0015, "loss": 1.5334, "step": 3240 }, { "epoch": 0.16639361048535736, "grad_norm": 0.3800962269306183, "learning_rate": 0.0015, "loss": 1.5364, "step": 3250 }, { "epoch": 0.1669055908253123, "grad_norm": 0.3848235011100769, "learning_rate": 0.0015, "loss": 1.5411, "step": 3260 }, { "epoch": 0.16741757116526726, "grad_norm": 0.38832154870033264, "learning_rate": 0.0015, "loss": 1.5373, "step": 3270 }, { "epoch": 0.1679295515052222, "grad_norm": 0.43623119592666626, "learning_rate": 0.0015, "loss": 1.5558, "step": 3280 }, { "epoch": 0.16844153184517716, "grad_norm": 0.3507107198238373, "learning_rate": 0.0015, "loss": 1.5365, "step": 3290 }, { "epoch": 0.1689535121851321, "grad_norm": 0.38700392842292786, "learning_rate": 0.0015, "loss": 1.5383, "step": 3300 }, { "epoch": 0.16946549252508702, "grad_norm": 0.38841623067855835, "learning_rate": 0.0015, "loss": 1.5399, "step": 3310 }, { "epoch": 0.16997747286504197, "grad_norm": 0.39128798246383667, "learning_rate": 0.0015, "loss": 1.5271, "step": 3320 }, { "epoch": 0.17048945320499692, "grad_norm": 0.38994646072387695, "learning_rate": 0.0015, "loss": 1.5317, "step": 3330 }, { "epoch": 0.17100143354495187, "grad_norm": 0.37731438875198364, "learning_rate": 0.0015, "loss": 1.5251, "step": 3340 }, { "epoch": 0.17151341388490682, "grad_norm": 0.4156712293624878, "learning_rate": 0.0015, "loss": 1.5221, "step": 3350 }, { "epoch": 0.17202539422486177, "grad_norm": 0.38232874870300293, "learning_rate": 0.0015, "loss": 1.5196, "step": 3360 }, { "epoch": 0.17253737456481671, "grad_norm": 0.3940838575363159, "learning_rate": 0.0015, "loss": 1.5213, "step": 3370 }, { "epoch": 0.17304935490477166, "grad_norm": 0.4050334393978119, "learning_rate": 0.0015, "loss": 1.5159, "step": 3380 }, { "epoch": 0.1735613352447266, "grad_norm": 0.3736588954925537, "learning_rate": 0.0015, "loss": 1.5157, "step": 3390 }, { "epoch": 0.17407331558468156, "grad_norm": 0.40355414152145386, "learning_rate": 0.0015, "loss": 1.5446, "step": 3400 }, { "epoch": 0.1745852959246365, "grad_norm": 0.37198445200920105, "learning_rate": 0.0015, "loss": 1.5322, "step": 3410 }, { "epoch": 0.17509727626459143, "grad_norm": 0.35825085639953613, "learning_rate": 0.0015, "loss": 1.5136, "step": 3420 }, { "epoch": 0.17560925660454638, "grad_norm": 0.4174591302871704, "learning_rate": 0.0015, "loss": 1.5092, "step": 3430 }, { "epoch": 0.17612123694450132, "grad_norm": 0.38272011280059814, "learning_rate": 0.0015, "loss": 1.515, "step": 3440 }, { "epoch": 0.17663321728445627, "grad_norm": 0.4088602364063263, "learning_rate": 0.0015, "loss": 1.5089, "step": 3450 }, { "epoch": 0.17714519762441122, "grad_norm": 0.37706780433654785, "learning_rate": 0.0015, "loss": 1.513, "step": 3460 }, { "epoch": 0.17765717796436617, "grad_norm": 0.3772091865539551, "learning_rate": 0.0015, "loss": 1.5096, "step": 3470 }, { "epoch": 0.17816915830432112, "grad_norm": 0.3540133535861969, "learning_rate": 0.0015, "loss": 1.5099, "step": 3480 }, { "epoch": 0.17868113864427607, "grad_norm": 0.36549830436706543, "learning_rate": 0.0015, "loss": 1.511, "step": 3490 }, { "epoch": 0.17919311898423101, "grad_norm": 0.39273905754089355, "learning_rate": 0.0015, "loss": 1.5005, "step": 3500 }, { "epoch": 0.17970509932418596, "grad_norm": 0.35500046610832214, "learning_rate": 0.0015, "loss": 1.4962, "step": 3510 }, { "epoch": 0.18021707966414088, "grad_norm": 0.39818084239959717, "learning_rate": 0.0015, "loss": 1.4951, "step": 3520 }, { "epoch": 0.18072906000409583, "grad_norm": 0.3649390637874603, "learning_rate": 0.0015, "loss": 1.5038, "step": 3530 }, { "epoch": 0.18124104034405078, "grad_norm": 0.376000314950943, "learning_rate": 0.0015, "loss": 1.4945, "step": 3540 }, { "epoch": 0.18175302068400573, "grad_norm": 0.3638756573200226, "learning_rate": 0.0015, "loss": 1.5012, "step": 3550 }, { "epoch": 0.18226500102396068, "grad_norm": 0.3695107400417328, "learning_rate": 0.0015, "loss": 1.5261, "step": 3560 }, { "epoch": 0.18277698136391562, "grad_norm": 0.424125999212265, "learning_rate": 0.0015, "loss": 1.5245, "step": 3570 }, { "epoch": 0.18328896170387057, "grad_norm": 0.3683246374130249, "learning_rate": 0.0015, "loss": 1.507, "step": 3580 }, { "epoch": 0.18380094204382552, "grad_norm": 0.3763924241065979, "learning_rate": 0.0015, "loss": 1.4671, "step": 3590 }, { "epoch": 0.18431292238378047, "grad_norm": 0.3692323565483093, "learning_rate": 0.0015, "loss": 1.5182, "step": 3600 }, { "epoch": 0.18482490272373542, "grad_norm": 0.37030673027038574, "learning_rate": 0.0015, "loss": 1.5037, "step": 3610 }, { "epoch": 0.18533688306369037, "grad_norm": 0.3666503429412842, "learning_rate": 0.0015, "loss": 1.499, "step": 3620 }, { "epoch": 0.1858488634036453, "grad_norm": 0.3609069287776947, "learning_rate": 0.0015, "loss": 1.5052, "step": 3630 }, { "epoch": 0.18636084374360024, "grad_norm": 0.3748449683189392, "learning_rate": 0.0015, "loss": 1.4596, "step": 3640 }, { "epoch": 0.18687282408355518, "grad_norm": 0.4080664813518524, "learning_rate": 0.0015, "loss": 1.5051, "step": 3650 }, { "epoch": 0.18738480442351013, "grad_norm": 0.3743340075016022, "learning_rate": 0.0015, "loss": 1.4658, "step": 3660 }, { "epoch": 0.18789678476346508, "grad_norm": 0.36924538016319275, "learning_rate": 0.0015, "loss": 1.474, "step": 3670 }, { "epoch": 0.18840876510342003, "grad_norm": 0.3834936022758484, "learning_rate": 0.0015, "loss": 1.4952, "step": 3680 }, { "epoch": 0.18892074544337498, "grad_norm": 0.3493509590625763, "learning_rate": 0.0015, "loss": 1.4765, "step": 3690 }, { "epoch": 0.18943272578332992, "grad_norm": 0.3550162613391876, "learning_rate": 0.0015, "loss": 1.4928, "step": 3700 }, { "epoch": 0.18994470612328487, "grad_norm": 0.3747323155403137, "learning_rate": 0.0015, "loss": 1.4872, "step": 3710 }, { "epoch": 0.19045668646323982, "grad_norm": 0.3649948835372925, "learning_rate": 0.0015, "loss": 1.5015, "step": 3720 }, { "epoch": 0.19096866680319477, "grad_norm": 0.37357765436172485, "learning_rate": 0.0015, "loss": 1.4828, "step": 3730 }, { "epoch": 0.1914806471431497, "grad_norm": 0.36136525869369507, "learning_rate": 0.0015, "loss": 1.5063, "step": 3740 }, { "epoch": 0.19199262748310464, "grad_norm": 0.35555464029312134, "learning_rate": 0.0015, "loss": 1.4797, "step": 3750 }, { "epoch": 0.1925046078230596, "grad_norm": 0.3460323214530945, "learning_rate": 0.0015, "loss": 1.4913, "step": 3760 }, { "epoch": 0.19301658816301454, "grad_norm": 0.35079696774482727, "learning_rate": 0.0015, "loss": 1.4714, "step": 3770 }, { "epoch": 0.19352856850296948, "grad_norm": 0.3562418818473816, "learning_rate": 0.0015, "loss": 1.4816, "step": 3780 }, { "epoch": 0.19404054884292443, "grad_norm": 0.3714292049407959, "learning_rate": 0.0015, "loss": 1.496, "step": 3790 }, { "epoch": 0.19455252918287938, "grad_norm": 0.37646958231925964, "learning_rate": 0.0015, "loss": 1.4814, "step": 3800 }, { "epoch": 0.19506450952283433, "grad_norm": 0.37127116322517395, "learning_rate": 0.0015, "loss": 1.4902, "step": 3810 }, { "epoch": 0.19557648986278928, "grad_norm": 0.3644818961620331, "learning_rate": 0.0015, "loss": 1.4811, "step": 3820 }, { "epoch": 0.19608847020274423, "grad_norm": 0.38677945733070374, "learning_rate": 0.0015, "loss": 1.5001, "step": 3830 }, { "epoch": 0.19660045054269917, "grad_norm": 0.379823237657547, "learning_rate": 0.0015, "loss": 1.4665, "step": 3840 }, { "epoch": 0.1971124308826541, "grad_norm": 0.37844884395599365, "learning_rate": 0.0015, "loss": 1.4783, "step": 3850 }, { "epoch": 0.19762441122260904, "grad_norm": 0.36030471324920654, "learning_rate": 0.0015, "loss": 1.4883, "step": 3860 }, { "epoch": 0.198136391562564, "grad_norm": 0.3515039384365082, "learning_rate": 0.0015, "loss": 1.4614, "step": 3870 }, { "epoch": 0.19864837190251894, "grad_norm": 0.3469856381416321, "learning_rate": 0.0015, "loss": 1.4669, "step": 3880 }, { "epoch": 0.1991603522424739, "grad_norm": 0.3526422381401062, "learning_rate": 0.0015, "loss": 1.4568, "step": 3890 }, { "epoch": 0.19967233258242884, "grad_norm": 0.34970229864120483, "learning_rate": 0.0015, "loss": 1.4467, "step": 3900 }, { "epoch": 0.20018431292238378, "grad_norm": 0.35208991169929504, "learning_rate": 0.0015, "loss": 1.5057, "step": 3910 }, { "epoch": 0.20069629326233873, "grad_norm": 0.35446539521217346, "learning_rate": 0.0015, "loss": 1.4677, "step": 3920 }, { "epoch": 0.20120827360229368, "grad_norm": 0.32680749893188477, "learning_rate": 0.0015, "loss": 1.4577, "step": 3930 }, { "epoch": 0.20172025394224863, "grad_norm": 0.3479768931865692, "learning_rate": 0.0015, "loss": 1.4679, "step": 3940 }, { "epoch": 0.20223223428220358, "grad_norm": 0.3349073529243469, "learning_rate": 0.0015, "loss": 1.4497, "step": 3950 }, { "epoch": 0.2027442146221585, "grad_norm": 0.35016781091690063, "learning_rate": 0.0015, "loss": 1.449, "step": 3960 }, { "epoch": 0.20325619496211345, "grad_norm": 0.349086195230484, "learning_rate": 0.0015, "loss": 1.4751, "step": 3970 }, { "epoch": 0.2037681753020684, "grad_norm": 0.36575040221214294, "learning_rate": 0.0015, "loss": 1.4653, "step": 3980 }, { "epoch": 0.20428015564202334, "grad_norm": 0.34002363681793213, "learning_rate": 0.0015, "loss": 1.4826, "step": 3990 }, { "epoch": 0.2047921359819783, "grad_norm": 0.36541834473609924, "learning_rate": 0.0015, "loss": 1.4485, "step": 4000 }, { "epoch": 0.20530411632193324, "grad_norm": 0.3874847888946533, "learning_rate": 0.0015, "loss": 1.478, "step": 4010 }, { "epoch": 0.2058160966618882, "grad_norm": 0.36418798565864563, "learning_rate": 0.0015, "loss": 1.4629, "step": 4020 }, { "epoch": 0.20632807700184314, "grad_norm": 0.34188389778137207, "learning_rate": 0.0015, "loss": 1.4784, "step": 4030 }, { "epoch": 0.20684005734179808, "grad_norm": 0.35976287722587585, "learning_rate": 0.0015, "loss": 1.458, "step": 4040 }, { "epoch": 0.20735203768175303, "grad_norm": 0.37284791469573975, "learning_rate": 0.0015, "loss": 1.471, "step": 4050 }, { "epoch": 0.20786401802170798, "grad_norm": 0.3462198078632355, "learning_rate": 0.0015, "loss": 1.4748, "step": 4060 }, { "epoch": 0.2083759983616629, "grad_norm": 0.3988822102546692, "learning_rate": 0.0015, "loss": 1.4576, "step": 4070 }, { "epoch": 0.20888797870161785, "grad_norm": 0.361892431974411, "learning_rate": 0.0015, "loss": 1.4516, "step": 4080 }, { "epoch": 0.2093999590415728, "grad_norm": 0.3648587763309479, "learning_rate": 0.0015, "loss": 1.4537, "step": 4090 }, { "epoch": 0.20991193938152775, "grad_norm": 0.35592299699783325, "learning_rate": 0.0015, "loss": 1.4346, "step": 4100 }, { "epoch": 0.2104239197214827, "grad_norm": 0.3457651138305664, "learning_rate": 0.0015, "loss": 1.4455, "step": 4110 }, { "epoch": 0.21093590006143764, "grad_norm": 0.3580280542373657, "learning_rate": 0.0015, "loss": 1.452, "step": 4120 }, { "epoch": 0.2114478804013926, "grad_norm": 0.3704809844493866, "learning_rate": 0.0015, "loss": 1.4655, "step": 4130 }, { "epoch": 0.21195986074134754, "grad_norm": 0.37433552742004395, "learning_rate": 0.0015, "loss": 1.4526, "step": 4140 }, { "epoch": 0.2124718410813025, "grad_norm": 0.35324522852897644, "learning_rate": 0.0015, "loss": 1.4651, "step": 4150 }, { "epoch": 0.21298382142125744, "grad_norm": 0.34257858991622925, "learning_rate": 0.0015, "loss": 1.4454, "step": 4160 }, { "epoch": 0.21349580176121236, "grad_norm": 0.34159529209136963, "learning_rate": 0.0015, "loss": 1.4561, "step": 4170 }, { "epoch": 0.2140077821011673, "grad_norm": 0.3691791296005249, "learning_rate": 0.0015, "loss": 1.4496, "step": 4180 }, { "epoch": 0.21451976244112225, "grad_norm": 0.3290902078151703, "learning_rate": 0.0015, "loss": 1.4477, "step": 4190 }, { "epoch": 0.2150317427810772, "grad_norm": 0.35127583146095276, "learning_rate": 0.0015, "loss": 1.4389, "step": 4200 }, { "epoch": 0.21554372312103215, "grad_norm": 0.3416004776954651, "learning_rate": 0.0015, "loss": 1.4569, "step": 4210 }, { "epoch": 0.2160557034609871, "grad_norm": 0.33589133620262146, "learning_rate": 0.0015, "loss": 1.4536, "step": 4220 }, { "epoch": 0.21656768380094205, "grad_norm": 0.3249707818031311, "learning_rate": 0.0015, "loss": 1.4421, "step": 4230 }, { "epoch": 0.217079664140897, "grad_norm": 0.3269306719303131, "learning_rate": 0.0015, "loss": 1.4644, "step": 4240 }, { "epoch": 0.21759164448085194, "grad_norm": 0.34012100100517273, "learning_rate": 0.0015, "loss": 1.4419, "step": 4250 }, { "epoch": 0.2181036248208069, "grad_norm": 0.3248611390590668, "learning_rate": 0.0015, "loss": 1.4321, "step": 4260 }, { "epoch": 0.21861560516076184, "grad_norm": 0.33508434891700745, "learning_rate": 0.0015, "loss": 1.4547, "step": 4270 }, { "epoch": 0.21912758550071676, "grad_norm": 0.3807787597179413, "learning_rate": 0.0015, "loss": 1.441, "step": 4280 }, { "epoch": 0.2196395658406717, "grad_norm": 0.34403491020202637, "learning_rate": 0.0015, "loss": 1.4309, "step": 4290 }, { "epoch": 0.22015154618062666, "grad_norm": 0.339507520198822, "learning_rate": 0.0015, "loss": 1.4408, "step": 4300 }, { "epoch": 0.2206635265205816, "grad_norm": 0.34783267974853516, "learning_rate": 0.0015, "loss": 1.4362, "step": 4310 }, { "epoch": 0.22117550686053655, "grad_norm": 0.3477760851383209, "learning_rate": 0.0015, "loss": 1.4743, "step": 4320 }, { "epoch": 0.2216874872004915, "grad_norm": 0.33150288462638855, "learning_rate": 0.0015, "loss": 1.4338, "step": 4330 }, { "epoch": 0.22219946754044645, "grad_norm": 0.3353327810764313, "learning_rate": 0.0015, "loss": 1.4389, "step": 4340 }, { "epoch": 0.2227114478804014, "grad_norm": 0.35436680912971497, "learning_rate": 0.0015, "loss": 1.4221, "step": 4350 }, { "epoch": 0.22322342822035635, "grad_norm": 0.35052821040153503, "learning_rate": 0.0015, "loss": 1.4463, "step": 4360 }, { "epoch": 0.2237354085603113, "grad_norm": 0.3383365273475647, "learning_rate": 0.0015, "loss": 1.4438, "step": 4370 }, { "epoch": 0.22424738890026624, "grad_norm": 0.33028966188430786, "learning_rate": 0.0015, "loss": 1.4365, "step": 4380 }, { "epoch": 0.22475936924022116, "grad_norm": 0.3439690172672272, "learning_rate": 0.0015, "loss": 1.434, "step": 4390 }, { "epoch": 0.2252713495801761, "grad_norm": 0.3257237374782562, "learning_rate": 0.0015, "loss": 1.4268, "step": 4400 }, { "epoch": 0.22578332992013106, "grad_norm": 0.34487271308898926, "learning_rate": 0.0015, "loss": 1.419, "step": 4410 }, { "epoch": 0.226295310260086, "grad_norm": 0.3513702154159546, "learning_rate": 0.0015, "loss": 1.416, "step": 4420 }, { "epoch": 0.22680729060004096, "grad_norm": 0.32178881764411926, "learning_rate": 0.0015, "loss": 1.4267, "step": 4430 }, { "epoch": 0.2273192709399959, "grad_norm": 0.32011663913726807, "learning_rate": 0.0015, "loss": 1.4269, "step": 4440 }, { "epoch": 0.22783125127995085, "grad_norm": 0.3356774151325226, "learning_rate": 0.0015, "loss": 1.4253, "step": 4450 }, { "epoch": 0.2283432316199058, "grad_norm": 0.33938485383987427, "learning_rate": 0.0015, "loss": 1.4137, "step": 4460 }, { "epoch": 0.22885521195986075, "grad_norm": 0.3313305675983429, "learning_rate": 0.0015, "loss": 1.4178, "step": 4470 }, { "epoch": 0.2293671922998157, "grad_norm": 0.31967252492904663, "learning_rate": 0.0015, "loss": 1.4421, "step": 4480 }, { "epoch": 0.22987917263977065, "grad_norm": 0.3485276401042938, "learning_rate": 0.0015, "loss": 1.4202, "step": 4490 }, { "epoch": 0.23039115297972557, "grad_norm": 0.3465486764907837, "learning_rate": 0.0015, "loss": 1.4364, "step": 4500 }, { "epoch": 0.23090313331968051, "grad_norm": 0.3443972170352936, "learning_rate": 0.0015, "loss": 1.4326, "step": 4510 }, { "epoch": 0.23141511365963546, "grad_norm": 0.33160969614982605, "learning_rate": 0.0015, "loss": 1.4147, "step": 4520 }, { "epoch": 0.2319270939995904, "grad_norm": 0.3427571952342987, "learning_rate": 0.0015, "loss": 1.4316, "step": 4530 }, { "epoch": 0.23243907433954536, "grad_norm": 0.3282462954521179, "learning_rate": 0.0015, "loss": 1.3933, "step": 4540 }, { "epoch": 0.2329510546795003, "grad_norm": 0.3840288519859314, "learning_rate": 0.0015, "loss": 1.4206, "step": 4550 }, { "epoch": 0.23346303501945526, "grad_norm": 0.34188082814216614, "learning_rate": 0.0015, "loss": 1.4286, "step": 4560 }, { "epoch": 0.2339750153594102, "grad_norm": 0.32480111718177795, "learning_rate": 0.0015, "loss": 1.4191, "step": 4570 }, { "epoch": 0.23448699569936515, "grad_norm": 0.3416594862937927, "learning_rate": 0.0015, "loss": 1.432, "step": 4580 }, { "epoch": 0.2349989760393201, "grad_norm": 0.32898756861686707, "learning_rate": 0.0015, "loss": 1.414, "step": 4590 }, { "epoch": 0.23551095637927505, "grad_norm": 0.3290642499923706, "learning_rate": 0.0015, "loss": 1.4272, "step": 4600 }, { "epoch": 0.23602293671922997, "grad_norm": 0.333150178194046, "learning_rate": 0.0015, "loss": 1.4254, "step": 4610 }, { "epoch": 0.23653491705918492, "grad_norm": 0.30599096417427063, "learning_rate": 0.0015, "loss": 1.4255, "step": 4620 }, { "epoch": 0.23704689739913987, "grad_norm": 0.34288567304611206, "learning_rate": 0.0015, "loss": 1.4027, "step": 4630 }, { "epoch": 0.23755887773909481, "grad_norm": 0.36715662479400635, "learning_rate": 0.0015, "loss": 1.4155, "step": 4640 }, { "epoch": 0.23807085807904976, "grad_norm": 0.32257118821144104, "learning_rate": 0.0015, "loss": 1.4178, "step": 4650 }, { "epoch": 0.2385828384190047, "grad_norm": 0.3298852741718292, "learning_rate": 0.0015, "loss": 1.4149, "step": 4660 }, { "epoch": 0.23909481875895966, "grad_norm": 0.32268422842025757, "learning_rate": 0.0015, "loss": 1.4384, "step": 4670 }, { "epoch": 0.2396067990989146, "grad_norm": 0.33715546131134033, "learning_rate": 0.0015, "loss": 1.4014, "step": 4680 }, { "epoch": 0.24011877943886956, "grad_norm": 0.3131064772605896, "learning_rate": 0.0015, "loss": 1.4163, "step": 4690 }, { "epoch": 0.2406307597788245, "grad_norm": 0.3470405042171478, "learning_rate": 0.0015, "loss": 1.4186, "step": 4700 }, { "epoch": 0.24114274011877943, "grad_norm": 0.35475459694862366, "learning_rate": 0.0015, "loss": 1.417, "step": 4710 }, { "epoch": 0.24165472045873437, "grad_norm": 0.3337201178073883, "learning_rate": 0.0015, "loss": 1.4271, "step": 4720 }, { "epoch": 0.24216670079868932, "grad_norm": 0.3554363548755646, "learning_rate": 0.0015, "loss": 1.4182, "step": 4730 }, { "epoch": 0.24267868113864427, "grad_norm": 0.32346460223197937, "learning_rate": 0.0015, "loss": 1.421, "step": 4740 }, { "epoch": 0.24319066147859922, "grad_norm": 0.3117121756076813, "learning_rate": 0.0015, "loss": 1.4278, "step": 4750 }, { "epoch": 0.24370264181855417, "grad_norm": 0.3506932556629181, "learning_rate": 0.0015, "loss": 1.3881, "step": 4760 }, { "epoch": 0.24421462215850911, "grad_norm": 0.3424610495567322, "learning_rate": 0.0015, "loss": 1.4236, "step": 4770 }, { "epoch": 0.24472660249846406, "grad_norm": 0.3284012973308563, "learning_rate": 0.0015, "loss": 1.4147, "step": 4780 }, { "epoch": 0.245238582838419, "grad_norm": 0.3341637849807739, "learning_rate": 0.0015, "loss": 1.4109, "step": 4790 }, { "epoch": 0.24575056317837396, "grad_norm": 0.32382500171661377, "learning_rate": 0.0015, "loss": 1.4063, "step": 4800 }, { "epoch": 0.2462625435183289, "grad_norm": 0.3269002437591553, "learning_rate": 0.0015, "loss": 1.42, "step": 4810 }, { "epoch": 0.24677452385828383, "grad_norm": 0.33705347776412964, "learning_rate": 0.0015, "loss": 1.4108, "step": 4820 }, { "epoch": 0.24728650419823878, "grad_norm": 0.32141435146331787, "learning_rate": 0.0015, "loss": 1.4012, "step": 4830 }, { "epoch": 0.24779848453819373, "grad_norm": 0.32620713114738464, "learning_rate": 0.0015, "loss": 1.3946, "step": 4840 }, { "epoch": 0.24831046487814867, "grad_norm": 0.3150465488433838, "learning_rate": 0.0015, "loss": 1.4239, "step": 4850 }, { "epoch": 0.24882244521810362, "grad_norm": 0.3141099214553833, "learning_rate": 0.0015, "loss": 1.4248, "step": 4860 }, { "epoch": 0.24933442555805857, "grad_norm": 0.31802797317504883, "learning_rate": 0.0015, "loss": 1.3965, "step": 4870 }, { "epoch": 0.24984640589801352, "grad_norm": 0.31748947501182556, "learning_rate": 0.0015, "loss": 1.4222, "step": 4880 }, { "epoch": 0.25035838623796847, "grad_norm": 0.30938032269477844, "learning_rate": 0.0015, "loss": 1.4001, "step": 4890 }, { "epoch": 0.2508703665779234, "grad_norm": 0.3129180371761322, "learning_rate": 0.0015, "loss": 1.3958, "step": 4900 }, { "epoch": 0.25138234691787836, "grad_norm": 0.31602999567985535, "learning_rate": 0.0015, "loss": 1.4114, "step": 4910 }, { "epoch": 0.2518943272578333, "grad_norm": 0.3049462139606476, "learning_rate": 0.0015, "loss": 1.3868, "step": 4920 }, { "epoch": 0.25240630759778826, "grad_norm": 0.3103995621204376, "learning_rate": 0.0015, "loss": 1.401, "step": 4930 }, { "epoch": 0.2529182879377432, "grad_norm": 0.30271056294441223, "learning_rate": 0.0015, "loss": 1.4046, "step": 4940 }, { "epoch": 0.25343026827769816, "grad_norm": 0.32372725009918213, "learning_rate": 0.0015, "loss": 1.3719, "step": 4950 }, { "epoch": 0.2539422486176531, "grad_norm": 0.3129730224609375, "learning_rate": 0.0015, "loss": 1.3797, "step": 4960 }, { "epoch": 0.25445422895760805, "grad_norm": 0.3240148425102234, "learning_rate": 0.0015, "loss": 1.4134, "step": 4970 }, { "epoch": 0.254966209297563, "grad_norm": 0.30317404866218567, "learning_rate": 0.0015, "loss": 1.3894, "step": 4980 }, { "epoch": 0.2554781896375179, "grad_norm": 0.33288583159446716, "learning_rate": 0.0015, "loss": 1.4132, "step": 4990 }, { "epoch": 0.25599016997747287, "grad_norm": 0.3233846127986908, "learning_rate": 0.0015, "loss": 1.3762, "step": 5000 }, { "epoch": 0.2565021503174278, "grad_norm": 0.30729755759239197, "learning_rate": 0.0015, "loss": 1.3975, "step": 5010 }, { "epoch": 0.25701413065738277, "grad_norm": 0.3006018400192261, "learning_rate": 0.0015, "loss": 1.4047, "step": 5020 }, { "epoch": 0.2575261109973377, "grad_norm": 0.3207467794418335, "learning_rate": 0.0015, "loss": 1.4084, "step": 5030 }, { "epoch": 0.25803809133729266, "grad_norm": 0.3039129674434662, "learning_rate": 0.0015, "loss": 1.4209, "step": 5040 }, { "epoch": 0.2585500716772476, "grad_norm": 0.29750290513038635, "learning_rate": 0.0015, "loss": 1.4156, "step": 5050 }, { "epoch": 0.25906205201720256, "grad_norm": 0.314507395029068, "learning_rate": 0.0015, "loss": 1.3685, "step": 5060 }, { "epoch": 0.2595740323571575, "grad_norm": 0.3176608681678772, "learning_rate": 0.0015, "loss": 1.3701, "step": 5070 }, { "epoch": 0.26008601269711246, "grad_norm": 0.3273438513278961, "learning_rate": 0.0015, "loss": 1.3841, "step": 5080 }, { "epoch": 0.2605979930370674, "grad_norm": 0.3173183798789978, "learning_rate": 0.0015, "loss": 1.3732, "step": 5090 }, { "epoch": 0.2611099733770223, "grad_norm": 0.33317986130714417, "learning_rate": 0.0015, "loss": 1.3815, "step": 5100 }, { "epoch": 0.2616219537169773, "grad_norm": 0.3045515716075897, "learning_rate": 0.0015, "loss": 1.4042, "step": 5110 }, { "epoch": 0.2621339340569322, "grad_norm": 0.3056975305080414, "learning_rate": 0.0015, "loss": 1.4156, "step": 5120 }, { "epoch": 0.26264591439688717, "grad_norm": 0.3231489956378937, "learning_rate": 0.0015, "loss": 1.4076, "step": 5130 }, { "epoch": 0.2631578947368421, "grad_norm": 0.3215503990650177, "learning_rate": 0.0015, "loss": 1.3712, "step": 5140 }, { "epoch": 0.26366987507679707, "grad_norm": 0.30379393696784973, "learning_rate": 0.0015, "loss": 1.3648, "step": 5150 }, { "epoch": 0.264181855416752, "grad_norm": 0.2987072765827179, "learning_rate": 0.0015, "loss": 1.3859, "step": 5160 }, { "epoch": 0.26469383575670696, "grad_norm": 0.3293174207210541, "learning_rate": 0.0015, "loss": 1.3974, "step": 5170 }, { "epoch": 0.2652058160966619, "grad_norm": 0.34920957684516907, "learning_rate": 0.0015, "loss": 1.3868, "step": 5180 }, { "epoch": 0.26571779643661686, "grad_norm": 0.3054308295249939, "learning_rate": 0.0015, "loss": 1.3838, "step": 5190 }, { "epoch": 0.2662297767765718, "grad_norm": 0.3131832182407379, "learning_rate": 0.0015, "loss": 1.377, "step": 5200 }, { "epoch": 0.2667417571165267, "grad_norm": 0.30868205428123474, "learning_rate": 0.0015, "loss": 1.3999, "step": 5210 }, { "epoch": 0.2672537374564817, "grad_norm": 0.3193263113498688, "learning_rate": 0.0015, "loss": 1.3789, "step": 5220 }, { "epoch": 0.2677657177964366, "grad_norm": 0.3142963945865631, "learning_rate": 0.0015, "loss": 1.3993, "step": 5230 }, { "epoch": 0.2682776981363916, "grad_norm": 0.3012097179889679, "learning_rate": 0.0015, "loss": 1.3959, "step": 5240 }, { "epoch": 0.2687896784763465, "grad_norm": 0.30580368638038635, "learning_rate": 0.0015, "loss": 1.4106, "step": 5250 }, { "epoch": 0.26930165881630147, "grad_norm": 0.2862599790096283, "learning_rate": 0.0015, "loss": 1.3873, "step": 5260 }, { "epoch": 0.2698136391562564, "grad_norm": 0.3221125602722168, "learning_rate": 0.0015, "loss": 1.3997, "step": 5270 }, { "epoch": 0.27032561949621137, "grad_norm": 0.29167062044143677, "learning_rate": 0.0015, "loss": 1.3707, "step": 5280 }, { "epoch": 0.2708375998361663, "grad_norm": 0.3372457027435303, "learning_rate": 0.0015, "loss": 1.3767, "step": 5290 }, { "epoch": 0.27134958017612126, "grad_norm": 0.308940589427948, "learning_rate": 0.0015, "loss": 1.377, "step": 5300 }, { "epoch": 0.2718615605160762, "grad_norm": 0.2946240305900574, "learning_rate": 0.0015, "loss": 1.3811, "step": 5310 }, { "epoch": 0.2723735408560311, "grad_norm": 0.30118903517723083, "learning_rate": 0.0015, "loss": 1.3991, "step": 5320 }, { "epoch": 0.2728855211959861, "grad_norm": 0.3128001093864441, "learning_rate": 0.0015, "loss": 1.3806, "step": 5330 }, { "epoch": 0.273397501535941, "grad_norm": 0.3355924189090729, "learning_rate": 0.0015, "loss": 1.378, "step": 5340 }, { "epoch": 0.273909481875896, "grad_norm": 0.29809674620628357, "learning_rate": 0.0015, "loss": 1.365, "step": 5350 }, { "epoch": 0.2744214622158509, "grad_norm": 0.2897878885269165, "learning_rate": 0.0015, "loss": 1.3796, "step": 5360 }, { "epoch": 0.2749334425558059, "grad_norm": 0.33131879568099976, "learning_rate": 0.0015, "loss": 1.3789, "step": 5370 }, { "epoch": 0.2754454228957608, "grad_norm": 0.3270549476146698, "learning_rate": 0.0015, "loss": 1.3877, "step": 5380 }, { "epoch": 0.27595740323571577, "grad_norm": 0.3001706898212433, "learning_rate": 0.0015, "loss": 1.376, "step": 5390 }, { "epoch": 0.2764693835756707, "grad_norm": 0.3149849772453308, "learning_rate": 0.0015, "loss": 1.3815, "step": 5400 }, { "epoch": 0.27698136391562567, "grad_norm": 0.28992435336112976, "learning_rate": 0.0015, "loss": 1.3731, "step": 5410 }, { "epoch": 0.2774933442555806, "grad_norm": 0.295311838388443, "learning_rate": 0.0015, "loss": 1.3958, "step": 5420 }, { "epoch": 0.2780053245955355, "grad_norm": 0.2988681495189667, "learning_rate": 0.0015, "loss": 1.3946, "step": 5430 }, { "epoch": 0.2785173049354905, "grad_norm": 0.3085227608680725, "learning_rate": 0.0015, "loss": 1.3776, "step": 5440 }, { "epoch": 0.2790292852754454, "grad_norm": 0.30014750361442566, "learning_rate": 0.0015, "loss": 1.3772, "step": 5450 }, { "epoch": 0.2795412656154004, "grad_norm": 0.3058876693248749, "learning_rate": 0.0015, "loss": 1.3637, "step": 5460 }, { "epoch": 0.2800532459553553, "grad_norm": 0.2952674925327301, "learning_rate": 0.0015, "loss": 1.3888, "step": 5470 }, { "epoch": 0.2805652262953103, "grad_norm": 0.3016969561576843, "learning_rate": 0.0015, "loss": 1.3874, "step": 5480 }, { "epoch": 0.2810772066352652, "grad_norm": 0.30375874042510986, "learning_rate": 0.0015, "loss": 1.3652, "step": 5490 }, { "epoch": 0.2815891869752202, "grad_norm": 0.29380300641059875, "learning_rate": 0.0015, "loss": 1.3768, "step": 5500 }, { "epoch": 0.2821011673151751, "grad_norm": 0.2994033992290497, "learning_rate": 0.0015, "loss": 1.376, "step": 5510 }, { "epoch": 0.28261314765513007, "grad_norm": 0.3174065053462982, "learning_rate": 0.0015, "loss": 1.3873, "step": 5520 }, { "epoch": 0.283125127995085, "grad_norm": 0.3069535791873932, "learning_rate": 0.0015, "loss": 1.3636, "step": 5530 }, { "epoch": 0.2836371083350399, "grad_norm": 0.2826645076274872, "learning_rate": 0.0015, "loss": 1.3567, "step": 5540 }, { "epoch": 0.2841490886749949, "grad_norm": 0.295926034450531, "learning_rate": 0.0015, "loss": 1.361, "step": 5550 }, { "epoch": 0.2846610690149498, "grad_norm": 0.29257112741470337, "learning_rate": 0.0015, "loss": 1.3699, "step": 5560 }, { "epoch": 0.2851730493549048, "grad_norm": 0.28169023990631104, "learning_rate": 0.0015, "loss": 1.353, "step": 5570 }, { "epoch": 0.2856850296948597, "grad_norm": 0.31054553389549255, "learning_rate": 0.0015, "loss": 1.3955, "step": 5580 }, { "epoch": 0.2861970100348147, "grad_norm": 0.28373947739601135, "learning_rate": 0.0015, "loss": 1.3843, "step": 5590 }, { "epoch": 0.2867089903747696, "grad_norm": 0.29920247197151184, "learning_rate": 0.0015, "loss": 1.3588, "step": 5600 }, { "epoch": 0.2872209707147246, "grad_norm": 0.2981637120246887, "learning_rate": 0.0015, "loss": 1.376, "step": 5610 }, { "epoch": 0.2877329510546795, "grad_norm": 0.269811749458313, "learning_rate": 0.0015, "loss": 1.3733, "step": 5620 }, { "epoch": 0.2882449313946345, "grad_norm": 0.28365617990493774, "learning_rate": 0.0015, "loss": 1.3376, "step": 5630 }, { "epoch": 0.2887569117345894, "grad_norm": 0.2953552305698395, "learning_rate": 0.0015, "loss": 1.367, "step": 5640 }, { "epoch": 0.2892688920745443, "grad_norm": 0.2910911440849304, "learning_rate": 0.0015, "loss": 1.3708, "step": 5650 }, { "epoch": 0.2897808724144993, "grad_norm": 0.2998880445957184, "learning_rate": 0.0015, "loss": 1.3917, "step": 5660 }, { "epoch": 0.2902928527544542, "grad_norm": 0.3000008165836334, "learning_rate": 0.0015, "loss": 1.3597, "step": 5670 }, { "epoch": 0.2908048330944092, "grad_norm": 0.3019564747810364, "learning_rate": 0.0015, "loss": 1.3641, "step": 5680 }, { "epoch": 0.2913168134343641, "grad_norm": 0.28087547421455383, "learning_rate": 0.0015, "loss": 1.3427, "step": 5690 }, { "epoch": 0.2918287937743191, "grad_norm": 0.32179591059684753, "learning_rate": 0.0015, "loss": 1.3576, "step": 5700 }, { "epoch": 0.292340774114274, "grad_norm": 0.30196836590766907, "learning_rate": 0.0015, "loss": 1.3866, "step": 5710 }, { "epoch": 0.292852754454229, "grad_norm": 0.29928138852119446, "learning_rate": 0.0015, "loss": 1.3711, "step": 5720 }, { "epoch": 0.2933647347941839, "grad_norm": 0.30917906761169434, "learning_rate": 0.0015, "loss": 1.3481, "step": 5730 }, { "epoch": 0.2938767151341388, "grad_norm": 0.32579630613327026, "learning_rate": 0.0015, "loss": 1.3713, "step": 5740 }, { "epoch": 0.2943886954740938, "grad_norm": 0.3042047321796417, "learning_rate": 0.0015, "loss": 1.3758, "step": 5750 }, { "epoch": 0.2949006758140487, "grad_norm": 0.2910909354686737, "learning_rate": 0.0015, "loss": 1.3675, "step": 5760 }, { "epoch": 0.2954126561540037, "grad_norm": 0.29718905687332153, "learning_rate": 0.0015, "loss": 1.3576, "step": 5770 }, { "epoch": 0.2959246364939586, "grad_norm": 0.28392040729522705, "learning_rate": 0.0015, "loss": 1.3779, "step": 5780 }, { "epoch": 0.2964366168339136, "grad_norm": 0.2852902114391327, "learning_rate": 0.0015, "loss": 1.3709, "step": 5790 }, { "epoch": 0.2969485971738685, "grad_norm": 0.29683250188827515, "learning_rate": 0.0015, "loss": 1.3757, "step": 5800 }, { "epoch": 0.2974605775138235, "grad_norm": 0.2882269620895386, "learning_rate": 0.0015, "loss": 1.3706, "step": 5810 }, { "epoch": 0.2979725578537784, "grad_norm": 0.3086804449558258, "learning_rate": 0.0015, "loss": 1.3506, "step": 5820 }, { "epoch": 0.2984845381937334, "grad_norm": 0.2780090868473053, "learning_rate": 0.0015, "loss": 1.3565, "step": 5830 }, { "epoch": 0.2989965185336883, "grad_norm": 0.30415329337120056, "learning_rate": 0.0015, "loss": 1.3593, "step": 5840 }, { "epoch": 0.2995084988736432, "grad_norm": 0.2865590751171112, "learning_rate": 0.0015, "loss": 1.3873, "step": 5850 }, { "epoch": 0.3000204792135982, "grad_norm": 0.2798267900943756, "learning_rate": 0.0015, "loss": 1.3439, "step": 5860 }, { "epoch": 0.3005324595535531, "grad_norm": 0.29937195777893066, "learning_rate": 0.0015, "loss": 1.3483, "step": 5870 }, { "epoch": 0.3010444398935081, "grad_norm": 0.27708205580711365, "learning_rate": 0.0015, "loss": 1.3207, "step": 5880 }, { "epoch": 0.301556420233463, "grad_norm": 0.2955605983734131, "learning_rate": 0.0015, "loss": 1.3524, "step": 5890 }, { "epoch": 0.302068400573418, "grad_norm": 0.3226946294307709, "learning_rate": 0.0015, "loss": 1.3545, "step": 5900 }, { "epoch": 0.3025803809133729, "grad_norm": 0.2925417721271515, "learning_rate": 0.0015, "loss": 1.3435, "step": 5910 }, { "epoch": 0.3030923612533279, "grad_norm": 0.3087621331214905, "learning_rate": 0.0015, "loss": 1.3275, "step": 5920 }, { "epoch": 0.3036043415932828, "grad_norm": 0.2996879518032074, "learning_rate": 0.0015, "loss": 1.3514, "step": 5930 }, { "epoch": 0.3041163219332378, "grad_norm": 0.3085525333881378, "learning_rate": 0.0015, "loss": 1.3539, "step": 5940 }, { "epoch": 0.3046283022731927, "grad_norm": 0.28985559940338135, "learning_rate": 0.0015, "loss": 1.3661, "step": 5950 }, { "epoch": 0.30514028261314763, "grad_norm": 0.2889237701892853, "learning_rate": 0.0015, "loss": 1.3622, "step": 5960 }, { "epoch": 0.3056522629531026, "grad_norm": 0.3278009593486786, "learning_rate": 0.0015, "loss": 1.3438, "step": 5970 }, { "epoch": 0.3061642432930575, "grad_norm": 0.2967126965522766, "learning_rate": 0.0015, "loss": 1.3752, "step": 5980 }, { "epoch": 0.3066762236330125, "grad_norm": 0.2810833752155304, "learning_rate": 0.0015, "loss": 1.3673, "step": 5990 }, { "epoch": 0.3071882039729674, "grad_norm": 0.2842026650905609, "learning_rate": 0.0015, "loss": 1.3315, "step": 6000 }, { "epoch": 0.3077001843129224, "grad_norm": 0.2904771864414215, "learning_rate": 0.0015, "loss": 1.3551, "step": 6010 }, { "epoch": 0.3082121646528773, "grad_norm": 0.2798822224140167, "learning_rate": 0.0015, "loss": 1.374, "step": 6020 }, { "epoch": 0.3087241449928323, "grad_norm": 0.2831931412220001, "learning_rate": 0.0015, "loss": 1.3449, "step": 6030 }, { "epoch": 0.3092361253327872, "grad_norm": 0.27797648310661316, "learning_rate": 0.0015, "loss": 1.3427, "step": 6040 }, { "epoch": 0.3097481056727422, "grad_norm": 0.2972757816314697, "learning_rate": 0.0015, "loss": 1.3498, "step": 6050 }, { "epoch": 0.3102600860126971, "grad_norm": 0.2661411166191101, "learning_rate": 0.0015, "loss": 1.3391, "step": 6060 }, { "epoch": 0.31077206635265203, "grad_norm": 0.2736954689025879, "learning_rate": 0.0015, "loss": 1.3637, "step": 6070 }, { "epoch": 0.311284046692607, "grad_norm": 0.27739083766937256, "learning_rate": 0.0015, "loss": 1.3432, "step": 6080 }, { "epoch": 0.31179602703256193, "grad_norm": 0.275734543800354, "learning_rate": 0.0015, "loss": 1.3523, "step": 6090 }, { "epoch": 0.3123080073725169, "grad_norm": 0.29389500617980957, "learning_rate": 0.0015, "loss": 1.3566, "step": 6100 }, { "epoch": 0.3128199877124718, "grad_norm": 0.3517824113368988, "learning_rate": 0.0015, "loss": 1.3401, "step": 6110 }, { "epoch": 0.3133319680524268, "grad_norm": 0.2847048342227936, "learning_rate": 0.0015, "loss": 1.3345, "step": 6120 }, { "epoch": 0.3138439483923817, "grad_norm": 0.2781658470630646, "learning_rate": 0.0015, "loss": 1.3165, "step": 6130 }, { "epoch": 0.3143559287323367, "grad_norm": 0.27928218245506287, "learning_rate": 0.0015, "loss": 1.3419, "step": 6140 }, { "epoch": 0.3148679090722916, "grad_norm": 0.29375484585762024, "learning_rate": 0.0015, "loss": 1.3424, "step": 6150 }, { "epoch": 0.3153798894122466, "grad_norm": 0.2773997187614441, "learning_rate": 0.0015, "loss": 1.3153, "step": 6160 }, { "epoch": 0.3158918697522015, "grad_norm": 0.2810317277908325, "learning_rate": 0.0015, "loss": 1.3633, "step": 6170 }, { "epoch": 0.31640385009215644, "grad_norm": 0.2810805141925812, "learning_rate": 0.0015, "loss": 1.3388, "step": 6180 }, { "epoch": 0.3169158304321114, "grad_norm": 0.27900010347366333, "learning_rate": 0.0015, "loss": 1.3494, "step": 6190 }, { "epoch": 0.31742781077206633, "grad_norm": 0.2763247787952423, "learning_rate": 0.0015, "loss": 1.347, "step": 6200 }, { "epoch": 0.3179397911120213, "grad_norm": 0.27593132853507996, "learning_rate": 0.0015, "loss": 1.3286, "step": 6210 }, { "epoch": 0.31845177145197623, "grad_norm": 0.2928100526332855, "learning_rate": 0.0015, "loss": 1.3485, "step": 6220 }, { "epoch": 0.3189637517919312, "grad_norm": 0.2809889316558838, "learning_rate": 0.0015, "loss": 1.3318, "step": 6230 }, { "epoch": 0.3194757321318861, "grad_norm": 0.2984907329082489, "learning_rate": 0.0015, "loss": 1.3474, "step": 6240 }, { "epoch": 0.3199877124718411, "grad_norm": 0.2861260771751404, "learning_rate": 0.0015, "loss": 1.3308, "step": 6250 }, { "epoch": 0.320499692811796, "grad_norm": 0.30209678411483765, "learning_rate": 0.0015, "loss": 1.3438, "step": 6260 }, { "epoch": 0.321011673151751, "grad_norm": 0.27839919924736023, "learning_rate": 0.0015, "loss": 1.3606, "step": 6270 }, { "epoch": 0.3215236534917059, "grad_norm": 0.27120068669319153, "learning_rate": 0.0015, "loss": 1.3291, "step": 6280 }, { "epoch": 0.32203563383166084, "grad_norm": 0.2891988158226013, "learning_rate": 0.0015, "loss": 1.3483, "step": 6290 }, { "epoch": 0.3225476141716158, "grad_norm": 0.3099561929702759, "learning_rate": 0.0015, "loss": 1.3538, "step": 6300 }, { "epoch": 0.32305959451157074, "grad_norm": 0.28136762976646423, "learning_rate": 0.0015, "loss": 1.344, "step": 6310 }, { "epoch": 0.3235715748515257, "grad_norm": 0.27209803462028503, "learning_rate": 0.0015, "loss": 1.3395, "step": 6320 }, { "epoch": 0.32408355519148063, "grad_norm": 0.2847345173358917, "learning_rate": 0.0015, "loss": 1.3278, "step": 6330 }, { "epoch": 0.3245955355314356, "grad_norm": 0.29409244656562805, "learning_rate": 0.0015, "loss": 1.352, "step": 6340 }, { "epoch": 0.32510751587139053, "grad_norm": 0.26782944798469543, "learning_rate": 0.0015, "loss": 1.3211, "step": 6350 }, { "epoch": 0.3256194962113455, "grad_norm": 0.27680841088294983, "learning_rate": 0.0015, "loss": 1.3168, "step": 6360 }, { "epoch": 0.3261314765513004, "grad_norm": 0.28913265466690063, "learning_rate": 0.0015, "loss": 1.3412, "step": 6370 }, { "epoch": 0.3266434568912554, "grad_norm": 0.2598094046115875, "learning_rate": 0.0015, "loss": 1.3235, "step": 6380 }, { "epoch": 0.3271554372312103, "grad_norm": 0.2622967064380646, "learning_rate": 0.0015, "loss": 1.3353, "step": 6390 }, { "epoch": 0.32766741757116524, "grad_norm": 0.2802422046661377, "learning_rate": 0.0015, "loss": 1.3278, "step": 6400 }, { "epoch": 0.3281793979111202, "grad_norm": 0.2863336503505707, "learning_rate": 0.0015, "loss": 1.3421, "step": 6410 }, { "epoch": 0.32869137825107514, "grad_norm": 0.28782033920288086, "learning_rate": 0.0015, "loss": 1.3395, "step": 6420 }, { "epoch": 0.3292033585910301, "grad_norm": 0.2650611698627472, "learning_rate": 0.0015, "loss": 1.3461, "step": 6430 }, { "epoch": 0.32971533893098504, "grad_norm": 0.28210777044296265, "learning_rate": 0.0015, "loss": 1.3452, "step": 6440 }, { "epoch": 0.33022731927094, "grad_norm": 0.29541024565696716, "learning_rate": 0.0015, "loss": 1.3304, "step": 6450 }, { "epoch": 0.33073929961089493, "grad_norm": 0.27473190426826477, "learning_rate": 0.0015, "loss": 1.3277, "step": 6460 }, { "epoch": 0.3312512799508499, "grad_norm": 0.2899293005466461, "learning_rate": 0.0015, "loss": 1.3193, "step": 6470 }, { "epoch": 0.33176326029080483, "grad_norm": 0.2961236834526062, "learning_rate": 0.0015, "loss": 1.3252, "step": 6480 }, { "epoch": 0.3322752406307598, "grad_norm": 0.2859441637992859, "learning_rate": 0.0015, "loss": 1.3327, "step": 6490 }, { "epoch": 0.3327872209707147, "grad_norm": 0.26721256971359253, "learning_rate": 0.0015, "loss": 1.344, "step": 6500 }, { "epoch": 0.33329920131066965, "grad_norm": 0.27258962392807007, "learning_rate": 0.0015, "loss": 1.3291, "step": 6510 }, { "epoch": 0.3338111816506246, "grad_norm": 0.2868225872516632, "learning_rate": 0.0015, "loss": 1.3542, "step": 6520 }, { "epoch": 0.33432316199057954, "grad_norm": 0.27058276534080505, "learning_rate": 0.0015, "loss": 1.3428, "step": 6530 }, { "epoch": 0.3348351423305345, "grad_norm": 0.2648937404155731, "learning_rate": 0.0015, "loss": 1.3345, "step": 6540 }, { "epoch": 0.33534712267048944, "grad_norm": 0.2588028609752655, "learning_rate": 0.0015, "loss": 1.3163, "step": 6550 }, { "epoch": 0.3358591030104444, "grad_norm": 0.2773786783218384, "learning_rate": 0.0015, "loss": 1.3353, "step": 6560 }, { "epoch": 0.33637108335039934, "grad_norm": 0.2635444402694702, "learning_rate": 0.0015, "loss": 1.3073, "step": 6570 }, { "epoch": 0.3368830636903543, "grad_norm": 0.28633764386177063, "learning_rate": 0.0015, "loss": 1.3085, "step": 6580 }, { "epoch": 0.33739504403030923, "grad_norm": 0.29486966133117676, "learning_rate": 0.0015, "loss": 1.3316, "step": 6590 }, { "epoch": 0.3379070243702642, "grad_norm": 0.2629407048225403, "learning_rate": 0.0015, "loss": 1.3319, "step": 6600 }, { "epoch": 0.33841900471021913, "grad_norm": 0.2779609262943268, "learning_rate": 0.0015, "loss": 1.3043, "step": 6610 }, { "epoch": 0.33893098505017405, "grad_norm": 0.2911774218082428, "learning_rate": 0.0015, "loss": 1.361, "step": 6620 }, { "epoch": 0.339442965390129, "grad_norm": 0.26540687680244446, "learning_rate": 0.0015, "loss": 1.3095, "step": 6630 }, { "epoch": 0.33995494573008395, "grad_norm": 0.27710777521133423, "learning_rate": 0.0015, "loss": 1.3173, "step": 6640 }, { "epoch": 0.3404669260700389, "grad_norm": 0.2614011764526367, "learning_rate": 0.0015, "loss": 1.3178, "step": 6650 }, { "epoch": 0.34097890640999384, "grad_norm": 0.2797437906265259, "learning_rate": 0.0015, "loss": 1.3287, "step": 6660 }, { "epoch": 0.3414908867499488, "grad_norm": 0.28846311569213867, "learning_rate": 0.0015, "loss": 1.3222, "step": 6670 }, { "epoch": 0.34200286708990374, "grad_norm": 0.2507641911506653, "learning_rate": 0.0015, "loss": 1.3297, "step": 6680 }, { "epoch": 0.3425148474298587, "grad_norm": 0.277458518743515, "learning_rate": 0.0015, "loss": 1.3092, "step": 6690 }, { "epoch": 0.34302682776981364, "grad_norm": 0.28139162063598633, "learning_rate": 0.0015, "loss": 1.3509, "step": 6700 }, { "epoch": 0.3435388081097686, "grad_norm": 0.26460030674934387, "learning_rate": 0.0015, "loss": 1.3357, "step": 6710 }, { "epoch": 0.34405078844972353, "grad_norm": 0.2602977752685547, "learning_rate": 0.0015, "loss": 1.3375, "step": 6720 }, { "epoch": 0.34456276878967845, "grad_norm": 0.3062650263309479, "learning_rate": 0.0015, "loss": 1.3225, "step": 6730 }, { "epoch": 0.34507474912963343, "grad_norm": 0.27152612805366516, "learning_rate": 0.0015, "loss": 1.3326, "step": 6740 }, { "epoch": 0.34558672946958835, "grad_norm": 0.2585943341255188, "learning_rate": 0.0015, "loss": 1.3275, "step": 6750 }, { "epoch": 0.3460987098095433, "grad_norm": 0.2826108932495117, "learning_rate": 0.0015, "loss": 1.3143, "step": 6760 }, { "epoch": 0.34661069014949825, "grad_norm": 0.2719128131866455, "learning_rate": 0.0015, "loss": 1.3136, "step": 6770 }, { "epoch": 0.3471226704894532, "grad_norm": 0.2605542540550232, "learning_rate": 0.0015, "loss": 1.3207, "step": 6780 }, { "epoch": 0.34763465082940814, "grad_norm": 0.26649779081344604, "learning_rate": 0.0015, "loss": 1.304, "step": 6790 }, { "epoch": 0.3481466311693631, "grad_norm": 0.28349971771240234, "learning_rate": 0.0015, "loss": 1.3176, "step": 6800 }, { "epoch": 0.34865861150931804, "grad_norm": 0.27145761251449585, "learning_rate": 0.0015, "loss": 1.3294, "step": 6810 }, { "epoch": 0.349170591849273, "grad_norm": 0.26513341069221497, "learning_rate": 0.0015, "loss": 1.3299, "step": 6820 }, { "epoch": 0.34968257218922794, "grad_norm": 0.2701232135295868, "learning_rate": 0.0015, "loss": 1.3028, "step": 6830 }, { "epoch": 0.35019455252918286, "grad_norm": 0.27336186170578003, "learning_rate": 0.0015, "loss": 1.3253, "step": 6840 }, { "epoch": 0.35070653286913783, "grad_norm": 0.26006847620010376, "learning_rate": 0.0015, "loss": 1.3097, "step": 6850 }, { "epoch": 0.35121851320909275, "grad_norm": 0.2867346405982971, "learning_rate": 0.0015, "loss": 1.3489, "step": 6860 }, { "epoch": 0.35173049354904773, "grad_norm": 0.2665490210056305, "learning_rate": 0.0015, "loss": 1.3029, "step": 6870 }, { "epoch": 0.35224247388900265, "grad_norm": 0.26250341534614563, "learning_rate": 0.0015, "loss": 1.324, "step": 6880 }, { "epoch": 0.3527544542289576, "grad_norm": 0.27404358983039856, "learning_rate": 0.0015, "loss": 1.3222, "step": 6890 }, { "epoch": 0.35326643456891255, "grad_norm": 0.271932989358902, "learning_rate": 0.0015, "loss": 1.3068, "step": 6900 }, { "epoch": 0.3537784149088675, "grad_norm": 0.25479060411453247, "learning_rate": 0.0015, "loss": 1.3143, "step": 6910 }, { "epoch": 0.35429039524882244, "grad_norm": 0.2571351230144501, "learning_rate": 0.0015, "loss": 1.2886, "step": 6920 }, { "epoch": 0.35480237558877736, "grad_norm": 0.2612917125225067, "learning_rate": 0.0015, "loss": 1.3199, "step": 6930 }, { "epoch": 0.35531435592873234, "grad_norm": 0.2573522925376892, "learning_rate": 0.0015, "loss": 1.3143, "step": 6940 }, { "epoch": 0.35582633626868726, "grad_norm": 0.2598212659358978, "learning_rate": 0.0015, "loss": 1.3039, "step": 6950 }, { "epoch": 0.35633831660864224, "grad_norm": 0.2575034201145172, "learning_rate": 0.0015, "loss": 1.3095, "step": 6960 }, { "epoch": 0.35685029694859716, "grad_norm": 0.2559545636177063, "learning_rate": 0.0015, "loss": 1.2971, "step": 6970 }, { "epoch": 0.35736227728855213, "grad_norm": 0.26087066531181335, "learning_rate": 0.0015, "loss": 1.3023, "step": 6980 }, { "epoch": 0.35787425762850705, "grad_norm": 0.2606737017631531, "learning_rate": 0.0015, "loss": 1.3098, "step": 6990 }, { "epoch": 0.35838623796846203, "grad_norm": 0.27495986223220825, "learning_rate": 0.0015, "loss": 1.3249, "step": 7000 }, { "epoch": 0.35889821830841695, "grad_norm": 0.25473734736442566, "learning_rate": 0.0015, "loss": 1.3253, "step": 7010 }, { "epoch": 0.3594101986483719, "grad_norm": 0.2764824330806732, "learning_rate": 0.0015, "loss": 1.3101, "step": 7020 }, { "epoch": 0.35992217898832685, "grad_norm": 0.27935823798179626, "learning_rate": 0.0015, "loss": 1.3268, "step": 7030 }, { "epoch": 0.36043415932828177, "grad_norm": 0.26057881116867065, "learning_rate": 0.0015, "loss": 1.2999, "step": 7040 }, { "epoch": 0.36094613966823674, "grad_norm": 0.27014756202697754, "learning_rate": 0.0015, "loss": 1.3083, "step": 7050 }, { "epoch": 0.36145812000819166, "grad_norm": 0.26150983572006226, "learning_rate": 0.0015, "loss": 1.3059, "step": 7060 }, { "epoch": 0.36197010034814664, "grad_norm": 0.2634667158126831, "learning_rate": 0.0015, "loss": 1.3325, "step": 7070 }, { "epoch": 0.36248208068810156, "grad_norm": 0.2591879665851593, "learning_rate": 0.0015, "loss": 1.3004, "step": 7080 }, { "epoch": 0.36299406102805654, "grad_norm": 0.27941566705703735, "learning_rate": 0.0015, "loss": 1.3216, "step": 7090 }, { "epoch": 0.36350604136801146, "grad_norm": 0.2634701430797577, "learning_rate": 0.0015, "loss": 1.3043, "step": 7100 }, { "epoch": 0.36401802170796643, "grad_norm": 0.2601988613605499, "learning_rate": 0.0015, "loss": 1.3128, "step": 7110 }, { "epoch": 0.36453000204792135, "grad_norm": 0.2701079249382019, "learning_rate": 0.0015, "loss": 1.2908, "step": 7120 }, { "epoch": 0.36504198238787633, "grad_norm": 0.2694578170776367, "learning_rate": 0.0015, "loss": 1.303, "step": 7130 }, { "epoch": 0.36555396272783125, "grad_norm": 0.2465587705373764, "learning_rate": 0.0015, "loss": 1.3177, "step": 7140 }, { "epoch": 0.36606594306778617, "grad_norm": 0.26136472821235657, "learning_rate": 0.0015, "loss": 1.3112, "step": 7150 }, { "epoch": 0.36657792340774115, "grad_norm": 0.2548895478248596, "learning_rate": 0.0015, "loss": 1.3114, "step": 7160 }, { "epoch": 0.36708990374769607, "grad_norm": 0.2586556673049927, "learning_rate": 0.0015, "loss": 1.3076, "step": 7170 }, { "epoch": 0.36760188408765104, "grad_norm": 0.25887277722358704, "learning_rate": 0.0015, "loss": 1.3217, "step": 7180 }, { "epoch": 0.36811386442760596, "grad_norm": 0.2628803253173828, "learning_rate": 0.0015, "loss": 1.3012, "step": 7190 }, { "epoch": 0.36862584476756094, "grad_norm": 0.2630269527435303, "learning_rate": 0.0015, "loss": 1.3187, "step": 7200 }, { "epoch": 0.36913782510751586, "grad_norm": 0.2589748501777649, "learning_rate": 0.0015, "loss": 1.2885, "step": 7210 }, { "epoch": 0.36964980544747084, "grad_norm": 0.262361615896225, "learning_rate": 0.0015, "loss": 1.2962, "step": 7220 }, { "epoch": 0.37016178578742576, "grad_norm": 0.24950037896633148, "learning_rate": 0.0015, "loss": 1.3026, "step": 7230 }, { "epoch": 0.37067376612738073, "grad_norm": 0.2537461817264557, "learning_rate": 0.0015, "loss": 1.2971, "step": 7240 }, { "epoch": 0.37118574646733565, "grad_norm": 0.25920331478118896, "learning_rate": 0.0015, "loss": 1.2951, "step": 7250 }, { "epoch": 0.3716977268072906, "grad_norm": 0.2526357173919678, "learning_rate": 0.0015, "loss": 1.2989, "step": 7260 }, { "epoch": 0.37220970714724555, "grad_norm": 0.28876397013664246, "learning_rate": 0.0015, "loss": 1.3063, "step": 7270 }, { "epoch": 0.37272168748720047, "grad_norm": 0.27300864458084106, "learning_rate": 0.0015, "loss": 1.2954, "step": 7280 }, { "epoch": 0.37323366782715545, "grad_norm": 0.26332223415374756, "learning_rate": 0.0015, "loss": 1.3329, "step": 7290 }, { "epoch": 0.37374564816711037, "grad_norm": 0.26332515478134155, "learning_rate": 0.0015, "loss": 1.2908, "step": 7300 }, { "epoch": 0.37425762850706534, "grad_norm": 0.2604503631591797, "learning_rate": 0.0015, "loss": 1.3002, "step": 7310 }, { "epoch": 0.37476960884702026, "grad_norm": 0.25917840003967285, "learning_rate": 0.0015, "loss": 1.2983, "step": 7320 }, { "epoch": 0.37528158918697524, "grad_norm": 0.26824817061424255, "learning_rate": 0.0015, "loss": 1.3183, "step": 7330 }, { "epoch": 0.37579356952693016, "grad_norm": 0.2575696110725403, "learning_rate": 0.0015, "loss": 1.318, "step": 7340 }, { "epoch": 0.37630554986688514, "grad_norm": 0.2578194737434387, "learning_rate": 0.0015, "loss": 1.2833, "step": 7350 }, { "epoch": 0.37681753020684006, "grad_norm": 0.2768312096595764, "learning_rate": 0.0015, "loss": 1.2948, "step": 7360 }, { "epoch": 0.377329510546795, "grad_norm": 0.2382088154554367, "learning_rate": 0.0015, "loss": 1.3, "step": 7370 }, { "epoch": 0.37784149088674995, "grad_norm": 0.2637539803981781, "learning_rate": 0.0015, "loss": 1.2792, "step": 7380 }, { "epoch": 0.3783534712267049, "grad_norm": 0.2832081615924835, "learning_rate": 0.0015, "loss": 1.3097, "step": 7390 }, { "epoch": 0.37886545156665985, "grad_norm": 0.2672945261001587, "learning_rate": 0.0015, "loss": 1.2989, "step": 7400 }, { "epoch": 0.37937743190661477, "grad_norm": 0.24696801602840424, "learning_rate": 0.0015, "loss": 1.3174, "step": 7410 }, { "epoch": 0.37988941224656975, "grad_norm": 0.2638930082321167, "learning_rate": 0.0015, "loss": 1.295, "step": 7420 }, { "epoch": 0.38040139258652467, "grad_norm": 0.2714937925338745, "learning_rate": 0.0015, "loss": 1.2917, "step": 7430 }, { "epoch": 0.38091337292647964, "grad_norm": 0.2469353824853897, "learning_rate": 0.0015, "loss": 1.2919, "step": 7440 }, { "epoch": 0.38142535326643456, "grad_norm": 0.25035470724105835, "learning_rate": 0.0015, "loss": 1.2896, "step": 7450 }, { "epoch": 0.38193733360638954, "grad_norm": 0.26178446412086487, "learning_rate": 0.0015, "loss": 1.2891, "step": 7460 }, { "epoch": 0.38244931394634446, "grad_norm": 0.26942870020866394, "learning_rate": 0.0015, "loss": 1.2723, "step": 7470 }, { "epoch": 0.3829612942862994, "grad_norm": 0.26943838596343994, "learning_rate": 0.0015, "loss": 1.284, "step": 7480 }, { "epoch": 0.38347327462625436, "grad_norm": 0.25865715742111206, "learning_rate": 0.0015, "loss": 1.3063, "step": 7490 }, { "epoch": 0.3839852549662093, "grad_norm": 0.27455562353134155, "learning_rate": 0.0015, "loss": 1.2988, "step": 7500 }, { "epoch": 0.38449723530616425, "grad_norm": 0.2636263370513916, "learning_rate": 0.0015, "loss": 1.2739, "step": 7510 }, { "epoch": 0.3850092156461192, "grad_norm": 0.26559826731681824, "learning_rate": 0.0015, "loss": 1.2958, "step": 7520 }, { "epoch": 0.38552119598607415, "grad_norm": 0.2592698335647583, "learning_rate": 0.0015, "loss": 1.2981, "step": 7530 }, { "epoch": 0.38603317632602907, "grad_norm": 0.25872740149497986, "learning_rate": 0.0015, "loss": 1.3005, "step": 7540 }, { "epoch": 0.38654515666598405, "grad_norm": 0.26369425654411316, "learning_rate": 0.0015, "loss": 1.3021, "step": 7550 }, { "epoch": 0.38705713700593897, "grad_norm": 0.25757378339767456, "learning_rate": 0.0015, "loss": 1.302, "step": 7560 }, { "epoch": 0.38756911734589394, "grad_norm": 0.27320241928100586, "learning_rate": 0.0015, "loss": 1.2802, "step": 7570 }, { "epoch": 0.38808109768584886, "grad_norm": 0.2795805335044861, "learning_rate": 0.0015, "loss": 1.295, "step": 7580 }, { "epoch": 0.3885930780258038, "grad_norm": 0.26023516058921814, "learning_rate": 0.0015, "loss": 1.2889, "step": 7590 }, { "epoch": 0.38910505836575876, "grad_norm": 0.2582970857620239, "learning_rate": 0.0015, "loss": 1.302, "step": 7600 }, { "epoch": 0.3896170387057137, "grad_norm": 0.2473934441804886, "learning_rate": 0.0015, "loss": 1.3023, "step": 7610 }, { "epoch": 0.39012901904566866, "grad_norm": 0.2547856271266937, "learning_rate": 0.0015, "loss": 1.29, "step": 7620 }, { "epoch": 0.3906409993856236, "grad_norm": 0.26764586567878723, "learning_rate": 0.0015, "loss": 1.2905, "step": 7630 }, { "epoch": 0.39115297972557855, "grad_norm": 0.2481442391872406, "learning_rate": 0.0015, "loss": 1.3164, "step": 7640 }, { "epoch": 0.3916649600655335, "grad_norm": 0.25532233715057373, "learning_rate": 0.0015, "loss": 1.2958, "step": 7650 }, { "epoch": 0.39217694040548845, "grad_norm": 0.24001578986644745, "learning_rate": 0.0015, "loss": 1.2827, "step": 7660 }, { "epoch": 0.39268892074544337, "grad_norm": 0.2489776611328125, "learning_rate": 0.0015, "loss": 1.2742, "step": 7670 }, { "epoch": 0.39320090108539835, "grad_norm": 0.23535743355751038, "learning_rate": 0.0015, "loss": 1.2855, "step": 7680 }, { "epoch": 0.39371288142535327, "grad_norm": 0.25811052322387695, "learning_rate": 0.0015, "loss": 1.2971, "step": 7690 }, { "epoch": 0.3942248617653082, "grad_norm": 0.24241647124290466, "learning_rate": 0.0015, "loss": 1.2968, "step": 7700 }, { "epoch": 0.39473684210526316, "grad_norm": 0.25648635625839233, "learning_rate": 0.0015, "loss": 1.2916, "step": 7710 }, { "epoch": 0.3952488224452181, "grad_norm": 0.2703993618488312, "learning_rate": 0.0015, "loss": 1.2909, "step": 7720 }, { "epoch": 0.39576080278517306, "grad_norm": 0.2558510899543762, "learning_rate": 0.0015, "loss": 1.2913, "step": 7730 }, { "epoch": 0.396272783125128, "grad_norm": 0.2394089698791504, "learning_rate": 0.0015, "loss": 1.2968, "step": 7740 }, { "epoch": 0.39678476346508296, "grad_norm": 0.2338177114725113, "learning_rate": 0.0015, "loss": 1.2894, "step": 7750 }, { "epoch": 0.3972967438050379, "grad_norm": 0.25422418117523193, "learning_rate": 0.0015, "loss": 1.2958, "step": 7760 }, { "epoch": 0.39780872414499285, "grad_norm": 0.2437313348054886, "learning_rate": 0.0015, "loss": 1.2878, "step": 7770 }, { "epoch": 0.3983207044849478, "grad_norm": 0.26623979210853577, "learning_rate": 0.0015, "loss": 1.2915, "step": 7780 }, { "epoch": 0.39883268482490275, "grad_norm": 0.24698524177074432, "learning_rate": 0.0015, "loss": 1.2949, "step": 7790 }, { "epoch": 0.39934466516485767, "grad_norm": 0.23496921360492706, "learning_rate": 0.0015, "loss": 1.3069, "step": 7800 }, { "epoch": 0.3998566455048126, "grad_norm": 0.2393864393234253, "learning_rate": 0.0015, "loss": 1.2913, "step": 7810 }, { "epoch": 0.40036862584476757, "grad_norm": 0.24716414511203766, "learning_rate": 0.0015, "loss": 1.2829, "step": 7820 }, { "epoch": 0.4008806061847225, "grad_norm": 0.24985013902187347, "learning_rate": 0.0015, "loss": 1.2773, "step": 7830 }, { "epoch": 0.40139258652467746, "grad_norm": 0.24895814061164856, "learning_rate": 0.0015, "loss": 1.2889, "step": 7840 }, { "epoch": 0.4019045668646324, "grad_norm": 0.2497827261686325, "learning_rate": 0.0015, "loss": 1.2747, "step": 7850 }, { "epoch": 0.40241654720458736, "grad_norm": 0.23879243433475494, "learning_rate": 0.0015, "loss": 1.3071, "step": 7860 }, { "epoch": 0.4029285275445423, "grad_norm": 0.24402157962322235, "learning_rate": 0.0015, "loss": 1.2924, "step": 7870 }, { "epoch": 0.40344050788449726, "grad_norm": 0.24736930429935455, "learning_rate": 0.0015, "loss": 1.2643, "step": 7880 }, { "epoch": 0.4039524882244522, "grad_norm": 0.2525321841239929, "learning_rate": 0.0015, "loss": 1.3014, "step": 7890 }, { "epoch": 0.40446446856440715, "grad_norm": 0.2575211226940155, "learning_rate": 0.0015, "loss": 1.2625, "step": 7900 }, { "epoch": 0.4049764489043621, "grad_norm": 0.24405083060264587, "learning_rate": 0.0015, "loss": 1.2834, "step": 7910 }, { "epoch": 0.405488429244317, "grad_norm": 0.28250402212142944, "learning_rate": 0.0015, "loss": 1.2814, "step": 7920 }, { "epoch": 0.40600040958427197, "grad_norm": 0.2795003056526184, "learning_rate": 0.0015, "loss": 1.3154, "step": 7930 }, { "epoch": 0.4065123899242269, "grad_norm": 0.24883300065994263, "learning_rate": 0.0015, "loss": 1.2887, "step": 7940 }, { "epoch": 0.40702437026418187, "grad_norm": 0.2502342164516449, "learning_rate": 0.0015, "loss": 1.3033, "step": 7950 }, { "epoch": 0.4075363506041368, "grad_norm": 0.24973638355731964, "learning_rate": 0.0015, "loss": 1.2947, "step": 7960 }, { "epoch": 0.40804833094409176, "grad_norm": 0.24371185898780823, "learning_rate": 0.0015, "loss": 1.2908, "step": 7970 }, { "epoch": 0.4085603112840467, "grad_norm": 0.24570930004119873, "learning_rate": 0.0015, "loss": 1.2879, "step": 7980 }, { "epoch": 0.40907229162400166, "grad_norm": 0.23717066645622253, "learning_rate": 0.0015, "loss": 1.2928, "step": 7990 }, { "epoch": 0.4095842719639566, "grad_norm": 0.24726137518882751, "learning_rate": 0.0015, "loss": 1.2915, "step": 8000 }, { "epoch": 0.41009625230391156, "grad_norm": 0.2352866679430008, "learning_rate": 0.0015, "loss": 1.2817, "step": 8010 }, { "epoch": 0.4106082326438665, "grad_norm": 0.251365065574646, "learning_rate": 0.0015, "loss": 1.2979, "step": 8020 }, { "epoch": 0.4111202129838214, "grad_norm": 0.22410385310649872, "learning_rate": 0.0015, "loss": 1.2749, "step": 8030 }, { "epoch": 0.4116321933237764, "grad_norm": 0.25029605627059937, "learning_rate": 0.0015, "loss": 1.2862, "step": 8040 }, { "epoch": 0.4121441736637313, "grad_norm": 0.25629550218582153, "learning_rate": 0.0015, "loss": 1.2749, "step": 8050 }, { "epoch": 0.41265615400368627, "grad_norm": 0.23836827278137207, "learning_rate": 0.0015, "loss": 1.28, "step": 8060 }, { "epoch": 0.4131681343436412, "grad_norm": 0.23752672970294952, "learning_rate": 0.0015, "loss": 1.2916, "step": 8070 }, { "epoch": 0.41368011468359617, "grad_norm": 0.26047077775001526, "learning_rate": 0.0015, "loss": 1.2718, "step": 8080 }, { "epoch": 0.4141920950235511, "grad_norm": 0.24297983944416046, "learning_rate": 0.0015, "loss": 1.2961, "step": 8090 }, { "epoch": 0.41470407536350606, "grad_norm": 0.24528458714485168, "learning_rate": 0.0015, "loss": 1.2591, "step": 8100 }, { "epoch": 0.415216055703461, "grad_norm": 0.24459367990493774, "learning_rate": 0.0015, "loss": 1.2754, "step": 8110 }, { "epoch": 0.41572803604341596, "grad_norm": 0.24630287289619446, "learning_rate": 0.0015, "loss": 1.2864, "step": 8120 }, { "epoch": 0.4162400163833709, "grad_norm": 0.2514908015727997, "learning_rate": 0.0015, "loss": 1.2847, "step": 8130 }, { "epoch": 0.4167519967233258, "grad_norm": 0.227911576628685, "learning_rate": 0.0015, "loss": 1.2798, "step": 8140 }, { "epoch": 0.4172639770632808, "grad_norm": 0.2512179911136627, "learning_rate": 0.0015, "loss": 1.2817, "step": 8150 }, { "epoch": 0.4177759574032357, "grad_norm": 0.24971604347229004, "learning_rate": 0.0015, "loss": 1.2856, "step": 8160 }, { "epoch": 0.4182879377431907, "grad_norm": 0.24980546534061432, "learning_rate": 0.0015, "loss": 1.2932, "step": 8170 }, { "epoch": 0.4187999180831456, "grad_norm": 0.2510388493537903, "learning_rate": 0.0015, "loss": 1.2849, "step": 8180 }, { "epoch": 0.41931189842310057, "grad_norm": 0.23916485905647278, "learning_rate": 0.0015, "loss": 1.2787, "step": 8190 }, { "epoch": 0.4198238787630555, "grad_norm": 0.2525003254413605, "learning_rate": 0.0015, "loss": 1.2856, "step": 8200 }, { "epoch": 0.42033585910301047, "grad_norm": 0.25865113735198975, "learning_rate": 0.0015, "loss": 1.2473, "step": 8210 }, { "epoch": 0.4208478394429654, "grad_norm": 0.24689891934394836, "learning_rate": 0.0015, "loss": 1.2663, "step": 8220 }, { "epoch": 0.4213598197829203, "grad_norm": 0.2257513701915741, "learning_rate": 0.0015, "loss": 1.2576, "step": 8230 }, { "epoch": 0.4218718001228753, "grad_norm": 0.2339119166135788, "learning_rate": 0.0015, "loss": 1.3053, "step": 8240 }, { "epoch": 0.4223837804628302, "grad_norm": 0.2590661942958832, "learning_rate": 0.0015, "loss": 1.2698, "step": 8250 }, { "epoch": 0.4228957608027852, "grad_norm": 0.2483995407819748, "learning_rate": 0.0015, "loss": 1.2728, "step": 8260 }, { "epoch": 0.4234077411427401, "grad_norm": 0.23534591495990753, "learning_rate": 0.0015, "loss": 1.2867, "step": 8270 }, { "epoch": 0.4239197214826951, "grad_norm": 0.22678501904010773, "learning_rate": 0.0015, "loss": 1.2775, "step": 8280 }, { "epoch": 0.42443170182265, "grad_norm": 0.2298179715871811, "learning_rate": 0.0015, "loss": 1.2866, "step": 8290 }, { "epoch": 0.424943682162605, "grad_norm": 0.2495158165693283, "learning_rate": 0.0015, "loss": 1.2762, "step": 8300 }, { "epoch": 0.4254556625025599, "grad_norm": 0.22808024287223816, "learning_rate": 0.0015, "loss": 1.269, "step": 8310 }, { "epoch": 0.42596764284251487, "grad_norm": 0.24249188601970673, "learning_rate": 0.0015, "loss": 1.2881, "step": 8320 }, { "epoch": 0.4264796231824698, "grad_norm": 0.2539406418800354, "learning_rate": 0.0015, "loss": 1.2618, "step": 8330 }, { "epoch": 0.4269916035224247, "grad_norm": 0.2367791384458542, "learning_rate": 0.0015, "loss": 1.2762, "step": 8340 }, { "epoch": 0.4275035838623797, "grad_norm": 0.2301592379808426, "learning_rate": 0.0015, "loss": 1.2724, "step": 8350 }, { "epoch": 0.4280155642023346, "grad_norm": 0.24136430025100708, "learning_rate": 0.0015, "loss": 1.2629, "step": 8360 }, { "epoch": 0.4285275445422896, "grad_norm": 0.23719066381454468, "learning_rate": 0.0015, "loss": 1.2624, "step": 8370 }, { "epoch": 0.4290395248822445, "grad_norm": 0.2514694631099701, "learning_rate": 0.0015, "loss": 1.2686, "step": 8380 }, { "epoch": 0.4295515052221995, "grad_norm": 0.24186182022094727, "learning_rate": 0.0015, "loss": 1.2823, "step": 8390 }, { "epoch": 0.4300634855621544, "grad_norm": 0.23494115471839905, "learning_rate": 0.0015, "loss": 1.2534, "step": 8400 }, { "epoch": 0.4305754659021094, "grad_norm": 0.2518327534198761, "learning_rate": 0.0015, "loss": 1.2913, "step": 8410 }, { "epoch": 0.4310874462420643, "grad_norm": 0.23622803390026093, "learning_rate": 0.0015, "loss": 1.2652, "step": 8420 }, { "epoch": 0.4315994265820193, "grad_norm": 0.22990188002586365, "learning_rate": 0.0015, "loss": 1.277, "step": 8430 }, { "epoch": 0.4321114069219742, "grad_norm": 0.23679761588573456, "learning_rate": 0.0015, "loss": 1.2839, "step": 8440 }, { "epoch": 0.4326233872619291, "grad_norm": 0.25512683391571045, "learning_rate": 0.0015, "loss": 1.2818, "step": 8450 }, { "epoch": 0.4331353676018841, "grad_norm": 0.24284730851650238, "learning_rate": 0.0015, "loss": 1.2882, "step": 8460 }, { "epoch": 0.433647347941839, "grad_norm": 0.24152646958827972, "learning_rate": 0.0015, "loss": 1.2727, "step": 8470 }, { "epoch": 0.434159328281794, "grad_norm": 0.24133774638175964, "learning_rate": 0.0015, "loss": 1.2743, "step": 8480 }, { "epoch": 0.4346713086217489, "grad_norm": 0.23270800709724426, "learning_rate": 0.0015, "loss": 1.2651, "step": 8490 }, { "epoch": 0.4351832889617039, "grad_norm": 0.2446971833705902, "learning_rate": 0.0015, "loss": 1.268, "step": 8500 }, { "epoch": 0.4356952693016588, "grad_norm": 0.23358875513076782, "learning_rate": 0.0015, "loss": 1.2774, "step": 8510 }, { "epoch": 0.4362072496416138, "grad_norm": 0.22265927493572235, "learning_rate": 0.0015, "loss": 1.2602, "step": 8520 }, { "epoch": 0.4367192299815687, "grad_norm": 0.22781646251678467, "learning_rate": 0.0015, "loss": 1.2724, "step": 8530 }, { "epoch": 0.4372312103215237, "grad_norm": 0.23868761956691742, "learning_rate": 0.0015, "loss": 1.2581, "step": 8540 }, { "epoch": 0.4377431906614786, "grad_norm": 0.2235594540834427, "learning_rate": 0.0015, "loss": 1.2741, "step": 8550 }, { "epoch": 0.4382551710014335, "grad_norm": 0.2419920712709427, "learning_rate": 0.0015, "loss": 1.2765, "step": 8560 }, { "epoch": 0.4387671513413885, "grad_norm": 0.27400338649749756, "learning_rate": 0.0015, "loss": 1.2635, "step": 8570 }, { "epoch": 0.4392791316813434, "grad_norm": 0.23386618494987488, "learning_rate": 0.0015, "loss": 1.2806, "step": 8580 }, { "epoch": 0.4397911120212984, "grad_norm": 0.24642907083034515, "learning_rate": 0.0015, "loss": 1.2739, "step": 8590 }, { "epoch": 0.4403030923612533, "grad_norm": 0.2347201406955719, "learning_rate": 0.0015, "loss": 1.2581, "step": 8600 }, { "epoch": 0.4408150727012083, "grad_norm": 0.22591201961040497, "learning_rate": 0.0015, "loss": 1.2882, "step": 8610 }, { "epoch": 0.4413270530411632, "grad_norm": 0.2508542537689209, "learning_rate": 0.0015, "loss": 1.2699, "step": 8620 }, { "epoch": 0.4418390333811182, "grad_norm": 0.2366652637720108, "learning_rate": 0.0015, "loss": 1.2522, "step": 8630 }, { "epoch": 0.4423510137210731, "grad_norm": 0.22938509285449982, "learning_rate": 0.0015, "loss": 1.2676, "step": 8640 }, { "epoch": 0.4428629940610281, "grad_norm": 0.22820281982421875, "learning_rate": 0.0015, "loss": 1.2712, "step": 8650 }, { "epoch": 0.443374974400983, "grad_norm": 0.22258944809436798, "learning_rate": 0.0015, "loss": 1.2721, "step": 8660 }, { "epoch": 0.4438869547409379, "grad_norm": 0.23942533135414124, "learning_rate": 0.0015, "loss": 1.2659, "step": 8670 }, { "epoch": 0.4443989350808929, "grad_norm": 0.23312713205814362, "learning_rate": 0.0015, "loss": 1.2755, "step": 8680 }, { "epoch": 0.4449109154208478, "grad_norm": 0.2283553183078766, "learning_rate": 0.0015, "loss": 1.2537, "step": 8690 }, { "epoch": 0.4454228957608028, "grad_norm": 0.23631595075130463, "learning_rate": 0.0015, "loss": 1.2487, "step": 8700 }, { "epoch": 0.4459348761007577, "grad_norm": 0.2447190135717392, "learning_rate": 0.0015, "loss": 1.2529, "step": 8710 }, { "epoch": 0.4464468564407127, "grad_norm": 0.24584966897964478, "learning_rate": 0.0015, "loss": 1.2738, "step": 8720 }, { "epoch": 0.4469588367806676, "grad_norm": 0.2374550849199295, "learning_rate": 0.0015, "loss": 1.2791, "step": 8730 }, { "epoch": 0.4474708171206226, "grad_norm": 0.240436390042305, "learning_rate": 0.0015, "loss": 1.2518, "step": 8740 }, { "epoch": 0.4479827974605775, "grad_norm": 0.23341523110866547, "learning_rate": 0.0015, "loss": 1.2688, "step": 8750 }, { "epoch": 0.4484947778005325, "grad_norm": 0.24230003356933594, "learning_rate": 0.0015, "loss": 1.2379, "step": 8760 }, { "epoch": 0.4490067581404874, "grad_norm": 0.2401583343744278, "learning_rate": 0.0015, "loss": 1.2699, "step": 8770 }, { "epoch": 0.4495187384804423, "grad_norm": 0.22647708654403687, "learning_rate": 0.0015, "loss": 1.2656, "step": 8780 }, { "epoch": 0.4500307188203973, "grad_norm": 0.24045558273792267, "learning_rate": 0.0015, "loss": 1.2531, "step": 8790 }, { "epoch": 0.4505426991603522, "grad_norm": 0.2597295045852661, "learning_rate": 0.0015, "loss": 1.2568, "step": 8800 }, { "epoch": 0.4510546795003072, "grad_norm": 0.22485364973545074, "learning_rate": 0.0015, "loss": 1.2478, "step": 8810 }, { "epoch": 0.4515666598402621, "grad_norm": 0.23133698105812073, "learning_rate": 0.0015, "loss": 1.2688, "step": 8820 }, { "epoch": 0.4520786401802171, "grad_norm": 0.22866465151309967, "learning_rate": 0.0015, "loss": 1.2516, "step": 8830 }, { "epoch": 0.452590620520172, "grad_norm": 0.2258300632238388, "learning_rate": 0.0015, "loss": 1.2571, "step": 8840 }, { "epoch": 0.453102600860127, "grad_norm": 0.23454922437667847, "learning_rate": 0.0015, "loss": 1.2413, "step": 8850 }, { "epoch": 0.4536145812000819, "grad_norm": 0.22673968970775604, "learning_rate": 0.0015, "loss": 1.2504, "step": 8860 }, { "epoch": 0.4541265615400369, "grad_norm": 0.24363909661769867, "learning_rate": 0.0015, "loss": 1.2511, "step": 8870 }, { "epoch": 0.4546385418799918, "grad_norm": 0.25056564807891846, "learning_rate": 0.0015, "loss": 1.2423, "step": 8880 }, { "epoch": 0.45515052221994673, "grad_norm": 0.2318125218153, "learning_rate": 0.0015, "loss": 1.2753, "step": 8890 }, { "epoch": 0.4556625025599017, "grad_norm": 0.22525230050086975, "learning_rate": 0.0015, "loss": 1.2389, "step": 8900 }, { "epoch": 0.4561744828998566, "grad_norm": 0.23389683663845062, "learning_rate": 0.0015, "loss": 1.2457, "step": 8910 }, { "epoch": 0.4566864632398116, "grad_norm": 0.23282834887504578, "learning_rate": 0.0015, "loss": 1.2628, "step": 8920 }, { "epoch": 0.4571984435797665, "grad_norm": 0.24000655114650726, "learning_rate": 0.0015, "loss": 1.2637, "step": 8930 }, { "epoch": 0.4577104239197215, "grad_norm": 0.22707650065422058, "learning_rate": 0.0015, "loss": 1.2651, "step": 8940 }, { "epoch": 0.4582224042596764, "grad_norm": 0.24544113874435425, "learning_rate": 0.0015, "loss": 1.2597, "step": 8950 }, { "epoch": 0.4587343845996314, "grad_norm": 0.2471536099910736, "learning_rate": 0.0015, "loss": 1.2583, "step": 8960 }, { "epoch": 0.4592463649395863, "grad_norm": 0.2399998903274536, "learning_rate": 0.0015, "loss": 1.2587, "step": 8970 }, { "epoch": 0.4597583452795413, "grad_norm": 0.239053875207901, "learning_rate": 0.0015, "loss": 1.2604, "step": 8980 }, { "epoch": 0.4602703256194962, "grad_norm": 0.23578478395938873, "learning_rate": 0.0015, "loss": 1.251, "step": 8990 }, { "epoch": 0.46078230595945113, "grad_norm": 0.22768492996692657, "learning_rate": 0.0015, "loss": 1.2584, "step": 9000 }, { "epoch": 0.4612942862994061, "grad_norm": 0.2407897710800171, "learning_rate": 0.0015, "loss": 1.2551, "step": 9010 }, { "epoch": 0.46180626663936103, "grad_norm": 0.24113765358924866, "learning_rate": 0.0015, "loss": 1.2686, "step": 9020 }, { "epoch": 0.462318246979316, "grad_norm": 0.23086939752101898, "learning_rate": 0.0015, "loss": 1.2521, "step": 9030 }, { "epoch": 0.4628302273192709, "grad_norm": 0.2428579032421112, "learning_rate": 0.0015, "loss": 1.2539, "step": 9040 }, { "epoch": 0.4633422076592259, "grad_norm": 0.23166462779045105, "learning_rate": 0.0015, "loss": 1.2452, "step": 9050 }, { "epoch": 0.4638541879991808, "grad_norm": 0.23648124933242798, "learning_rate": 0.0015, "loss": 1.2522, "step": 9060 }, { "epoch": 0.4643661683391358, "grad_norm": 0.23984448611736298, "learning_rate": 0.0015, "loss": 1.2556, "step": 9070 }, { "epoch": 0.4648781486790907, "grad_norm": 0.22623547911643982, "learning_rate": 0.0015, "loss": 1.2496, "step": 9080 }, { "epoch": 0.4653901290190457, "grad_norm": 0.23154547810554504, "learning_rate": 0.0015, "loss": 1.2688, "step": 9090 }, { "epoch": 0.4659021093590006, "grad_norm": 0.24457304179668427, "learning_rate": 0.0015, "loss": 1.2457, "step": 9100 }, { "epoch": 0.46641408969895554, "grad_norm": 0.22743169963359833, "learning_rate": 0.0015, "loss": 1.2533, "step": 9110 }, { "epoch": 0.4669260700389105, "grad_norm": 0.23356840014457703, "learning_rate": 0.0015, "loss": 1.2529, "step": 9120 }, { "epoch": 0.46743805037886543, "grad_norm": 0.23355025053024292, "learning_rate": 0.0015, "loss": 1.2595, "step": 9130 }, { "epoch": 0.4679500307188204, "grad_norm": 0.21895302832126617, "learning_rate": 0.0015, "loss": 1.2613, "step": 9140 }, { "epoch": 0.46846201105877533, "grad_norm": 0.23437921702861786, "learning_rate": 0.0015, "loss": 1.2631, "step": 9150 }, { "epoch": 0.4689739913987303, "grad_norm": 0.22628231346607208, "learning_rate": 0.0015, "loss": 1.2634, "step": 9160 }, { "epoch": 0.4694859717386852, "grad_norm": 0.2286689728498459, "learning_rate": 0.0015, "loss": 1.2412, "step": 9170 }, { "epoch": 0.4699979520786402, "grad_norm": 0.21830707788467407, "learning_rate": 0.0015, "loss": 1.2714, "step": 9180 }, { "epoch": 0.4705099324185951, "grad_norm": 0.2502080500125885, "learning_rate": 0.0015, "loss": 1.2419, "step": 9190 }, { "epoch": 0.4710219127585501, "grad_norm": 0.21958868205547333, "learning_rate": 0.0015, "loss": 1.2406, "step": 9200 }, { "epoch": 0.471533893098505, "grad_norm": 0.22988547384738922, "learning_rate": 0.0015, "loss": 1.2802, "step": 9210 }, { "epoch": 0.47204587343845994, "grad_norm": 0.22131182253360748, "learning_rate": 0.0015, "loss": 1.2496, "step": 9220 }, { "epoch": 0.4725578537784149, "grad_norm": 0.24254952371120453, "learning_rate": 0.0015, "loss": 1.2702, "step": 9230 }, { "epoch": 0.47306983411836984, "grad_norm": 0.22780196368694305, "learning_rate": 0.0015, "loss": 1.2452, "step": 9240 }, { "epoch": 0.4735818144583248, "grad_norm": 0.22993087768554688, "learning_rate": 0.0015, "loss": 1.2475, "step": 9250 }, { "epoch": 0.47409379479827973, "grad_norm": 0.21792259812355042, "learning_rate": 0.0015, "loss": 1.2532, "step": 9260 }, { "epoch": 0.4746057751382347, "grad_norm": 0.22392146289348602, "learning_rate": 0.0015, "loss": 1.2451, "step": 9270 }, { "epoch": 0.47511775547818963, "grad_norm": 0.24879144132137299, "learning_rate": 0.0015, "loss": 1.2492, "step": 9280 }, { "epoch": 0.4756297358181446, "grad_norm": 0.21757066249847412, "learning_rate": 0.0015, "loss": 1.2508, "step": 9290 }, { "epoch": 0.4761417161580995, "grad_norm": 0.23313356935977936, "learning_rate": 0.0015, "loss": 1.2532, "step": 9300 }, { "epoch": 0.4766536964980545, "grad_norm": 0.25208523869514465, "learning_rate": 0.0015, "loss": 1.2286, "step": 9310 }, { "epoch": 0.4771656768380094, "grad_norm": 0.2262171059846878, "learning_rate": 0.0015, "loss": 1.2398, "step": 9320 }, { "epoch": 0.47767765717796434, "grad_norm": 0.2252594530582428, "learning_rate": 0.0015, "loss": 1.2525, "step": 9330 }, { "epoch": 0.4781896375179193, "grad_norm": 0.2281142771244049, "learning_rate": 0.0015, "loss": 1.2453, "step": 9340 }, { "epoch": 0.47870161785787424, "grad_norm": 0.22341011464595795, "learning_rate": 0.0015, "loss": 1.2628, "step": 9350 }, { "epoch": 0.4792135981978292, "grad_norm": 0.22117526829242706, "learning_rate": 0.0015, "loss": 1.2597, "step": 9360 }, { "epoch": 0.47972557853778414, "grad_norm": 0.2359929233789444, "learning_rate": 0.0015, "loss": 1.2504, "step": 9370 }, { "epoch": 0.4802375588777391, "grad_norm": 0.2348971962928772, "learning_rate": 0.0015, "loss": 1.2352, "step": 9380 }, { "epoch": 0.48074953921769403, "grad_norm": 0.23461927473545074, "learning_rate": 0.0015, "loss": 1.2383, "step": 9390 }, { "epoch": 0.481261519557649, "grad_norm": 0.2463158220052719, "learning_rate": 0.0015, "loss": 1.2329, "step": 9400 }, { "epoch": 0.48177349989760393, "grad_norm": 0.240493506193161, "learning_rate": 0.0015, "loss": 1.2614, "step": 9410 }, { "epoch": 0.48228548023755885, "grad_norm": 0.22357292473316193, "learning_rate": 0.0015, "loss": 1.2553, "step": 9420 }, { "epoch": 0.4827974605775138, "grad_norm": 0.2223501205444336, "learning_rate": 0.0015, "loss": 1.245, "step": 9430 }, { "epoch": 0.48330944091746875, "grad_norm": 0.2278713434934616, "learning_rate": 0.0015, "loss": 1.2544, "step": 9440 }, { "epoch": 0.4838214212574237, "grad_norm": 0.23052051663398743, "learning_rate": 0.0015, "loss": 1.2614, "step": 9450 }, { "epoch": 0.48433340159737864, "grad_norm": 0.22685429453849792, "learning_rate": 0.0015, "loss": 1.2613, "step": 9460 }, { "epoch": 0.4848453819373336, "grad_norm": 0.22306014597415924, "learning_rate": 0.0015, "loss": 1.2289, "step": 9470 }, { "epoch": 0.48535736227728854, "grad_norm": 0.22385765612125397, "learning_rate": 0.0015, "loss": 1.2452, "step": 9480 }, { "epoch": 0.4858693426172435, "grad_norm": 0.22245322167873383, "learning_rate": 0.0015, "loss": 1.2541, "step": 9490 }, { "epoch": 0.48638132295719844, "grad_norm": 0.2279806137084961, "learning_rate": 0.0015, "loss": 1.2557, "step": 9500 }, { "epoch": 0.4868933032971534, "grad_norm": 0.2449760138988495, "learning_rate": 0.0015, "loss": 1.2358, "step": 9510 }, { "epoch": 0.48740528363710833, "grad_norm": 0.22621648013591766, "learning_rate": 0.0015, "loss": 1.2466, "step": 9520 }, { "epoch": 0.48791726397706325, "grad_norm": 0.22223225235939026, "learning_rate": 0.0015, "loss": 1.2522, "step": 9530 }, { "epoch": 0.48842924431701823, "grad_norm": 0.23512163758277893, "learning_rate": 0.0015, "loss": 1.2542, "step": 9540 }, { "epoch": 0.48894122465697315, "grad_norm": 0.21729685366153717, "learning_rate": 0.0015, "loss": 1.224, "step": 9550 }, { "epoch": 0.4894532049969281, "grad_norm": 0.22177568078041077, "learning_rate": 0.0015, "loss": 1.2624, "step": 9560 }, { "epoch": 0.48996518533688305, "grad_norm": 0.22674211859703064, "learning_rate": 0.0015, "loss": 1.2191, "step": 9570 }, { "epoch": 0.490477165676838, "grad_norm": 0.25243934988975525, "learning_rate": 0.0015, "loss": 1.2327, "step": 9580 }, { "epoch": 0.49098914601679294, "grad_norm": 0.22206014394760132, "learning_rate": 0.0015, "loss": 1.2369, "step": 9590 }, { "epoch": 0.4915011263567479, "grad_norm": 0.21915268898010254, "learning_rate": 0.0015, "loss": 1.2475, "step": 9600 }, { "epoch": 0.49201310669670284, "grad_norm": 0.219084694981575, "learning_rate": 0.0015, "loss": 1.2469, "step": 9610 }, { "epoch": 0.4925250870366578, "grad_norm": 0.21210044622421265, "learning_rate": 0.0015, "loss": 1.2385, "step": 9620 }, { "epoch": 0.49303706737661274, "grad_norm": 0.22252093255519867, "learning_rate": 0.0015, "loss": 1.2652, "step": 9630 }, { "epoch": 0.49354904771656766, "grad_norm": 0.2407660186290741, "learning_rate": 0.0015, "loss": 1.2436, "step": 9640 }, { "epoch": 0.49406102805652263, "grad_norm": 0.22691743075847626, "learning_rate": 0.0015, "loss": 1.2254, "step": 9650 }, { "epoch": 0.49457300839647755, "grad_norm": 0.23666201531887054, "learning_rate": 0.0015, "loss": 1.2297, "step": 9660 }, { "epoch": 0.49508498873643253, "grad_norm": 0.21549946069717407, "learning_rate": 0.0015, "loss": 1.238, "step": 9670 }, { "epoch": 0.49559696907638745, "grad_norm": 0.22083760797977448, "learning_rate": 0.0015, "loss": 1.2531, "step": 9680 }, { "epoch": 0.4961089494163424, "grad_norm": 0.23391181230545044, "learning_rate": 0.0015, "loss": 1.1973, "step": 9690 }, { "epoch": 0.49662092975629735, "grad_norm": 0.21990463137626648, "learning_rate": 0.0015, "loss": 1.2357, "step": 9700 }, { "epoch": 0.4971329100962523, "grad_norm": 0.22842243313789368, "learning_rate": 0.0015, "loss": 1.2566, "step": 9710 }, { "epoch": 0.49764489043620724, "grad_norm": 0.2154964953660965, "learning_rate": 0.0015, "loss": 1.2489, "step": 9720 }, { "epoch": 0.4981568707761622, "grad_norm": 0.23381535708904266, "learning_rate": 0.0015, "loss": 1.2379, "step": 9730 }, { "epoch": 0.49866885111611714, "grad_norm": 0.23405200242996216, "learning_rate": 0.0015, "loss": 1.251, "step": 9740 }, { "epoch": 0.49918083145607206, "grad_norm": 0.24905334413051605, "learning_rate": 0.0015, "loss": 1.2247, "step": 9750 }, { "epoch": 0.49969281179602704, "grad_norm": 0.22687901556491852, "learning_rate": 0.0015, "loss": 1.2362, "step": 9760 }, { "epoch": 0.500204792135982, "grad_norm": 0.21950958669185638, "learning_rate": 0.0015, "loss": 1.2304, "step": 9770 }, { "epoch": 0.5007167724759369, "grad_norm": 0.24343635141849518, "learning_rate": 0.0015, "loss": 1.2313, "step": 9780 }, { "epoch": 0.5012287528158919, "grad_norm": 0.2238016575574875, "learning_rate": 0.0015, "loss": 1.2504, "step": 9790 }, { "epoch": 0.5017407331558468, "grad_norm": 0.22162608802318573, "learning_rate": 0.0015, "loss": 1.2242, "step": 9800 }, { "epoch": 0.5022527134958018, "grad_norm": 0.2090781331062317, "learning_rate": 0.0015, "loss": 1.2214, "step": 9810 }, { "epoch": 0.5027646938357567, "grad_norm": 0.23861265182495117, "learning_rate": 0.0015, "loss": 1.2554, "step": 9820 }, { "epoch": 0.5032766741757116, "grad_norm": 0.24569468200206757, "learning_rate": 0.0015, "loss": 1.2525, "step": 9830 }, { "epoch": 0.5037886545156666, "grad_norm": 0.22713309526443481, "learning_rate": 0.0015, "loss": 1.2513, "step": 9840 }, { "epoch": 0.5043006348556216, "grad_norm": 0.22980822622776031, "learning_rate": 0.0015, "loss": 1.2493, "step": 9850 }, { "epoch": 0.5048126151955765, "grad_norm": 0.23609554767608643, "learning_rate": 0.0015, "loss": 1.2366, "step": 9860 }, { "epoch": 0.5053245955355314, "grad_norm": 0.2115827053785324, "learning_rate": 0.0015, "loss": 1.2558, "step": 9870 }, { "epoch": 0.5058365758754864, "grad_norm": 0.20506598055362701, "learning_rate": 0.0015, "loss": 1.2421, "step": 9880 }, { "epoch": 0.5063485562154413, "grad_norm": 0.21842671930789948, "learning_rate": 0.0015, "loss": 1.2328, "step": 9890 }, { "epoch": 0.5068605365553963, "grad_norm": 0.2390349954366684, "learning_rate": 0.0015, "loss": 1.2494, "step": 9900 }, { "epoch": 0.5073725168953512, "grad_norm": 0.21842844784259796, "learning_rate": 0.0015, "loss": 1.243, "step": 9910 }, { "epoch": 0.5078844972353062, "grad_norm": 0.21210695803165436, "learning_rate": 0.0015, "loss": 1.2438, "step": 9920 }, { "epoch": 0.5083964775752611, "grad_norm": 0.21826642751693726, "learning_rate": 0.0015, "loss": 1.2402, "step": 9930 }, { "epoch": 0.5089084579152161, "grad_norm": 0.21249307692050934, "learning_rate": 0.0015, "loss": 1.2168, "step": 9940 }, { "epoch": 0.509420438255171, "grad_norm": 0.22593854367733002, "learning_rate": 0.0015, "loss": 1.222, "step": 9950 }, { "epoch": 0.509932418595126, "grad_norm": 0.22972868382930756, "learning_rate": 0.0015, "loss": 1.2577, "step": 9960 }, { "epoch": 0.5104443989350809, "grad_norm": 0.21808108687400818, "learning_rate": 0.0015, "loss": 1.2301, "step": 9970 }, { "epoch": 0.5109563792750358, "grad_norm": 0.21525093913078308, "learning_rate": 0.0015, "loss": 1.2412, "step": 9980 }, { "epoch": 0.5114683596149908, "grad_norm": 0.22222475707530975, "learning_rate": 0.0015, "loss": 1.237, "step": 9990 }, { "epoch": 0.5119803399549457, "grad_norm": 0.23491185903549194, "learning_rate": 0.0015, "loss": 1.2436, "step": 10000 }, { "epoch": 0.5124923202949007, "grad_norm": 0.23327389359474182, "learning_rate": 0.0015, "loss": 1.223, "step": 10010 }, { "epoch": 0.5130043006348556, "grad_norm": 0.21225926280021667, "learning_rate": 0.0015, "loss": 1.2215, "step": 10020 }, { "epoch": 0.5135162809748106, "grad_norm": 0.21181495487689972, "learning_rate": 0.0015, "loss": 1.2297, "step": 10030 }, { "epoch": 0.5140282613147655, "grad_norm": 0.21177121996879578, "learning_rate": 0.0015, "loss": 1.2228, "step": 10040 }, { "epoch": 0.5145402416547205, "grad_norm": 0.22206859290599823, "learning_rate": 0.0015, "loss": 1.2579, "step": 10050 }, { "epoch": 0.5150522219946754, "grad_norm": 0.21502964198589325, "learning_rate": 0.0015, "loss": 1.2298, "step": 10060 }, { "epoch": 0.5155642023346303, "grad_norm": 0.22302408516407013, "learning_rate": 0.0015, "loss": 1.2226, "step": 10070 }, { "epoch": 0.5160761826745853, "grad_norm": 0.21490171551704407, "learning_rate": 0.0015, "loss": 1.2554, "step": 10080 }, { "epoch": 0.5165881630145402, "grad_norm": 0.22137999534606934, "learning_rate": 0.0015, "loss": 1.2189, "step": 10090 }, { "epoch": 0.5171001433544952, "grad_norm": 0.21363165974617004, "learning_rate": 0.0015, "loss": 1.2533, "step": 10100 }, { "epoch": 0.5176121236944501, "grad_norm": 0.23033399879932404, "learning_rate": 0.0015, "loss": 1.2406, "step": 10110 }, { "epoch": 0.5181241040344051, "grad_norm": 0.22692923247814178, "learning_rate": 0.0015, "loss": 1.2294, "step": 10120 }, { "epoch": 0.51863608437436, "grad_norm": 0.23053601384162903, "learning_rate": 0.0015, "loss": 1.2351, "step": 10130 }, { "epoch": 0.519148064714315, "grad_norm": 0.21180744469165802, "learning_rate": 0.0015, "loss": 1.2518, "step": 10140 }, { "epoch": 0.5196600450542699, "grad_norm": 0.2388363927602768, "learning_rate": 0.0015, "loss": 1.2188, "step": 10150 }, { "epoch": 0.5201720253942249, "grad_norm": 0.22531351447105408, "learning_rate": 0.0015, "loss": 1.2242, "step": 10160 }, { "epoch": 0.5206840057341798, "grad_norm": 0.2166026532649994, "learning_rate": 0.0015, "loss": 1.2122, "step": 10170 }, { "epoch": 0.5211959860741348, "grad_norm": 0.23231609165668488, "learning_rate": 0.0015, "loss": 1.2078, "step": 10180 }, { "epoch": 0.5217079664140897, "grad_norm": 0.2189248949289322, "learning_rate": 0.0015, "loss": 1.2392, "step": 10190 }, { "epoch": 0.5222199467540446, "grad_norm": 0.21036341786384583, "learning_rate": 0.0015, "loss": 1.2325, "step": 10200 }, { "epoch": 0.5227319270939996, "grad_norm": 0.21162335574626923, "learning_rate": 0.0015, "loss": 1.2348, "step": 10210 }, { "epoch": 0.5232439074339545, "grad_norm": 0.21558861434459686, "learning_rate": 0.0015, "loss": 1.2343, "step": 10220 }, { "epoch": 0.5237558877739095, "grad_norm": 0.22100234031677246, "learning_rate": 0.0015, "loss": 1.2373, "step": 10230 }, { "epoch": 0.5242678681138644, "grad_norm": 0.225110724568367, "learning_rate": 0.0015, "loss": 1.2368, "step": 10240 }, { "epoch": 0.5247798484538194, "grad_norm": 0.21674303710460663, "learning_rate": 0.0015, "loss": 1.2365, "step": 10250 }, { "epoch": 0.5252918287937743, "grad_norm": 0.23076364398002625, "learning_rate": 0.0015, "loss": 1.2202, "step": 10260 }, { "epoch": 0.5258038091337293, "grad_norm": 0.23180685937404633, "learning_rate": 0.0015, "loss": 1.234, "step": 10270 }, { "epoch": 0.5263157894736842, "grad_norm": 0.21580268442630768, "learning_rate": 0.0015, "loss": 1.2372, "step": 10280 }, { "epoch": 0.5268277698136391, "grad_norm": 0.2099384069442749, "learning_rate": 0.0015, "loss": 1.2118, "step": 10290 }, { "epoch": 0.5273397501535941, "grad_norm": 0.23586790263652802, "learning_rate": 0.0015, "loss": 1.2482, "step": 10300 }, { "epoch": 0.527851730493549, "grad_norm": 0.2149907946586609, "learning_rate": 0.0015, "loss": 1.2469, "step": 10310 }, { "epoch": 0.528363710833504, "grad_norm": 0.21271546185016632, "learning_rate": 0.0015, "loss": 1.2325, "step": 10320 }, { "epoch": 0.5288756911734589, "grad_norm": 0.20998185873031616, "learning_rate": 0.0015, "loss": 1.247, "step": 10330 }, { "epoch": 0.5293876715134139, "grad_norm": 0.23234112560749054, "learning_rate": 0.0015, "loss": 1.2395, "step": 10340 }, { "epoch": 0.5298996518533688, "grad_norm": 0.2261328250169754, "learning_rate": 0.0015, "loss": 1.2244, "step": 10350 }, { "epoch": 0.5304116321933238, "grad_norm": 0.2102995663881302, "learning_rate": 0.0015, "loss": 1.2307, "step": 10360 }, { "epoch": 0.5309236125332787, "grad_norm": 0.21107365190982819, "learning_rate": 0.0015, "loss": 1.2195, "step": 10370 }, { "epoch": 0.5314355928732337, "grad_norm": 0.2249820977449417, "learning_rate": 0.0015, "loss": 1.2499, "step": 10380 }, { "epoch": 0.5319475732131886, "grad_norm": 0.2142641544342041, "learning_rate": 0.0015, "loss": 1.2329, "step": 10390 }, { "epoch": 0.5324595535531436, "grad_norm": 0.2172004133462906, "learning_rate": 0.0015, "loss": 1.2098, "step": 10400 }, { "epoch": 0.5329715338930985, "grad_norm": 0.19984416663646698, "learning_rate": 0.0015, "loss": 1.2135, "step": 10410 }, { "epoch": 0.5334835142330534, "grad_norm": 0.22618216276168823, "learning_rate": 0.0015, "loss": 1.2173, "step": 10420 }, { "epoch": 0.5339954945730084, "grad_norm": 0.22356146574020386, "learning_rate": 0.0015, "loss": 1.2423, "step": 10430 }, { "epoch": 0.5345074749129634, "grad_norm": 0.2300511598587036, "learning_rate": 0.0015, "loss": 1.2308, "step": 10440 }, { "epoch": 0.5350194552529183, "grad_norm": 0.22442519664764404, "learning_rate": 0.0015, "loss": 1.2435, "step": 10450 }, { "epoch": 0.5355314355928732, "grad_norm": 0.21556325256824493, "learning_rate": 0.0015, "loss": 1.2499, "step": 10460 }, { "epoch": 0.5360434159328282, "grad_norm": 0.21608006954193115, "learning_rate": 0.0015, "loss": 1.2367, "step": 10470 }, { "epoch": 0.5365553962727831, "grad_norm": 0.22256320714950562, "learning_rate": 0.0015, "loss": 1.2325, "step": 10480 }, { "epoch": 0.5370673766127381, "grad_norm": 0.22661398351192474, "learning_rate": 0.0015, "loss": 1.2253, "step": 10490 }, { "epoch": 0.537579356952693, "grad_norm": 0.21327906847000122, "learning_rate": 0.0015, "loss": 1.215, "step": 10500 }, { "epoch": 0.5380913372926479, "grad_norm": 0.21695594489574432, "learning_rate": 0.0015, "loss": 1.2372, "step": 10510 }, { "epoch": 0.5386033176326029, "grad_norm": 0.20584948360919952, "learning_rate": 0.0015, "loss": 1.2491, "step": 10520 }, { "epoch": 0.5391152979725579, "grad_norm": 0.2212359756231308, "learning_rate": 0.0015, "loss": 1.2415, "step": 10530 }, { "epoch": 0.5396272783125128, "grad_norm": 0.2696838974952698, "learning_rate": 0.0015, "loss": 1.2254, "step": 10540 }, { "epoch": 0.5401392586524677, "grad_norm": 0.21417804062366486, "learning_rate": 0.0015, "loss": 1.2307, "step": 10550 }, { "epoch": 0.5406512389924227, "grad_norm": 0.2126997709274292, "learning_rate": 0.0015, "loss": 1.2134, "step": 10560 }, { "epoch": 0.5411632193323777, "grad_norm": 0.21690891683101654, "learning_rate": 0.0015, "loss": 1.2136, "step": 10570 }, { "epoch": 0.5416751996723326, "grad_norm": 0.21153941750526428, "learning_rate": 0.0015, "loss": 1.2157, "step": 10580 }, { "epoch": 0.5421871800122875, "grad_norm": 0.21089473366737366, "learning_rate": 0.0015, "loss": 1.2272, "step": 10590 }, { "epoch": 0.5426991603522425, "grad_norm": 0.2564721703529358, "learning_rate": 0.0015, "loss": 1.2026, "step": 10600 }, { "epoch": 0.5432111406921974, "grad_norm": 0.2235645204782486, "learning_rate": 0.0015, "loss": 1.2373, "step": 10610 }, { "epoch": 0.5437231210321524, "grad_norm": 0.21624423563480377, "learning_rate": 0.0015, "loss": 1.2208, "step": 10620 }, { "epoch": 0.5442351013721073, "grad_norm": 0.22423268854618073, "learning_rate": 0.0015, "loss": 1.2246, "step": 10630 }, { "epoch": 0.5447470817120622, "grad_norm": 0.20781590044498444, "learning_rate": 0.0015, "loss": 1.2197, "step": 10640 }, { "epoch": 0.5452590620520172, "grad_norm": 0.21837033331394196, "learning_rate": 0.0015, "loss": 1.2195, "step": 10650 }, { "epoch": 0.5457710423919722, "grad_norm": 0.23481489717960358, "learning_rate": 0.0015, "loss": 1.2221, "step": 10660 }, { "epoch": 0.5462830227319271, "grad_norm": 0.20522017776966095, "learning_rate": 0.0015, "loss": 1.2119, "step": 10670 }, { "epoch": 0.546795003071882, "grad_norm": 0.24082933366298676, "learning_rate": 0.0015, "loss": 1.2115, "step": 10680 }, { "epoch": 0.547306983411837, "grad_norm": 0.21289277076721191, "learning_rate": 0.0015, "loss": 1.2386, "step": 10690 }, { "epoch": 0.547818963751792, "grad_norm": 0.21003836393356323, "learning_rate": 0.0015, "loss": 1.2107, "step": 10700 }, { "epoch": 0.5483309440917469, "grad_norm": 0.21242666244506836, "learning_rate": 0.0015, "loss": 1.2429, "step": 10710 }, { "epoch": 0.5488429244317018, "grad_norm": 0.2271721065044403, "learning_rate": 0.0015, "loss": 1.2314, "step": 10720 }, { "epoch": 0.5493549047716567, "grad_norm": 0.21104945242404938, "learning_rate": 0.0015, "loss": 1.2342, "step": 10730 }, { "epoch": 0.5498668851116117, "grad_norm": 0.2085346132516861, "learning_rate": 0.0015, "loss": 1.2271, "step": 10740 }, { "epoch": 0.5503788654515667, "grad_norm": 0.22231942415237427, "learning_rate": 0.0015, "loss": 1.2306, "step": 10750 }, { "epoch": 0.5508908457915216, "grad_norm": 0.21245570480823517, "learning_rate": 0.0015, "loss": 1.2258, "step": 10760 }, { "epoch": 0.5514028261314765, "grad_norm": 0.19826675951480865, "learning_rate": 0.0015, "loss": 1.2163, "step": 10770 }, { "epoch": 0.5519148064714315, "grad_norm": 0.22163072228431702, "learning_rate": 0.0015, "loss": 1.229, "step": 10780 }, { "epoch": 0.5524267868113865, "grad_norm": 0.21903766691684723, "learning_rate": 0.0015, "loss": 1.2139, "step": 10790 }, { "epoch": 0.5529387671513414, "grad_norm": 0.2075222283601761, "learning_rate": 0.0015, "loss": 1.2129, "step": 10800 }, { "epoch": 0.5534507474912963, "grad_norm": 0.21938522160053253, "learning_rate": 0.0015, "loss": 1.2232, "step": 10810 }, { "epoch": 0.5539627278312513, "grad_norm": 0.21770595014095306, "learning_rate": 0.0015, "loss": 1.2465, "step": 10820 }, { "epoch": 0.5544747081712063, "grad_norm": 0.20712700486183167, "learning_rate": 0.0015, "loss": 1.2183, "step": 10830 }, { "epoch": 0.5549866885111612, "grad_norm": 0.22477000951766968, "learning_rate": 0.0015, "loss": 1.2186, "step": 10840 }, { "epoch": 0.5554986688511161, "grad_norm": 0.21939463913440704, "learning_rate": 0.0015, "loss": 1.2355, "step": 10850 }, { "epoch": 0.556010649191071, "grad_norm": 0.2524956464767456, "learning_rate": 0.0015, "loss": 1.2092, "step": 10860 }, { "epoch": 0.556522629531026, "grad_norm": 0.2115110456943512, "learning_rate": 0.0015, "loss": 1.2137, "step": 10870 }, { "epoch": 0.557034609870981, "grad_norm": 0.20509475469589233, "learning_rate": 0.0015, "loss": 1.2234, "step": 10880 }, { "epoch": 0.5575465902109359, "grad_norm": 0.21247826516628265, "learning_rate": 0.0015, "loss": 1.2234, "step": 10890 }, { "epoch": 0.5580585705508908, "grad_norm": 0.21064293384552002, "learning_rate": 0.0015, "loss": 1.2289, "step": 10900 }, { "epoch": 0.5585705508908458, "grad_norm": 0.21902692317962646, "learning_rate": 0.0015, "loss": 1.2085, "step": 10910 }, { "epoch": 0.5590825312308008, "grad_norm": 0.21347709000110626, "learning_rate": 0.0015, "loss": 1.2151, "step": 10920 }, { "epoch": 0.5595945115707557, "grad_norm": 0.20034797489643097, "learning_rate": 0.0015, "loss": 1.218, "step": 10930 }, { "epoch": 0.5601064919107106, "grad_norm": 0.20223546028137207, "learning_rate": 0.0015, "loss": 1.2176, "step": 10940 }, { "epoch": 0.5606184722506655, "grad_norm": 0.23771893978118896, "learning_rate": 0.0015, "loss": 1.2297, "step": 10950 }, { "epoch": 0.5611304525906206, "grad_norm": 0.24617038667201996, "learning_rate": 0.0015, "loss": 1.2331, "step": 10960 }, { "epoch": 0.5616424329305755, "grad_norm": 0.2169172167778015, "learning_rate": 0.0015, "loss": 1.2319, "step": 10970 }, { "epoch": 0.5621544132705304, "grad_norm": 0.21281367540359497, "learning_rate": 0.0015, "loss": 1.2205, "step": 10980 }, { "epoch": 0.5626663936104853, "grad_norm": 0.21705804765224457, "learning_rate": 0.0015, "loss": 1.2138, "step": 10990 }, { "epoch": 0.5631783739504403, "grad_norm": 0.19822140038013458, "learning_rate": 0.0015, "loss": 1.2339, "step": 11000 }, { "epoch": 0.5636903542903953, "grad_norm": 0.20427508652210236, "learning_rate": 0.0015, "loss": 1.2195, "step": 11010 }, { "epoch": 0.5642023346303502, "grad_norm": 0.2140669971704483, "learning_rate": 0.0015, "loss": 1.1975, "step": 11020 }, { "epoch": 0.5647143149703051, "grad_norm": 0.20858561992645264, "learning_rate": 0.0015, "loss": 1.208, "step": 11030 }, { "epoch": 0.5652262953102601, "grad_norm": 0.21723324060440063, "learning_rate": 0.0015, "loss": 1.2193, "step": 11040 }, { "epoch": 0.5657382756502151, "grad_norm": 0.21611307561397552, "learning_rate": 0.0015, "loss": 1.2199, "step": 11050 }, { "epoch": 0.56625025599017, "grad_norm": 0.21373584866523743, "learning_rate": 0.0015, "loss": 1.2065, "step": 11060 }, { "epoch": 0.5667622363301249, "grad_norm": 0.2058737874031067, "learning_rate": 0.0015, "loss": 1.2019, "step": 11070 }, { "epoch": 0.5672742166700798, "grad_norm": 0.22086186707019806, "learning_rate": 0.0015, "loss": 1.2108, "step": 11080 }, { "epoch": 0.5677861970100349, "grad_norm": 0.21599149703979492, "learning_rate": 0.0015, "loss": 1.209, "step": 11090 }, { "epoch": 0.5682981773499898, "grad_norm": 0.22241829335689545, "learning_rate": 0.0015, "loss": 1.2054, "step": 11100 }, { "epoch": 0.5688101576899447, "grad_norm": 0.19618919491767883, "learning_rate": 0.0015, "loss": 1.2293, "step": 11110 }, { "epoch": 0.5693221380298996, "grad_norm": 0.19986511766910553, "learning_rate": 0.0015, "loss": 1.1945, "step": 11120 }, { "epoch": 0.5698341183698546, "grad_norm": 0.20131878554821014, "learning_rate": 0.0015, "loss": 1.2082, "step": 11130 }, { "epoch": 0.5703460987098096, "grad_norm": 0.20655354857444763, "learning_rate": 0.0015, "loss": 1.2111, "step": 11140 }, { "epoch": 0.5708580790497645, "grad_norm": 0.2156609296798706, "learning_rate": 0.0015, "loss": 1.2288, "step": 11150 }, { "epoch": 0.5713700593897194, "grad_norm": 0.20367379486560822, "learning_rate": 0.0015, "loss": 1.2229, "step": 11160 }, { "epoch": 0.5718820397296743, "grad_norm": 0.20256848633289337, "learning_rate": 0.0015, "loss": 1.2236, "step": 11170 }, { "epoch": 0.5723940200696294, "grad_norm": 0.20862998068332672, "learning_rate": 0.0015, "loss": 1.2153, "step": 11180 }, { "epoch": 0.5729060004095843, "grad_norm": 0.21000482141971588, "learning_rate": 0.0015, "loss": 1.2164, "step": 11190 }, { "epoch": 0.5734179807495392, "grad_norm": 0.21778449416160583, "learning_rate": 0.0015, "loss": 1.2221, "step": 11200 }, { "epoch": 0.5739299610894941, "grad_norm": 0.20954222977161407, "learning_rate": 0.0015, "loss": 1.2257, "step": 11210 }, { "epoch": 0.5744419414294492, "grad_norm": 0.21105293929576874, "learning_rate": 0.0015, "loss": 1.2218, "step": 11220 }, { "epoch": 0.5749539217694041, "grad_norm": 0.2167726457118988, "learning_rate": 0.0015, "loss": 1.2193, "step": 11230 }, { "epoch": 0.575465902109359, "grad_norm": 0.20207858085632324, "learning_rate": 0.0015, "loss": 1.2243, "step": 11240 }, { "epoch": 0.5759778824493139, "grad_norm": 0.21475255489349365, "learning_rate": 0.0015, "loss": 1.2222, "step": 11250 }, { "epoch": 0.576489862789269, "grad_norm": 0.22506240010261536, "learning_rate": 0.0015, "loss": 1.2255, "step": 11260 }, { "epoch": 0.5770018431292239, "grad_norm": 0.23033161461353302, "learning_rate": 0.0015, "loss": 1.2287, "step": 11270 }, { "epoch": 0.5775138234691788, "grad_norm": 0.20455433428287506, "learning_rate": 0.0015, "loss": 1.2141, "step": 11280 }, { "epoch": 0.5780258038091337, "grad_norm": 0.22457818686962128, "learning_rate": 0.0015, "loss": 1.2329, "step": 11290 }, { "epoch": 0.5785377841490886, "grad_norm": 0.2011692076921463, "learning_rate": 0.0015, "loss": 1.213, "step": 11300 }, { "epoch": 0.5790497644890437, "grad_norm": 0.20488318800926208, "learning_rate": 0.0015, "loss": 1.2224, "step": 11310 }, { "epoch": 0.5795617448289986, "grad_norm": 0.22065885365009308, "learning_rate": 0.0015, "loss": 1.231, "step": 11320 }, { "epoch": 0.5800737251689535, "grad_norm": 0.20532485842704773, "learning_rate": 0.0015, "loss": 1.2051, "step": 11330 }, { "epoch": 0.5805857055089084, "grad_norm": 0.20642031729221344, "learning_rate": 0.0015, "loss": 1.215, "step": 11340 }, { "epoch": 0.5810976858488635, "grad_norm": 0.20660312473773956, "learning_rate": 0.0015, "loss": 1.2191, "step": 11350 }, { "epoch": 0.5816096661888184, "grad_norm": 0.21046073734760284, "learning_rate": 0.0015, "loss": 1.2142, "step": 11360 }, { "epoch": 0.5821216465287733, "grad_norm": 0.21846343576908112, "learning_rate": 0.0015, "loss": 1.2205, "step": 11370 }, { "epoch": 0.5826336268687282, "grad_norm": 0.20589517056941986, "learning_rate": 0.0015, "loss": 1.2057, "step": 11380 }, { "epoch": 0.5831456072086831, "grad_norm": 0.20691034197807312, "learning_rate": 0.0015, "loss": 1.2064, "step": 11390 }, { "epoch": 0.5836575875486382, "grad_norm": 0.21649305522441864, "learning_rate": 0.0015, "loss": 1.2032, "step": 11400 }, { "epoch": 0.5841695678885931, "grad_norm": 0.2329801321029663, "learning_rate": 0.0015, "loss": 1.2196, "step": 11410 }, { "epoch": 0.584681548228548, "grad_norm": 0.23256272077560425, "learning_rate": 0.0015, "loss": 1.2124, "step": 11420 }, { "epoch": 0.5851935285685029, "grad_norm": 0.2036832720041275, "learning_rate": 0.0015, "loss": 1.2098, "step": 11430 }, { "epoch": 0.585705508908458, "grad_norm": 0.21199576556682587, "learning_rate": 0.0015, "loss": 1.2266, "step": 11440 }, { "epoch": 0.5862174892484129, "grad_norm": 0.2015303373336792, "learning_rate": 0.0015, "loss": 1.1916, "step": 11450 }, { "epoch": 0.5867294695883678, "grad_norm": 0.2176617681980133, "learning_rate": 0.0015, "loss": 1.1888, "step": 11460 }, { "epoch": 0.5872414499283227, "grad_norm": 0.21515142917633057, "learning_rate": 0.0015, "loss": 1.2096, "step": 11470 }, { "epoch": 0.5877534302682776, "grad_norm": 0.21731404960155487, "learning_rate": 0.0015, "loss": 1.2077, "step": 11480 }, { "epoch": 0.5882654106082327, "grad_norm": 0.20664644241333008, "learning_rate": 0.0015, "loss": 1.2027, "step": 11490 }, { "epoch": 0.5887773909481876, "grad_norm": 0.20170624554157257, "learning_rate": 0.0015, "loss": 1.233, "step": 11500 }, { "epoch": 0.5892893712881425, "grad_norm": 0.2092912346124649, "learning_rate": 0.0015, "loss": 1.2004, "step": 11510 }, { "epoch": 0.5898013516280974, "grad_norm": 0.204396590590477, "learning_rate": 0.0015, "loss": 1.2052, "step": 11520 }, { "epoch": 0.5903133319680525, "grad_norm": 0.2075720578432083, "learning_rate": 0.0015, "loss": 1.2042, "step": 11530 }, { "epoch": 0.5908253123080074, "grad_norm": 0.19743815064430237, "learning_rate": 0.0015, "loss": 1.1974, "step": 11540 }, { "epoch": 0.5913372926479623, "grad_norm": 0.19972637295722961, "learning_rate": 0.0015, "loss": 1.2021, "step": 11550 }, { "epoch": 0.5918492729879172, "grad_norm": 0.20364214479923248, "learning_rate": 0.0015, "loss": 1.2149, "step": 11560 }, { "epoch": 0.5923612533278723, "grad_norm": 0.20440620183944702, "learning_rate": 0.0015, "loss": 1.1855, "step": 11570 }, { "epoch": 0.5928732336678272, "grad_norm": 0.21338412165641785, "learning_rate": 0.0015, "loss": 1.2022, "step": 11580 }, { "epoch": 0.5933852140077821, "grad_norm": 0.2067076861858368, "learning_rate": 0.0015, "loss": 1.2109, "step": 11590 }, { "epoch": 0.593897194347737, "grad_norm": 0.20598556101322174, "learning_rate": 0.0015, "loss": 1.2132, "step": 11600 }, { "epoch": 0.5944091746876919, "grad_norm": 0.21331733465194702, "learning_rate": 0.0015, "loss": 1.2021, "step": 11610 }, { "epoch": 0.594921155027647, "grad_norm": 0.23132279515266418, "learning_rate": 0.0015, "loss": 1.1954, "step": 11620 }, { "epoch": 0.5954331353676019, "grad_norm": 0.2226603478193283, "learning_rate": 0.0015, "loss": 1.2055, "step": 11630 }, { "epoch": 0.5959451157075568, "grad_norm": 0.19999723136425018, "learning_rate": 0.0015, "loss": 1.1961, "step": 11640 }, { "epoch": 0.5964570960475117, "grad_norm": 0.19226787984371185, "learning_rate": 0.0015, "loss": 1.2056, "step": 11650 }, { "epoch": 0.5969690763874668, "grad_norm": 0.20891976356506348, "learning_rate": 0.0015, "loss": 1.2023, "step": 11660 }, { "epoch": 0.5974810567274217, "grad_norm": 0.19218876957893372, "learning_rate": 0.0015, "loss": 1.2027, "step": 11670 }, { "epoch": 0.5979930370673766, "grad_norm": 0.20928075909614563, "learning_rate": 0.0015, "loss": 1.2176, "step": 11680 }, { "epoch": 0.5985050174073315, "grad_norm": 0.204718217253685, "learning_rate": 0.0015, "loss": 1.2014, "step": 11690 }, { "epoch": 0.5990169977472865, "grad_norm": 0.22869887948036194, "learning_rate": 0.0015, "loss": 1.1888, "step": 11700 }, { "epoch": 0.5995289780872415, "grad_norm": 0.19692908227443695, "learning_rate": 0.0015, "loss": 1.2161, "step": 11710 }, { "epoch": 0.6000409584271964, "grad_norm": 0.2099919617176056, "learning_rate": 0.0015, "loss": 1.1968, "step": 11720 }, { "epoch": 0.6005529387671513, "grad_norm": 0.20044675469398499, "learning_rate": 0.0015, "loss": 1.2071, "step": 11730 }, { "epoch": 0.6010649191071062, "grad_norm": 0.20645897090435028, "learning_rate": 0.0015, "loss": 1.2142, "step": 11740 }, { "epoch": 0.6015768994470613, "grad_norm": 0.20446518063545227, "learning_rate": 0.0015, "loss": 1.1907, "step": 11750 }, { "epoch": 0.6020888797870162, "grad_norm": 0.19793803989887238, "learning_rate": 0.0015, "loss": 1.2237, "step": 11760 }, { "epoch": 0.6026008601269711, "grad_norm": 0.23807552456855774, "learning_rate": 0.0015, "loss": 1.2072, "step": 11770 }, { "epoch": 0.603112840466926, "grad_norm": 0.20290285348892212, "learning_rate": 0.0015, "loss": 1.2048, "step": 11780 }, { "epoch": 0.6036248208068811, "grad_norm": 0.21725532412528992, "learning_rate": 0.0015, "loss": 1.1961, "step": 11790 }, { "epoch": 0.604136801146836, "grad_norm": 0.20467454195022583, "learning_rate": 0.0015, "loss": 1.2301, "step": 11800 }, { "epoch": 0.6046487814867909, "grad_norm": 0.20618268847465515, "learning_rate": 0.0015, "loss": 1.2026, "step": 11810 }, { "epoch": 0.6051607618267458, "grad_norm": 0.2097761183977127, "learning_rate": 0.0015, "loss": 1.1992, "step": 11820 }, { "epoch": 0.6056727421667008, "grad_norm": 0.21861404180526733, "learning_rate": 0.0015, "loss": 1.2047, "step": 11830 }, { "epoch": 0.6061847225066558, "grad_norm": 0.2066473513841629, "learning_rate": 0.0015, "loss": 1.2022, "step": 11840 }, { "epoch": 0.6066967028466107, "grad_norm": 0.203571155667305, "learning_rate": 0.0015, "loss": 1.1729, "step": 11850 }, { "epoch": 0.6072086831865656, "grad_norm": 0.20523090660572052, "learning_rate": 0.0015, "loss": 1.222, "step": 11860 }, { "epoch": 0.6077206635265205, "grad_norm": 0.2021731734275818, "learning_rate": 0.0015, "loss": 1.1983, "step": 11870 }, { "epoch": 0.6082326438664756, "grad_norm": 0.20643019676208496, "learning_rate": 0.0015, "loss": 1.2147, "step": 11880 }, { "epoch": 0.6087446242064305, "grad_norm": 0.21817174553871155, "learning_rate": 0.0015, "loss": 1.1988, "step": 11890 }, { "epoch": 0.6092566045463854, "grad_norm": 0.21849657595157623, "learning_rate": 0.0015, "loss": 1.1908, "step": 11900 }, { "epoch": 0.6097685848863403, "grad_norm": 0.21117383241653442, "learning_rate": 0.0015, "loss": 1.2318, "step": 11910 }, { "epoch": 0.6102805652262953, "grad_norm": 0.2120293378829956, "learning_rate": 0.0015, "loss": 1.2071, "step": 11920 }, { "epoch": 0.6107925455662503, "grad_norm": 0.20229868590831757, "learning_rate": 0.0015, "loss": 1.191, "step": 11930 }, { "epoch": 0.6113045259062052, "grad_norm": 0.19626636803150177, "learning_rate": 0.0015, "loss": 1.2172, "step": 11940 }, { "epoch": 0.6118165062461601, "grad_norm": 0.21968694031238556, "learning_rate": 0.0015, "loss": 1.1901, "step": 11950 }, { "epoch": 0.612328486586115, "grad_norm": 0.22982917726039886, "learning_rate": 0.0015, "loss": 1.2023, "step": 11960 }, { "epoch": 0.6128404669260701, "grad_norm": 0.20328094065189362, "learning_rate": 0.0015, "loss": 1.193, "step": 11970 }, { "epoch": 0.613352447266025, "grad_norm": 0.20781250298023224, "learning_rate": 0.0015, "loss": 1.1871, "step": 11980 }, { "epoch": 0.6138644276059799, "grad_norm": 0.1945171356201172, "learning_rate": 0.0015, "loss": 1.1954, "step": 11990 }, { "epoch": 0.6143764079459348, "grad_norm": 0.2018270492553711, "learning_rate": 0.0015, "loss": 1.1848, "step": 12000 }, { "epoch": 0.6148883882858899, "grad_norm": 0.20180918276309967, "learning_rate": 0.0015, "loss": 1.2081, "step": 12010 }, { "epoch": 0.6154003686258448, "grad_norm": 0.20221208035945892, "learning_rate": 0.0015, "loss": 1.2076, "step": 12020 }, { "epoch": 0.6159123489657997, "grad_norm": 0.2013401836156845, "learning_rate": 0.0015, "loss": 1.2211, "step": 12030 }, { "epoch": 0.6164243293057546, "grad_norm": 0.20016033947467804, "learning_rate": 0.0015, "loss": 1.2037, "step": 12040 }, { "epoch": 0.6169363096457096, "grad_norm": 0.20722372829914093, "learning_rate": 0.0015, "loss": 1.2052, "step": 12050 }, { "epoch": 0.6174482899856646, "grad_norm": 0.21285022795200348, "learning_rate": 0.0015, "loss": 1.2066, "step": 12060 }, { "epoch": 0.6179602703256195, "grad_norm": 0.21281997859477997, "learning_rate": 0.0015, "loss": 1.1955, "step": 12070 }, { "epoch": 0.6184722506655744, "grad_norm": 0.19675594568252563, "learning_rate": 0.0015, "loss": 1.2088, "step": 12080 }, { "epoch": 0.6189842310055294, "grad_norm": 0.21459296345710754, "learning_rate": 0.0015, "loss": 1.2255, "step": 12090 }, { "epoch": 0.6194962113454844, "grad_norm": 0.20511606335639954, "learning_rate": 0.0015, "loss": 1.2, "step": 12100 }, { "epoch": 0.6200081916854393, "grad_norm": 0.20228254795074463, "learning_rate": 0.0015, "loss": 1.1906, "step": 12110 }, { "epoch": 0.6205201720253942, "grad_norm": 0.1966087371110916, "learning_rate": 0.0015, "loss": 1.1771, "step": 12120 }, { "epoch": 0.6210321523653491, "grad_norm": 0.2050897479057312, "learning_rate": 0.0015, "loss": 1.1931, "step": 12130 }, { "epoch": 0.6215441327053041, "grad_norm": 0.20761296153068542, "learning_rate": 0.0015, "loss": 1.1796, "step": 12140 }, { "epoch": 0.6220561130452591, "grad_norm": 0.19282642006874084, "learning_rate": 0.0015, "loss": 1.2022, "step": 12150 }, { "epoch": 0.622568093385214, "grad_norm": 0.2018144577741623, "learning_rate": 0.0015, "loss": 1.2151, "step": 12160 }, { "epoch": 0.6230800737251689, "grad_norm": 0.19583159685134888, "learning_rate": 0.0015, "loss": 1.2027, "step": 12170 }, { "epoch": 0.6235920540651239, "grad_norm": 0.22334228456020355, "learning_rate": 0.0015, "loss": 1.2158, "step": 12180 }, { "epoch": 0.6241040344050789, "grad_norm": 0.2306404560804367, "learning_rate": 0.0015, "loss": 1.1856, "step": 12190 }, { "epoch": 0.6246160147450338, "grad_norm": 0.21355292201042175, "learning_rate": 0.0015, "loss": 1.1723, "step": 12200 }, { "epoch": 0.6251279950849887, "grad_norm": 0.19845044612884521, "learning_rate": 0.0015, "loss": 1.2052, "step": 12210 }, { "epoch": 0.6256399754249437, "grad_norm": 0.2062026709318161, "learning_rate": 0.0015, "loss": 1.2093, "step": 12220 }, { "epoch": 0.6261519557648987, "grad_norm": 0.20521892607212067, "learning_rate": 0.0015, "loss": 1.1888, "step": 12230 }, { "epoch": 0.6266639361048536, "grad_norm": 0.20746907591819763, "learning_rate": 0.0015, "loss": 1.2038, "step": 12240 }, { "epoch": 0.6271759164448085, "grad_norm": 0.19719459116458893, "learning_rate": 0.0015, "loss": 1.1995, "step": 12250 }, { "epoch": 0.6276878967847634, "grad_norm": 0.20681564509868622, "learning_rate": 0.0015, "loss": 1.2157, "step": 12260 }, { "epoch": 0.6281998771247184, "grad_norm": 0.20236019790172577, "learning_rate": 0.0015, "loss": 1.1859, "step": 12270 }, { "epoch": 0.6287118574646734, "grad_norm": 0.22654055058956146, "learning_rate": 0.0015, "loss": 1.1961, "step": 12280 }, { "epoch": 0.6292238378046283, "grad_norm": 0.1928294599056244, "learning_rate": 0.0015, "loss": 1.1932, "step": 12290 }, { "epoch": 0.6297358181445832, "grad_norm": 0.21249711513519287, "learning_rate": 0.0015, "loss": 1.2018, "step": 12300 }, { "epoch": 0.6302477984845382, "grad_norm": 0.19809094071388245, "learning_rate": 0.0015, "loss": 1.1806, "step": 12310 }, { "epoch": 0.6307597788244932, "grad_norm": 0.1965721845626831, "learning_rate": 0.0015, "loss": 1.1956, "step": 12320 }, { "epoch": 0.6312717591644481, "grad_norm": 0.20646794140338898, "learning_rate": 0.0015, "loss": 1.1907, "step": 12330 }, { "epoch": 0.631783739504403, "grad_norm": 0.19848330318927765, "learning_rate": 0.0015, "loss": 1.2049, "step": 12340 }, { "epoch": 0.632295719844358, "grad_norm": 0.19884952902793884, "learning_rate": 0.0015, "loss": 1.1886, "step": 12350 }, { "epoch": 0.6328077001843129, "grad_norm": 0.21490252017974854, "learning_rate": 0.0015, "loss": 1.2033, "step": 12360 }, { "epoch": 0.6333196805242679, "grad_norm": 0.21076445281505585, "learning_rate": 0.0015, "loss": 1.1725, "step": 12370 }, { "epoch": 0.6338316608642228, "grad_norm": 0.20743723213672638, "learning_rate": 0.0015, "loss": 1.2118, "step": 12380 }, { "epoch": 0.6343436412041777, "grad_norm": 0.2091572880744934, "learning_rate": 0.0015, "loss": 1.2058, "step": 12390 }, { "epoch": 0.6348556215441327, "grad_norm": 0.19593819975852966, "learning_rate": 0.0015, "loss": 1.1789, "step": 12400 }, { "epoch": 0.6353676018840877, "grad_norm": 0.21120460331439972, "learning_rate": 0.0015, "loss": 1.199, "step": 12410 }, { "epoch": 0.6358795822240426, "grad_norm": 0.19703616201877594, "learning_rate": 0.0015, "loss": 1.2062, "step": 12420 }, { "epoch": 0.6363915625639975, "grad_norm": 0.2228432148694992, "learning_rate": 0.0015, "loss": 1.2046, "step": 12430 }, { "epoch": 0.6369035429039525, "grad_norm": 0.19556592404842377, "learning_rate": 0.0015, "loss": 1.1958, "step": 12440 }, { "epoch": 0.6374155232439075, "grad_norm": 0.2118174135684967, "learning_rate": 0.0015, "loss": 1.2158, "step": 12450 }, { "epoch": 0.6379275035838624, "grad_norm": 0.19802866876125336, "learning_rate": 0.0015, "loss": 1.1889, "step": 12460 }, { "epoch": 0.6384394839238173, "grad_norm": 0.2045314460992813, "learning_rate": 0.0015, "loss": 1.2052, "step": 12470 }, { "epoch": 0.6389514642637723, "grad_norm": 0.20061345398426056, "learning_rate": 0.0015, "loss": 1.1859, "step": 12480 }, { "epoch": 0.6394634446037272, "grad_norm": 0.19872547686100006, "learning_rate": 0.0015, "loss": 1.2002, "step": 12490 }, { "epoch": 0.6399754249436822, "grad_norm": 0.2001519650220871, "learning_rate": 0.0015, "loss": 1.192, "step": 12500 }, { "epoch": 0.6404874052836371, "grad_norm": 0.20049947500228882, "learning_rate": 0.0015, "loss": 1.1919, "step": 12510 }, { "epoch": 0.640999385623592, "grad_norm": 0.20143716037273407, "learning_rate": 0.0015, "loss": 1.1821, "step": 12520 }, { "epoch": 0.641511365963547, "grad_norm": 0.19347570836544037, "learning_rate": 0.0015, "loss": 1.2135, "step": 12530 }, { "epoch": 0.642023346303502, "grad_norm": 0.19492658972740173, "learning_rate": 0.0015, "loss": 1.1891, "step": 12540 }, { "epoch": 0.6425353266434569, "grad_norm": 0.19527223706245422, "learning_rate": 0.0015, "loss": 1.2102, "step": 12550 }, { "epoch": 0.6430473069834118, "grad_norm": 0.1927892118692398, "learning_rate": 0.0015, "loss": 1.1714, "step": 12560 }, { "epoch": 0.6435592873233668, "grad_norm": 0.2009015530347824, "learning_rate": 0.0015, "loss": 1.2035, "step": 12570 }, { "epoch": 0.6440712676633217, "grad_norm": 0.21776844561100006, "learning_rate": 0.0015, "loss": 1.1777, "step": 12580 }, { "epoch": 0.6445832480032767, "grad_norm": 0.19154374301433563, "learning_rate": 0.0015, "loss": 1.1906, "step": 12590 }, { "epoch": 0.6450952283432316, "grad_norm": 0.19381144642829895, "learning_rate": 0.0015, "loss": 1.1778, "step": 12600 }, { "epoch": 0.6456072086831866, "grad_norm": 0.19017955660820007, "learning_rate": 0.0015, "loss": 1.1967, "step": 12610 }, { "epoch": 0.6461191890231415, "grad_norm": 0.21785299479961395, "learning_rate": 0.0015, "loss": 1.2088, "step": 12620 }, { "epoch": 0.6466311693630965, "grad_norm": 0.2039538025856018, "learning_rate": 0.0015, "loss": 1.1663, "step": 12630 }, { "epoch": 0.6471431497030514, "grad_norm": 0.19732427597045898, "learning_rate": 0.0015, "loss": 1.1913, "step": 12640 }, { "epoch": 0.6476551300430063, "grad_norm": 0.1911800503730774, "learning_rate": 0.0015, "loss": 1.2052, "step": 12650 }, { "epoch": 0.6481671103829613, "grad_norm": 0.19413244724273682, "learning_rate": 0.0015, "loss": 1.1804, "step": 12660 }, { "epoch": 0.6486790907229162, "grad_norm": 0.1838771104812622, "learning_rate": 0.0015, "loss": 1.1911, "step": 12670 }, { "epoch": 0.6491910710628712, "grad_norm": 0.1838536560535431, "learning_rate": 0.0015, "loss": 1.1991, "step": 12680 }, { "epoch": 0.6497030514028261, "grad_norm": 0.20453278720378876, "learning_rate": 0.0015, "loss": 1.1992, "step": 12690 }, { "epoch": 0.6502150317427811, "grad_norm": 0.21677398681640625, "learning_rate": 0.0015, "loss": 1.1811, "step": 12700 }, { "epoch": 0.650727012082736, "grad_norm": 0.19484928250312805, "learning_rate": 0.0015, "loss": 1.1924, "step": 12710 }, { "epoch": 0.651238992422691, "grad_norm": 0.1887393295764923, "learning_rate": 0.0015, "loss": 1.1978, "step": 12720 }, { "epoch": 0.6517509727626459, "grad_norm": 0.19239051640033722, "learning_rate": 0.0015, "loss": 1.2051, "step": 12730 }, { "epoch": 0.6522629531026009, "grad_norm": 0.20435065031051636, "learning_rate": 0.0015, "loss": 1.153, "step": 12740 }, { "epoch": 0.6527749334425558, "grad_norm": 0.2020270824432373, "learning_rate": 0.0015, "loss": 1.2096, "step": 12750 }, { "epoch": 0.6532869137825108, "grad_norm": 0.21720841526985168, "learning_rate": 0.0015, "loss": 1.1776, "step": 12760 }, { "epoch": 0.6537988941224657, "grad_norm": 0.19210828840732574, "learning_rate": 0.0015, "loss": 1.1894, "step": 12770 }, { "epoch": 0.6543108744624206, "grad_norm": 0.19044719636440277, "learning_rate": 0.0015, "loss": 1.1894, "step": 12780 }, { "epoch": 0.6548228548023756, "grad_norm": 0.20893365144729614, "learning_rate": 0.0015, "loss": 1.1916, "step": 12790 }, { "epoch": 0.6553348351423305, "grad_norm": 0.20288752019405365, "learning_rate": 0.0015, "loss": 1.2018, "step": 12800 }, { "epoch": 0.6558468154822855, "grad_norm": 0.1970445066690445, "learning_rate": 0.0015, "loss": 1.1728, "step": 12810 }, { "epoch": 0.6563587958222404, "grad_norm": 0.19928324222564697, "learning_rate": 0.0015, "loss": 1.1959, "step": 12820 }, { "epoch": 0.6568707761621954, "grad_norm": 0.1929846554994583, "learning_rate": 0.0015, "loss": 1.1885, "step": 12830 }, { "epoch": 0.6573827565021503, "grad_norm": 0.20633605122566223, "learning_rate": 0.0015, "loss": 1.2145, "step": 12840 }, { "epoch": 0.6578947368421053, "grad_norm": 0.19971442222595215, "learning_rate": 0.0015, "loss": 1.188, "step": 12850 }, { "epoch": 0.6584067171820602, "grad_norm": 0.18677356839179993, "learning_rate": 0.0015, "loss": 1.1943, "step": 12860 }, { "epoch": 0.6589186975220152, "grad_norm": 0.1940857172012329, "learning_rate": 0.0015, "loss": 1.1921, "step": 12870 }, { "epoch": 0.6594306778619701, "grad_norm": 0.20788009464740753, "learning_rate": 0.0015, "loss": 1.1922, "step": 12880 }, { "epoch": 0.659942658201925, "grad_norm": 0.20371931791305542, "learning_rate": 0.0015, "loss": 1.1963, "step": 12890 }, { "epoch": 0.66045463854188, "grad_norm": 0.19461549818515778, "learning_rate": 0.0015, "loss": 1.1639, "step": 12900 }, { "epoch": 0.6609666188818349, "grad_norm": 0.19904249906539917, "learning_rate": 0.0015, "loss": 1.1708, "step": 12910 }, { "epoch": 0.6614785992217899, "grad_norm": 0.2062397003173828, "learning_rate": 0.0015, "loss": 1.1937, "step": 12920 }, { "epoch": 0.6619905795617448, "grad_norm": 0.20642533898353577, "learning_rate": 0.0015, "loss": 1.1929, "step": 12930 }, { "epoch": 0.6625025599016998, "grad_norm": 0.19433195888996124, "learning_rate": 0.0015, "loss": 1.1886, "step": 12940 }, { "epoch": 0.6630145402416547, "grad_norm": 0.1951138973236084, "learning_rate": 0.0015, "loss": 1.1847, "step": 12950 }, { "epoch": 0.6635265205816097, "grad_norm": 0.19220565259456635, "learning_rate": 0.0015, "loss": 1.1847, "step": 12960 }, { "epoch": 0.6640385009215646, "grad_norm": 0.1887965053319931, "learning_rate": 0.0015, "loss": 1.1791, "step": 12970 }, { "epoch": 0.6645504812615196, "grad_norm": 0.18562547862529755, "learning_rate": 0.0015, "loss": 1.1677, "step": 12980 }, { "epoch": 0.6650624616014745, "grad_norm": 0.1826203167438507, "learning_rate": 0.0015, "loss": 1.1796, "step": 12990 }, { "epoch": 0.6655744419414295, "grad_norm": 0.18740873038768768, "learning_rate": 0.0015, "loss": 1.1797, "step": 13000 }, { "epoch": 0.6660864222813844, "grad_norm": 0.1979881227016449, "learning_rate": 0.0015, "loss": 1.198, "step": 13010 }, { "epoch": 0.6665984026213393, "grad_norm": 0.20608335733413696, "learning_rate": 0.0015, "loss": 1.1926, "step": 13020 }, { "epoch": 0.6671103829612943, "grad_norm": 0.21441541612148285, "learning_rate": 0.0015, "loss": 1.2049, "step": 13030 }, { "epoch": 0.6676223633012492, "grad_norm": 0.22678618133068085, "learning_rate": 0.0015, "loss": 1.1917, "step": 13040 }, { "epoch": 0.6681343436412042, "grad_norm": 0.19718590378761292, "learning_rate": 0.0015, "loss": 1.1968, "step": 13050 }, { "epoch": 0.6686463239811591, "grad_norm": 0.19607524573802948, "learning_rate": 0.0015, "loss": 1.1721, "step": 13060 }, { "epoch": 0.6691583043211141, "grad_norm": 0.19298435747623444, "learning_rate": 0.0015, "loss": 1.1979, "step": 13070 }, { "epoch": 0.669670284661069, "grad_norm": 0.19610482454299927, "learning_rate": 0.0015, "loss": 1.1919, "step": 13080 }, { "epoch": 0.670182265001024, "grad_norm": 0.19872240722179413, "learning_rate": 0.0015, "loss": 1.183, "step": 13090 }, { "epoch": 0.6706942453409789, "grad_norm": 0.1863928586244583, "learning_rate": 0.0015, "loss": 1.1868, "step": 13100 }, { "epoch": 0.6712062256809338, "grad_norm": 0.19495519995689392, "learning_rate": 0.0015, "loss": 1.2084, "step": 13110 }, { "epoch": 0.6717182060208888, "grad_norm": 0.19348977506160736, "learning_rate": 0.0015, "loss": 1.1981, "step": 13120 }, { "epoch": 0.6722301863608438, "grad_norm": 0.19418825209140778, "learning_rate": 0.0015, "loss": 1.2081, "step": 13130 }, { "epoch": 0.6727421667007987, "grad_norm": 0.19263537228107452, "learning_rate": 0.0015, "loss": 1.181, "step": 13140 }, { "epoch": 0.6732541470407536, "grad_norm": 0.19272197782993317, "learning_rate": 0.0015, "loss": 1.1908, "step": 13150 }, { "epoch": 0.6737661273807086, "grad_norm": 0.19103066623210907, "learning_rate": 0.0015, "loss": 1.164, "step": 13160 }, { "epoch": 0.6742781077206635, "grad_norm": 0.19996246695518494, "learning_rate": 0.0015, "loss": 1.1951, "step": 13170 }, { "epoch": 0.6747900880606185, "grad_norm": 0.2288653403520584, "learning_rate": 0.0015, "loss": 1.2188, "step": 13180 }, { "epoch": 0.6753020684005734, "grad_norm": 0.1978132575750351, "learning_rate": 0.0015, "loss": 1.177, "step": 13190 }, { "epoch": 0.6758140487405284, "grad_norm": 0.2042623907327652, "learning_rate": 0.0015, "loss": 1.1833, "step": 13200 }, { "epoch": 0.6763260290804833, "grad_norm": 0.1838945895433426, "learning_rate": 0.0015, "loss": 1.1638, "step": 13210 }, { "epoch": 0.6768380094204383, "grad_norm": 0.18537567555904388, "learning_rate": 0.0015, "loss": 1.1879, "step": 13220 }, { "epoch": 0.6773499897603932, "grad_norm": 0.19888518750667572, "learning_rate": 0.0015, "loss": 1.1648, "step": 13230 }, { "epoch": 0.6778619701003481, "grad_norm": 0.20373912155628204, "learning_rate": 0.0015, "loss": 1.2043, "step": 13240 }, { "epoch": 0.6783739504403031, "grad_norm": 0.19218416512012482, "learning_rate": 0.0015, "loss": 1.1553, "step": 13250 }, { "epoch": 0.678885930780258, "grad_norm": 0.1989835649728775, "learning_rate": 0.0015, "loss": 1.1679, "step": 13260 }, { "epoch": 0.679397911120213, "grad_norm": 0.20067016780376434, "learning_rate": 0.0015, "loss": 1.1827, "step": 13270 }, { "epoch": 0.6799098914601679, "grad_norm": 0.19568151235580444, "learning_rate": 0.0015, "loss": 1.1839, "step": 13280 }, { "epoch": 0.6804218718001229, "grad_norm": 0.2029784619808197, "learning_rate": 0.0015, "loss": 1.1787, "step": 13290 }, { "epoch": 0.6809338521400778, "grad_norm": 0.19807346165180206, "learning_rate": 0.0015, "loss": 1.1763, "step": 13300 }, { "epoch": 0.6814458324800328, "grad_norm": 0.1898653358221054, "learning_rate": 0.0015, "loss": 1.2075, "step": 13310 }, { "epoch": 0.6819578128199877, "grad_norm": 0.2038862705230713, "learning_rate": 0.0015, "loss": 1.1773, "step": 13320 }, { "epoch": 0.6824697931599426, "grad_norm": 0.18675602972507477, "learning_rate": 0.0015, "loss": 1.1888, "step": 13330 }, { "epoch": 0.6829817734998976, "grad_norm": 0.20663636922836304, "learning_rate": 0.0015, "loss": 1.169, "step": 13340 }, { "epoch": 0.6834937538398526, "grad_norm": 0.1998421996831894, "learning_rate": 0.0015, "loss": 1.1725, "step": 13350 }, { "epoch": 0.6840057341798075, "grad_norm": 0.20095355808734894, "learning_rate": 0.0015, "loss": 1.1727, "step": 13360 }, { "epoch": 0.6845177145197624, "grad_norm": 0.19053997099399567, "learning_rate": 0.0015, "loss": 1.1759, "step": 13370 }, { "epoch": 0.6850296948597174, "grad_norm": 0.20177049934864044, "learning_rate": 0.0015, "loss": 1.1845, "step": 13380 }, { "epoch": 0.6855416751996724, "grad_norm": 0.19868339598178864, "learning_rate": 0.0015, "loss": 1.178, "step": 13390 }, { "epoch": 0.6860536555396273, "grad_norm": 0.1922164112329483, "learning_rate": 0.0015, "loss": 1.1536, "step": 13400 }, { "epoch": 0.6865656358795822, "grad_norm": 0.2025415003299713, "learning_rate": 0.0015, "loss": 1.1849, "step": 13410 }, { "epoch": 0.6870776162195372, "grad_norm": 0.19813013076782227, "learning_rate": 0.0015, "loss": 1.1803, "step": 13420 }, { "epoch": 0.6875895965594921, "grad_norm": 0.18536531925201416, "learning_rate": 0.0015, "loss": 1.1686, "step": 13430 }, { "epoch": 0.6881015768994471, "grad_norm": 0.1998080015182495, "learning_rate": 0.0015, "loss": 1.1949, "step": 13440 }, { "epoch": 0.688613557239402, "grad_norm": 0.1955641508102417, "learning_rate": 0.0015, "loss": 1.1758, "step": 13450 }, { "epoch": 0.6891255375793569, "grad_norm": 0.19140900671482086, "learning_rate": 0.0015, "loss": 1.1675, "step": 13460 }, { "epoch": 0.6896375179193119, "grad_norm": 0.20261794328689575, "learning_rate": 0.0015, "loss": 1.1802, "step": 13470 }, { "epoch": 0.6901494982592669, "grad_norm": 0.19682539999485016, "learning_rate": 0.0015, "loss": 1.1798, "step": 13480 }, { "epoch": 0.6906614785992218, "grad_norm": 0.2020127922296524, "learning_rate": 0.0015, "loss": 1.172, "step": 13490 }, { "epoch": 0.6911734589391767, "grad_norm": 0.19824573397636414, "learning_rate": 0.0015, "loss": 1.1888, "step": 13500 }, { "epoch": 0.6916854392791317, "grad_norm": 0.20089636743068695, "learning_rate": 0.0015, "loss": 1.1865, "step": 13510 }, { "epoch": 0.6921974196190867, "grad_norm": 0.1954367458820343, "learning_rate": 0.0015, "loss": 1.1734, "step": 13520 }, { "epoch": 0.6927093999590416, "grad_norm": 0.1989155411720276, "learning_rate": 0.0015, "loss": 1.1676, "step": 13530 }, { "epoch": 0.6932213802989965, "grad_norm": 0.20354506373405457, "learning_rate": 0.0015, "loss": 1.1638, "step": 13540 }, { "epoch": 0.6937333606389514, "grad_norm": 0.18505001068115234, "learning_rate": 0.0015, "loss": 1.1623, "step": 13550 }, { "epoch": 0.6942453409789064, "grad_norm": 0.19758115708827972, "learning_rate": 0.0015, "loss": 1.1715, "step": 13560 }, { "epoch": 0.6947573213188614, "grad_norm": 0.19761599600315094, "learning_rate": 0.0015, "loss": 1.1892, "step": 13570 }, { "epoch": 0.6952693016588163, "grad_norm": 0.2028966248035431, "learning_rate": 0.0015, "loss": 1.1779, "step": 13580 }, { "epoch": 0.6957812819987712, "grad_norm": 0.1852991133928299, "learning_rate": 0.0015, "loss": 1.1756, "step": 13590 }, { "epoch": 0.6962932623387262, "grad_norm": 0.18972176313400269, "learning_rate": 0.0015, "loss": 1.1583, "step": 13600 }, { "epoch": 0.6968052426786812, "grad_norm": 0.18746834993362427, "learning_rate": 0.0015, "loss": 1.1758, "step": 13610 }, { "epoch": 0.6973172230186361, "grad_norm": 0.1831192672252655, "learning_rate": 0.0015, "loss": 1.1904, "step": 13620 }, { "epoch": 0.697829203358591, "grad_norm": 0.21230356395244598, "learning_rate": 0.0015, "loss": 1.1673, "step": 13630 }, { "epoch": 0.698341183698546, "grad_norm": 0.2109021544456482, "learning_rate": 0.0015, "loss": 1.176, "step": 13640 }, { "epoch": 0.698853164038501, "grad_norm": 0.18572686612606049, "learning_rate": 0.0015, "loss": 1.195, "step": 13650 }, { "epoch": 0.6993651443784559, "grad_norm": 0.19169217348098755, "learning_rate": 0.0015, "loss": 1.1865, "step": 13660 }, { "epoch": 0.6998771247184108, "grad_norm": 0.18918085098266602, "learning_rate": 0.0015, "loss": 1.1788, "step": 13670 }, { "epoch": 0.7003891050583657, "grad_norm": 0.19315798580646515, "learning_rate": 0.0014955269451601939, "loss": 1.1739, "step": 13680 }, { "epoch": 0.7009010853983207, "grad_norm": 0.18943412601947784, "learning_rate": 0.0014896616625957439, "loss": 1.1649, "step": 13690 }, { "epoch": 0.7014130657382757, "grad_norm": 0.19846367835998535, "learning_rate": 0.001483819382986655, "loss": 1.1883, "step": 13700 }, { "epoch": 0.7019250460782306, "grad_norm": 0.19269226491451263, "learning_rate": 0.001478000016118014, "loss": 1.1775, "step": 13710 }, { "epoch": 0.7024370264181855, "grad_norm": 0.19260330498218536, "learning_rate": 0.0014722034721287212, "loss": 1.169, "step": 13720 }, { "epoch": 0.7029490067581405, "grad_norm": 0.19868920743465424, "learning_rate": 0.0014664296615101004, "loss": 1.1671, "step": 13730 }, { "epoch": 0.7034609870980955, "grad_norm": 0.1958989053964615, "learning_rate": 0.0014606784951045186, "loss": 1.2049, "step": 13740 }, { "epoch": 0.7039729674380504, "grad_norm": 0.194174125790596, "learning_rate": 0.0014549498841040086, "loss": 1.1703, "step": 13750 }, { "epoch": 0.7044849477780053, "grad_norm": 0.19567228853702545, "learning_rate": 0.0014492437400488976, "loss": 1.1649, "step": 13760 }, { "epoch": 0.7049969281179602, "grad_norm": 0.191901296377182, "learning_rate": 0.0014435599748264416, "loss": 1.169, "step": 13770 }, { "epoch": 0.7055089084579153, "grad_norm": 0.1933002918958664, "learning_rate": 0.0014378985006694644, "loss": 1.1873, "step": 13780 }, { "epoch": 0.7060208887978702, "grad_norm": 0.20665253698825836, "learning_rate": 0.0014322592301550022, "loss": 1.1773, "step": 13790 }, { "epoch": 0.7065328691378251, "grad_norm": 0.19543762505054474, "learning_rate": 0.0014266420762029542, "loss": 1.1738, "step": 13800 }, { "epoch": 0.70704484947778, "grad_norm": 0.186002716422081, "learning_rate": 0.0014210469520747377, "loss": 1.1783, "step": 13810 }, { "epoch": 0.707556829817735, "grad_norm": 0.1872335523366928, "learning_rate": 0.0014154737713719476, "loss": 1.1918, "step": 13820 }, { "epoch": 0.70806881015769, "grad_norm": 0.1909414827823639, "learning_rate": 0.0014099224480350252, "loss": 1.1587, "step": 13830 }, { "epoch": 0.7085807904976449, "grad_norm": 0.1957162618637085, "learning_rate": 0.0014043928963419256, "loss": 1.1783, "step": 13840 }, { "epoch": 0.7090927708375998, "grad_norm": 0.1931842565536499, "learning_rate": 0.0013988850309067965, "loss": 1.1749, "step": 13850 }, { "epoch": 0.7096047511775547, "grad_norm": 0.2018897980451584, "learning_rate": 0.0013933987666786593, "loss": 1.1457, "step": 13860 }, { "epoch": 0.7101167315175098, "grad_norm": 0.1824326366186142, "learning_rate": 0.0013879340189400947, "loss": 1.1861, "step": 13870 }, { "epoch": 0.7106287118574647, "grad_norm": 0.19200804829597473, "learning_rate": 0.0013824907033059355, "loss": 1.1669, "step": 13880 }, { "epoch": 0.7111406921974196, "grad_norm": 0.18873439729213715, "learning_rate": 0.001377068735721964, "loss": 1.1555, "step": 13890 }, { "epoch": 0.7116526725373745, "grad_norm": 0.19836601614952087, "learning_rate": 0.0013716680324636122, "loss": 1.1536, "step": 13900 }, { "epoch": 0.7121646528773296, "grad_norm": 0.2006756067276001, "learning_rate": 0.001366288510134671, "loss": 1.1595, "step": 13910 }, { "epoch": 0.7126766332172845, "grad_norm": 0.18679478764533997, "learning_rate": 0.0013609300856660014, "loss": 1.1762, "step": 13920 }, { "epoch": 0.7131886135572394, "grad_norm": 0.19826917350292206, "learning_rate": 0.001355592676314251, "loss": 1.1752, "step": 13930 }, { "epoch": 0.7137005938971943, "grad_norm": 0.18885891139507294, "learning_rate": 0.0013502761996605787, "loss": 1.1731, "step": 13940 }, { "epoch": 0.7142125742371493, "grad_norm": 0.1888403594493866, "learning_rate": 0.0013449805736093791, "loss": 1.1536, "step": 13950 }, { "epoch": 0.7147245545771043, "grad_norm": 0.20078985393047333, "learning_rate": 0.0013397057163870173, "loss": 1.1545, "step": 13960 }, { "epoch": 0.7152365349170592, "grad_norm": 0.19156110286712646, "learning_rate": 0.001334451546540564, "loss": 1.148, "step": 13970 }, { "epoch": 0.7157485152570141, "grad_norm": 0.19765546917915344, "learning_rate": 0.0013292179829365398, "loss": 1.1776, "step": 13980 }, { "epoch": 0.716260495596969, "grad_norm": 0.1948610097169876, "learning_rate": 0.001324004944759661, "loss": 1.1597, "step": 13990 }, { "epoch": 0.7167724759369241, "grad_norm": 0.1816781461238861, "learning_rate": 0.0013188123515115915, "loss": 1.1484, "step": 14000 }, { "epoch": 0.717284456276879, "grad_norm": 0.2072591632604599, "learning_rate": 0.0013136401230097012, "loss": 1.1678, "step": 14010 }, { "epoch": 0.7177964366168339, "grad_norm": 0.19381676614284515, "learning_rate": 0.0013084881793858267, "loss": 1.1714, "step": 14020 }, { "epoch": 0.7183084169567888, "grad_norm": 0.178278848528862, "learning_rate": 0.0013033564410850373, "loss": 1.162, "step": 14030 }, { "epoch": 0.7188203972967439, "grad_norm": 0.18733732402324677, "learning_rate": 0.001298244828864409, "loss": 1.1565, "step": 14040 }, { "epoch": 0.7193323776366988, "grad_norm": 0.18614625930786133, "learning_rate": 0.0012931532637917983, "loss": 1.1678, "step": 14050 }, { "epoch": 0.7198443579766537, "grad_norm": 0.17618735134601593, "learning_rate": 0.0012880816672446245, "loss": 1.1723, "step": 14060 }, { "epoch": 0.7203563383166086, "grad_norm": 0.17765553295612335, "learning_rate": 0.0012830299609086558, "loss": 1.1511, "step": 14070 }, { "epoch": 0.7208683186565635, "grad_norm": 0.19092194736003876, "learning_rate": 0.0012779980667767994, "loss": 1.1679, "step": 14080 }, { "epoch": 0.7213802989965186, "grad_norm": 0.18768686056137085, "learning_rate": 0.0012729859071478975, "loss": 1.1668, "step": 14090 }, { "epoch": 0.7218922793364735, "grad_norm": 0.18770349025726318, "learning_rate": 0.0012679934046255271, "loss": 1.1749, "step": 14100 }, { "epoch": 0.7224042596764284, "grad_norm": 0.1935562640428543, "learning_rate": 0.0012630204821168047, "loss": 1.1535, "step": 14110 }, { "epoch": 0.7229162400163833, "grad_norm": 0.17887477576732635, "learning_rate": 0.0012580670628311967, "loss": 1.1541, "step": 14120 }, { "epoch": 0.7234282203563384, "grad_norm": 0.18734948337078094, "learning_rate": 0.0012531330702793323, "loss": 1.1669, "step": 14130 }, { "epoch": 0.7239402006962933, "grad_norm": 0.17879174649715424, "learning_rate": 0.0012482184282718238, "loss": 1.1905, "step": 14140 }, { "epoch": 0.7244521810362482, "grad_norm": 0.1950501948595047, "learning_rate": 0.0012433230609180889, "loss": 1.1446, "step": 14150 }, { "epoch": 0.7249641613762031, "grad_norm": 0.1801559329032898, "learning_rate": 0.0012384468926251798, "loss": 1.1367, "step": 14160 }, { "epoch": 0.7254761417161582, "grad_norm": 0.17999699711799622, "learning_rate": 0.0012335898480966146, "loss": 1.1402, "step": 14170 }, { "epoch": 0.7259881220561131, "grad_norm": 0.18279437720775604, "learning_rate": 0.0012287518523312166, "loss": 1.1597, "step": 14180 }, { "epoch": 0.726500102396068, "grad_norm": 0.19126516580581665, "learning_rate": 0.001223932830621954, "loss": 1.1604, "step": 14190 }, { "epoch": 0.7270120827360229, "grad_norm": 0.18581058084964752, "learning_rate": 0.0012191327085547877, "loss": 1.1532, "step": 14200 }, { "epoch": 0.7275240630759778, "grad_norm": 0.20243413746356964, "learning_rate": 0.0012143514120075223, "loss": 1.1495, "step": 14210 }, { "epoch": 0.7280360434159329, "grad_norm": 0.19404320418834686, "learning_rate": 0.0012095888671486597, "loss": 1.1567, "step": 14220 }, { "epoch": 0.7285480237558878, "grad_norm": 0.18503792583942413, "learning_rate": 0.0012048450004362614, "loss": 1.128, "step": 14230 }, { "epoch": 0.7290600040958427, "grad_norm": 0.19073212146759033, "learning_rate": 0.0012001197386168117, "loss": 1.1458, "step": 14240 }, { "epoch": 0.7295719844357976, "grad_norm": 0.2037813812494278, "learning_rate": 0.0011954130087240865, "loss": 1.1741, "step": 14250 }, { "epoch": 0.7300839647757527, "grad_norm": 0.18591246008872986, "learning_rate": 0.0011907247380780264, "loss": 1.1458, "step": 14260 }, { "epoch": 0.7305959451157076, "grad_norm": 0.18210938572883606, "learning_rate": 0.0011860548542836156, "loss": 1.1695, "step": 14270 }, { "epoch": 0.7311079254556625, "grad_norm": 0.18794593214988708, "learning_rate": 0.0011814032852297623, "loss": 1.1458, "step": 14280 }, { "epoch": 0.7316199057956174, "grad_norm": 0.1834757775068283, "learning_rate": 0.001176769959088186, "loss": 1.1485, "step": 14290 }, { "epoch": 0.7321318861355723, "grad_norm": 0.1770770400762558, "learning_rate": 0.0011721548043123092, "loss": 1.1473, "step": 14300 }, { "epoch": 0.7326438664755274, "grad_norm": 0.19540582597255707, "learning_rate": 0.0011675577496361507, "loss": 1.14, "step": 14310 }, { "epoch": 0.7331558468154823, "grad_norm": 0.18834899365901947, "learning_rate": 0.0011629787240732272, "loss": 1.1326, "step": 14320 }, { "epoch": 0.7336678271554372, "grad_norm": 0.18618904054164886, "learning_rate": 0.0011584176569154553, "loss": 1.1388, "step": 14330 }, { "epoch": 0.7341798074953921, "grad_norm": 0.1807902604341507, "learning_rate": 0.0011538744777320608, "loss": 1.1448, "step": 14340 }, { "epoch": 0.7346917878353472, "grad_norm": 0.18239812552928925, "learning_rate": 0.0011493491163684908, "loss": 1.1355, "step": 14350 }, { "epoch": 0.7352037681753021, "grad_norm": 0.18156401813030243, "learning_rate": 0.0011448415029453305, "loss": 1.1309, "step": 14360 }, { "epoch": 0.735715748515257, "grad_norm": 0.1813691258430481, "learning_rate": 0.0011403515678572234, "loss": 1.134, "step": 14370 }, { "epoch": 0.7362277288552119, "grad_norm": 0.18241450190544128, "learning_rate": 0.0011358792417717981, "loss": 1.1378, "step": 14380 }, { "epoch": 0.736739709195167, "grad_norm": 0.18394464254379272, "learning_rate": 0.001131424455628596, "loss": 1.1497, "step": 14390 }, { "epoch": 0.7372516895351219, "grad_norm": 0.18612609803676605, "learning_rate": 0.0011269871406380059, "loss": 1.1669, "step": 14400 }, { "epoch": 0.7377636698750768, "grad_norm": 0.18373136222362518, "learning_rate": 0.001122567228280201, "loss": 1.1453, "step": 14410 }, { "epoch": 0.7382756502150317, "grad_norm": 0.193937748670578, "learning_rate": 0.001118164650304082, "loss": 1.1357, "step": 14420 }, { "epoch": 0.7387876305549866, "grad_norm": 0.18261444568634033, "learning_rate": 0.0011137793387262216, "loss": 1.169, "step": 14430 }, { "epoch": 0.7392996108949417, "grad_norm": 0.19592134654521942, "learning_rate": 0.0011094112258298167, "loss": 1.1518, "step": 14440 }, { "epoch": 0.7398115912348966, "grad_norm": 0.17495043575763702, "learning_rate": 0.0011050602441636402, "loss": 1.1481, "step": 14450 }, { "epoch": 0.7403235715748515, "grad_norm": 0.18108507990837097, "learning_rate": 0.001100726326541002, "loss": 1.1327, "step": 14460 }, { "epoch": 0.7408355519148064, "grad_norm": 0.1797986775636673, "learning_rate": 0.00109640940603871, "loss": 1.1394, "step": 14470 }, { "epoch": 0.7413475322547615, "grad_norm": 0.18484458327293396, "learning_rate": 0.001092109415996037, "loss": 1.1188, "step": 14480 }, { "epoch": 0.7418595125947164, "grad_norm": 0.1784062534570694, "learning_rate": 0.0010878262900136915, "loss": 1.125, "step": 14490 }, { "epoch": 0.7423714929346713, "grad_norm": 0.1869814693927765, "learning_rate": 0.0010835599619527924, "loss": 1.1417, "step": 14500 }, { "epoch": 0.7428834732746262, "grad_norm": 0.18346761167049408, "learning_rate": 0.0010793103659338475, "loss": 1.1182, "step": 14510 }, { "epoch": 0.7433954536145811, "grad_norm": 0.188985213637352, "learning_rate": 0.0010750774363357356, "loss": 1.1412, "step": 14520 }, { "epoch": 0.7439074339545362, "grad_norm": 0.1802164912223816, "learning_rate": 0.0010708611077946955, "loss": 1.1338, "step": 14530 }, { "epoch": 0.7444194142944911, "grad_norm": 0.17940784990787506, "learning_rate": 0.0010666613152033133, "loss": 1.1477, "step": 14540 }, { "epoch": 0.744931394634446, "grad_norm": 0.19481126964092255, "learning_rate": 0.00106247799370952, "loss": 1.1306, "step": 14550 }, { "epoch": 0.7454433749744009, "grad_norm": 0.17663590610027313, "learning_rate": 0.0010583110787155889, "loss": 1.1395, "step": 14560 }, { "epoch": 0.745955355314356, "grad_norm": 0.18392081558704376, "learning_rate": 0.001054160505877137, "loss": 1.1339, "step": 14570 }, { "epoch": 0.7464673356543109, "grad_norm": 0.1872582733631134, "learning_rate": 0.0010500262111021333, "loss": 1.1271, "step": 14580 }, { "epoch": 0.7469793159942658, "grad_norm": 0.18514196574687958, "learning_rate": 0.0010459081305499078, "loss": 1.1561, "step": 14590 }, { "epoch": 0.7474912963342207, "grad_norm": 0.18902930617332458, "learning_rate": 0.0010418062006301674, "loss": 1.1402, "step": 14600 }, { "epoch": 0.7480032766741758, "grad_norm": 0.1824546903371811, "learning_rate": 0.0010377203580020109, "loss": 1.1439, "step": 14610 }, { "epoch": 0.7485152570141307, "grad_norm": 0.1803770363330841, "learning_rate": 0.001033650539572954, "loss": 1.1313, "step": 14620 }, { "epoch": 0.7490272373540856, "grad_norm": 0.19267936050891876, "learning_rate": 0.0010295966824979534, "loss": 1.1082, "step": 14630 }, { "epoch": 0.7495392176940405, "grad_norm": 0.19047097861766815, "learning_rate": 0.0010255587241784366, "loss": 1.122, "step": 14640 }, { "epoch": 0.7500511980339954, "grad_norm": 0.1689426302909851, "learning_rate": 0.0010215366022613358, "loss": 1.1172, "step": 14650 }, { "epoch": 0.7505631783739505, "grad_norm": 0.18644796311855316, "learning_rate": 0.0010175302546381246, "loss": 1.146, "step": 14660 }, { "epoch": 0.7510751587139054, "grad_norm": 0.18672852218151093, "learning_rate": 0.0010135396194438586, "loss": 1.1386, "step": 14670 }, { "epoch": 0.7515871390538603, "grad_norm": 0.19166767597198486, "learning_rate": 0.0010095646350562206, "loss": 1.1365, "step": 14680 }, { "epoch": 0.7520991193938152, "grad_norm": 0.18109376728534698, "learning_rate": 0.0010056052400945696, "loss": 1.113, "step": 14690 }, { "epoch": 0.7526110997337703, "grad_norm": 0.17950654029846191, "learning_rate": 0.0010016613734189915, "loss": 1.1474, "step": 14700 }, { "epoch": 0.7531230800737252, "grad_norm": 0.184305801987648, "learning_rate": 0.0009977329741293565, "loss": 1.1199, "step": 14710 }, { "epoch": 0.7536350604136801, "grad_norm": 0.18768514692783356, "learning_rate": 0.0009938199815643773, "loss": 1.1451, "step": 14720 }, { "epoch": 0.754147040753635, "grad_norm": 0.17981773614883423, "learning_rate": 0.0009899223353006738, "loss": 1.1423, "step": 14730 }, { "epoch": 0.75465902109359, "grad_norm": 0.17722870409488678, "learning_rate": 0.0009860399751518388, "loss": 1.1208, "step": 14740 }, { "epoch": 0.755171001433545, "grad_norm": 0.18367789685726166, "learning_rate": 0.0009821728411675095, "loss": 1.148, "step": 14750 }, { "epoch": 0.7556829817734999, "grad_norm": 0.18441089987754822, "learning_rate": 0.0009783208736324418, "loss": 1.1112, "step": 14760 }, { "epoch": 0.7561949621134548, "grad_norm": 0.1897488385438919, "learning_rate": 0.000974484013065587, "loss": 1.1231, "step": 14770 }, { "epoch": 0.7567069424534097, "grad_norm": 0.18716907501220703, "learning_rate": 0.0009706622002191746, "loss": 1.1018, "step": 14780 }, { "epoch": 0.7572189227933648, "grad_norm": 0.18121209740638733, "learning_rate": 0.0009668553760777972, "loss": 1.1225, "step": 14790 }, { "epoch": 0.7577309031333197, "grad_norm": 0.19911837577819824, "learning_rate": 0.0009630634818574985, "loss": 1.1266, "step": 14800 }, { "epoch": 0.7582428834732746, "grad_norm": 0.169275164604187, "learning_rate": 0.0009592864590048661, "loss": 1.1152, "step": 14810 }, { "epoch": 0.7587548638132295, "grad_norm": 0.1855994015932083, "learning_rate": 0.0009555242491961278, "loss": 1.1318, "step": 14820 }, { "epoch": 0.7592668441531846, "grad_norm": 0.17527516186237335, "learning_rate": 0.0009517767943362495, "loss": 1.0988, "step": 14830 }, { "epoch": 0.7597788244931395, "grad_norm": 0.18066614866256714, "learning_rate": 0.0009480440365580401, "loss": 1.1097, "step": 14840 }, { "epoch": 0.7602908048330944, "grad_norm": 0.17801222205162048, "learning_rate": 0.000944325918221256, "loss": 1.1196, "step": 14850 }, { "epoch": 0.7608027851730493, "grad_norm": 0.19464291632175446, "learning_rate": 0.0009406223819117125, "loss": 1.1319, "step": 14860 }, { "epoch": 0.7613147655130043, "grad_norm": 0.1878882348537445, "learning_rate": 0.0009369333704403964, "loss": 1.13, "step": 14870 }, { "epoch": 0.7618267458529593, "grad_norm": 0.17626269161701202, "learning_rate": 0.0009332588268425832, "loss": 1.1181, "step": 14880 }, { "epoch": 0.7623387261929142, "grad_norm": 0.1895529329776764, "learning_rate": 0.0009295986943769574, "loss": 1.1333, "step": 14890 }, { "epoch": 0.7628507065328691, "grad_norm": 0.1784052848815918, "learning_rate": 0.0009259529165247364, "loss": 1.1242, "step": 14900 }, { "epoch": 0.763362686872824, "grad_norm": 0.17965124547481537, "learning_rate": 0.0009223214369887976, "loss": 1.1258, "step": 14910 }, { "epoch": 0.7638746672127791, "grad_norm": 0.17978616058826447, "learning_rate": 0.0009187041996928093, "loss": 1.1125, "step": 14920 }, { "epoch": 0.764386647552734, "grad_norm": 0.18885265290737152, "learning_rate": 0.0009151011487803643, "loss": 1.1061, "step": 14930 }, { "epoch": 0.7648986278926889, "grad_norm": 0.18489712476730347, "learning_rate": 0.0009115122286141184, "loss": 1.127, "step": 14940 }, { "epoch": 0.7654106082326438, "grad_norm": 0.17437365651130676, "learning_rate": 0.0009079373837749296, "loss": 1.1148, "step": 14950 }, { "epoch": 0.7659225885725988, "grad_norm": 0.18147113919258118, "learning_rate": 0.0009043765590610044, "loss": 1.1014, "step": 14960 }, { "epoch": 0.7664345689125538, "grad_norm": 0.17263419926166534, "learning_rate": 0.0009008296994870436, "loss": 1.1118, "step": 14970 }, { "epoch": 0.7669465492525087, "grad_norm": 0.17921820282936096, "learning_rate": 0.000897296750283394, "loss": 1.1245, "step": 14980 }, { "epoch": 0.7674585295924636, "grad_norm": 0.17663663625717163, "learning_rate": 0.0008937776568952028, "loss": 1.1078, "step": 14990 }, { "epoch": 0.7679705099324186, "grad_norm": 0.17961500585079193, "learning_rate": 0.0008902723649815751, "loss": 1.0977, "step": 15000 }, { "epoch": 0.7684824902723736, "grad_norm": 0.18368123471736908, "learning_rate": 0.0008867808204147341, "loss": 1.103, "step": 15010 }, { "epoch": 0.7689944706123285, "grad_norm": 0.18269400298595428, "learning_rate": 0.0008833029692791867, "loss": 1.108, "step": 15020 }, { "epoch": 0.7695064509522834, "grad_norm": 0.1727774292230606, "learning_rate": 0.0008798387578708893, "loss": 1.1033, "step": 15030 }, { "epoch": 0.7700184312922383, "grad_norm": 0.18222136795520782, "learning_rate": 0.0008763881326964195, "loss": 1.1089, "step": 15040 }, { "epoch": 0.7705304116321933, "grad_norm": 0.1899970918893814, "learning_rate": 0.0008729510404721502, "loss": 1.1039, "step": 15050 }, { "epoch": 0.7710423919721483, "grad_norm": 0.18128469586372375, "learning_rate": 0.0008695274281234262, "loss": 1.1078, "step": 15060 }, { "epoch": 0.7715543723121032, "grad_norm": 0.18401475250720978, "learning_rate": 0.0008661172427837451, "loss": 1.1023, "step": 15070 }, { "epoch": 0.7720663526520581, "grad_norm": 0.18456844985485077, "learning_rate": 0.0008627204317939403, "loss": 1.1187, "step": 15080 }, { "epoch": 0.7725783329920131, "grad_norm": 0.18838796019554138, "learning_rate": 0.0008593369427013692, "loss": 1.0908, "step": 15090 }, { "epoch": 0.7730903133319681, "grad_norm": 0.18515382707118988, "learning_rate": 0.0008559667232591014, "loss": 1.1099, "step": 15100 }, { "epoch": 0.773602293671923, "grad_norm": 0.18746817111968994, "learning_rate": 0.0008526097214251135, "loss": 1.1073, "step": 15110 }, { "epoch": 0.7741142740118779, "grad_norm": 0.18683654069900513, "learning_rate": 0.0008492658853614846, "loss": 1.1195, "step": 15120 }, { "epoch": 0.7746262543518329, "grad_norm": 0.17560458183288574, "learning_rate": 0.0008459351634335962, "loss": 1.0919, "step": 15130 }, { "epoch": 0.7751382346917879, "grad_norm": 0.17539164423942566, "learning_rate": 0.0008426175042093346, "loss": 1.1082, "step": 15140 }, { "epoch": 0.7756502150317428, "grad_norm": 0.17442087829113007, "learning_rate": 0.0008393128564582973, "loss": 1.1077, "step": 15150 }, { "epoch": 0.7761621953716977, "grad_norm": 0.17610372602939606, "learning_rate": 0.0008360211691510009, "loss": 1.0976, "step": 15160 }, { "epoch": 0.7766741757116526, "grad_norm": 0.18700052797794342, "learning_rate": 0.0008327423914580938, "loss": 1.1116, "step": 15170 }, { "epoch": 0.7771861560516076, "grad_norm": 0.18908992409706116, "learning_rate": 0.0008294764727495717, "loss": 1.1266, "step": 15180 }, { "epoch": 0.7776981363915626, "grad_norm": 0.17554494738578796, "learning_rate": 0.0008262233625939947, "loss": 1.1228, "step": 15190 }, { "epoch": 0.7782101167315175, "grad_norm": 0.1848273128271103, "learning_rate": 0.0008229830107577095, "loss": 1.1032, "step": 15200 }, { "epoch": 0.7787220970714724, "grad_norm": 0.1751490831375122, "learning_rate": 0.0008197553672040732, "loss": 1.1022, "step": 15210 }, { "epoch": 0.7792340774114274, "grad_norm": 0.19107986986637115, "learning_rate": 0.0008165403820926805, "loss": 1.1107, "step": 15220 }, { "epoch": 0.7797460577513824, "grad_norm": 0.17038871347904205, "learning_rate": 0.000813338005778595, "loss": 1.0906, "step": 15230 }, { "epoch": 0.7802580380913373, "grad_norm": 0.17573246359825134, "learning_rate": 0.0008101481888115815, "loss": 1.1185, "step": 15240 }, { "epoch": 0.7807700184312922, "grad_norm": 0.18138054013252258, "learning_rate": 0.000806970881935343, "loss": 1.1068, "step": 15250 }, { "epoch": 0.7812819987712472, "grad_norm": 0.18504558503627777, "learning_rate": 0.00080380603608676, "loss": 1.1187, "step": 15260 }, { "epoch": 0.7817939791112021, "grad_norm": 0.1914263665676117, "learning_rate": 0.0008006536023951326, "loss": 1.1028, "step": 15270 }, { "epoch": 0.7823059594511571, "grad_norm": 0.17930828034877777, "learning_rate": 0.0007975135321814267, "loss": 1.12, "step": 15280 }, { "epoch": 0.782817939791112, "grad_norm": 0.18710237741470337, "learning_rate": 0.0007943857769575209, "loss": 1.0943, "step": 15290 }, { "epoch": 0.783329920131067, "grad_norm": 0.18522420525550842, "learning_rate": 0.0007912702884254589, "loss": 1.1125, "step": 15300 }, { "epoch": 0.7838419004710219, "grad_norm": 0.17634257674217224, "learning_rate": 0.0007881670184767039, "loss": 1.0855, "step": 15310 }, { "epoch": 0.7843538808109769, "grad_norm": 0.1925361305475235, "learning_rate": 0.0007850759191913941, "loss": 1.0957, "step": 15320 }, { "epoch": 0.7848658611509318, "grad_norm": 0.18163706362247467, "learning_rate": 0.0007819969428376047, "loss": 1.0994, "step": 15330 }, { "epoch": 0.7853778414908867, "grad_norm": 0.1802321821451187, "learning_rate": 0.0007789300418706098, "loss": 1.1043, "step": 15340 }, { "epoch": 0.7858898218308417, "grad_norm": 0.20434251427650452, "learning_rate": 0.0007758751689321484, "loss": 1.0943, "step": 15350 }, { "epoch": 0.7864018021707967, "grad_norm": 0.1818198412656784, "learning_rate": 0.0007728322768496924, "loss": 1.0916, "step": 15360 }, { "epoch": 0.7869137825107516, "grad_norm": 0.18060991168022156, "learning_rate": 0.0007698013186357197, "loss": 1.1122, "step": 15370 }, { "epoch": 0.7874257628507065, "grad_norm": 0.18546059727668762, "learning_rate": 0.0007667822474869874, "loss": 1.1075, "step": 15380 }, { "epoch": 0.7879377431906615, "grad_norm": 0.18823228776454926, "learning_rate": 0.0007637750167838097, "loss": 1.1197, "step": 15390 }, { "epoch": 0.7884497235306164, "grad_norm": 0.17590127885341644, "learning_rate": 0.0007607795800893374, "loss": 1.0865, "step": 15400 }, { "epoch": 0.7889617038705714, "grad_norm": 0.18602034449577332, "learning_rate": 0.000757795891148842, "loss": 1.1, "step": 15410 }, { "epoch": 0.7894736842105263, "grad_norm": 0.19357922673225403, "learning_rate": 0.0007548239038889995, "loss": 1.1015, "step": 15420 }, { "epoch": 0.7899856645504812, "grad_norm": 0.17590965330600739, "learning_rate": 0.000751863572417181, "loss": 1.1113, "step": 15430 }, { "epoch": 0.7904976448904362, "grad_norm": 0.1751716434955597, "learning_rate": 0.0007489148510207429, "loss": 1.0898, "step": 15440 }, { "epoch": 0.7910096252303912, "grad_norm": 0.17589299380779266, "learning_rate": 0.000745977694166321, "loss": 1.0931, "step": 15450 }, { "epoch": 0.7915216055703461, "grad_norm": 0.17544785141944885, "learning_rate": 0.0007430520564991282, "loss": 1.0914, "step": 15460 }, { "epoch": 0.792033585910301, "grad_norm": 0.18367989361286163, "learning_rate": 0.0007401378928422531, "loss": 1.1043, "step": 15470 }, { "epoch": 0.792545566250256, "grad_norm": 0.17736022174358368, "learning_rate": 0.0007372351581959634, "loss": 1.1252, "step": 15480 }, { "epoch": 0.7930575465902109, "grad_norm": 0.18722687661647797, "learning_rate": 0.0007343438077370098, "loss": 1.095, "step": 15490 }, { "epoch": 0.7935695269301659, "grad_norm": 0.1756405234336853, "learning_rate": 0.0007314637968179351, "loss": 1.1017, "step": 15500 }, { "epoch": 0.7940815072701208, "grad_norm": 0.17875617742538452, "learning_rate": 0.0007285950809663841, "loss": 1.0979, "step": 15510 }, { "epoch": 0.7945934876100758, "grad_norm": 0.17093615233898163, "learning_rate": 0.0007257376158844169, "loss": 1.0886, "step": 15520 }, { "epoch": 0.7951054679500307, "grad_norm": 0.18361063301563263, "learning_rate": 0.0007228913574478252, "loss": 1.1089, "step": 15530 }, { "epoch": 0.7956174482899857, "grad_norm": 0.1857183277606964, "learning_rate": 0.0007200562617054503, "loss": 1.0806, "step": 15540 }, { "epoch": 0.7961294286299406, "grad_norm": 0.1974077820777893, "learning_rate": 0.0007172322848785056, "loss": 1.088, "step": 15550 }, { "epoch": 0.7966414089698955, "grad_norm": 0.173116534948349, "learning_rate": 0.0007144193833598987, "loss": 1.0921, "step": 15560 }, { "epoch": 0.7971533893098505, "grad_norm": 0.17753879725933075, "learning_rate": 0.0007116175137135599, "loss": 1.0846, "step": 15570 }, { "epoch": 0.7976653696498055, "grad_norm": 0.1796150505542755, "learning_rate": 0.0007088266326737707, "loss": 1.0816, "step": 15580 }, { "epoch": 0.7981773499897604, "grad_norm": 0.17271041870117188, "learning_rate": 0.0007060466971444953, "loss": 1.0875, "step": 15590 }, { "epoch": 0.7986893303297153, "grad_norm": 0.1766566038131714, "learning_rate": 0.0007032776641987162, "loss": 1.085, "step": 15600 }, { "epoch": 0.7992013106696703, "grad_norm": 0.17464908957481384, "learning_rate": 0.0007005194910777697, "loss": 1.0669, "step": 15610 }, { "epoch": 0.7997132910096252, "grad_norm": 0.18235880136489868, "learning_rate": 0.0006977721351906876, "loss": 1.0983, "step": 15620 }, { "epoch": 0.8002252713495802, "grad_norm": 0.17582911252975464, "learning_rate": 0.0006950355541135377, "loss": 1.0748, "step": 15630 }, { "epoch": 0.8007372516895351, "grad_norm": 0.18529601395130157, "learning_rate": 0.0006923097055887701, "loss": 1.082, "step": 15640 }, { "epoch": 0.80124923202949, "grad_norm": 0.18771891295909882, "learning_rate": 0.000689594547524564, "loss": 1.0792, "step": 15650 }, { "epoch": 0.801761212369445, "grad_norm": 0.18567664921283722, "learning_rate": 0.0006868900379941773, "loss": 1.0929, "step": 15660 }, { "epoch": 0.8022731927094, "grad_norm": 0.18062008917331696, "learning_rate": 0.0006841961352353004, "loss": 1.0952, "step": 15670 }, { "epoch": 0.8027851730493549, "grad_norm": 0.17383413016796112, "learning_rate": 0.0006815127976494104, "loss": 1.1029, "step": 15680 }, { "epoch": 0.8032971533893098, "grad_norm": 0.17971891164779663, "learning_rate": 0.0006788399838011287, "loss": 1.1032, "step": 15690 }, { "epoch": 0.8038091337292648, "grad_norm": 0.17936407029628754, "learning_rate": 0.0006761776524175815, "loss": 1.1001, "step": 15700 }, { "epoch": 0.8043211140692197, "grad_norm": 0.18222102522850037, "learning_rate": 0.0006735257623877627, "loss": 1.0872, "step": 15710 }, { "epoch": 0.8048330944091747, "grad_norm": 0.18015074729919434, "learning_rate": 0.0006708842727618985, "loss": 1.0991, "step": 15720 }, { "epoch": 0.8053450747491296, "grad_norm": 0.17375022172927856, "learning_rate": 0.0006682531427508156, "loss": 1.0623, "step": 15730 }, { "epoch": 0.8058570550890846, "grad_norm": 0.1764671802520752, "learning_rate": 0.0006656323317253108, "loss": 1.0984, "step": 15740 }, { "epoch": 0.8063690354290395, "grad_norm": 0.1692001074552536, "learning_rate": 0.0006630217992155241, "loss": 1.0859, "step": 15750 }, { "epoch": 0.8068810157689945, "grad_norm": 0.17819392681121826, "learning_rate": 0.0006604215049103134, "loss": 1.0899, "step": 15760 }, { "epoch": 0.8073929961089494, "grad_norm": 0.17758633196353912, "learning_rate": 0.0006578314086566325, "loss": 1.0826, "step": 15770 }, { "epoch": 0.8079049764489044, "grad_norm": 0.17600396275520325, "learning_rate": 0.0006552514704589104, "loss": 1.0912, "step": 15780 }, { "epoch": 0.8084169567888593, "grad_norm": 0.177523672580719, "learning_rate": 0.0006526816504784343, "loss": 1.0814, "step": 15790 }, { "epoch": 0.8089289371288143, "grad_norm": 0.17935074865818024, "learning_rate": 0.0006501219090327343, "loss": 1.0859, "step": 15800 }, { "epoch": 0.8094409174687692, "grad_norm": 0.18292473256587982, "learning_rate": 0.0006475722065949703, "loss": 1.0716, "step": 15810 }, { "epoch": 0.8099528978087241, "grad_norm": 0.18235322833061218, "learning_rate": 0.000645032503793322, "loss": 1.085, "step": 15820 }, { "epoch": 0.8104648781486791, "grad_norm": 0.18412081897258759, "learning_rate": 0.0006425027614103806, "loss": 1.0872, "step": 15830 }, { "epoch": 0.810976858488634, "grad_norm": 0.17389538884162903, "learning_rate": 0.0006399829403825436, "loss": 1.0935, "step": 15840 }, { "epoch": 0.811488838828589, "grad_norm": 0.17470002174377441, "learning_rate": 0.0006374730017994116, "loss": 1.0603, "step": 15850 }, { "epoch": 0.8120008191685439, "grad_norm": 0.17814920842647552, "learning_rate": 0.0006349729069031867, "loss": 1.1096, "step": 15860 }, { "epoch": 0.8125127995084989, "grad_norm": 0.18193413317203522, "learning_rate": 0.000632482617088075, "loss": 1.076, "step": 15870 }, { "epoch": 0.8130247798484538, "grad_norm": 0.18022698163986206, "learning_rate": 0.0006300020938996901, "loss": 1.0868, "step": 15880 }, { "epoch": 0.8135367601884088, "grad_norm": 0.16944915056228638, "learning_rate": 0.0006275312990344587, "loss": 1.0857, "step": 15890 }, { "epoch": 0.8140487405283637, "grad_norm": 0.17860791087150574, "learning_rate": 0.0006250701943390303, "loss": 1.0885, "step": 15900 }, { "epoch": 0.8145607208683187, "grad_norm": 0.169233039021492, "learning_rate": 0.0006226187418096868, "loss": 1.0701, "step": 15910 }, { "epoch": 0.8150727012082736, "grad_norm": 0.18404126167297363, "learning_rate": 0.0006201769035917569, "loss": 1.0862, "step": 15920 }, { "epoch": 0.8155846815482285, "grad_norm": 0.1732415407896042, "learning_rate": 0.0006177446419790303, "loss": 1.0552, "step": 15930 }, { "epoch": 0.8160966618881835, "grad_norm": 0.17680327594280243, "learning_rate": 0.0006153219194131765, "loss": 1.0839, "step": 15940 }, { "epoch": 0.8166086422281384, "grad_norm": 0.168556347489357, "learning_rate": 0.000612908698483164, "loss": 1.0628, "step": 15950 }, { "epoch": 0.8171206225680934, "grad_norm": 0.1826118528842926, "learning_rate": 0.0006105049419246835, "loss": 1.0855, "step": 15960 }, { "epoch": 0.8176326029080483, "grad_norm": 0.17182965576648712, "learning_rate": 0.0006081106126195717, "loss": 1.0669, "step": 15970 }, { "epoch": 0.8181445832480033, "grad_norm": 0.16935127973556519, "learning_rate": 0.0006057256735952383, "loss": 1.083, "step": 15980 }, { "epoch": 0.8186565635879582, "grad_norm": 0.17464590072631836, "learning_rate": 0.0006033500880240954, "loss": 1.0671, "step": 15990 }, { "epoch": 0.8191685439279132, "grad_norm": 0.17747105658054352, "learning_rate": 0.0006009838192229885, "loss": 1.0678, "step": 16000 }, { "epoch": 0.8196805242678681, "grad_norm": 0.17449192702770233, "learning_rate": 0.0005986268306526304, "loss": 1.0796, "step": 16010 }, { "epoch": 0.8201925046078231, "grad_norm": 0.17097654938697815, "learning_rate": 0.0005962790859170364, "loss": 1.0778, "step": 16020 }, { "epoch": 0.820704484947778, "grad_norm": 0.16904379427433014, "learning_rate": 0.0005939405487629626, "loss": 1.0843, "step": 16030 }, { "epoch": 0.821216465287733, "grad_norm": 0.17497345805168152, "learning_rate": 0.0005916111830793466, "loss": 1.101, "step": 16040 }, { "epoch": 0.8217284456276879, "grad_norm": 0.1789994090795517, "learning_rate": 0.0005892909528967487, "loss": 1.0845, "step": 16050 }, { "epoch": 0.8222404259676428, "grad_norm": 0.1678200364112854, "learning_rate": 0.0005869798223867978, "loss": 1.0606, "step": 16060 }, { "epoch": 0.8227524063075978, "grad_norm": 0.17383365333080292, "learning_rate": 0.000584677755861637, "loss": 1.0674, "step": 16070 }, { "epoch": 0.8232643866475527, "grad_norm": 0.17335745692253113, "learning_rate": 0.0005823847177733732, "loss": 1.0965, "step": 16080 }, { "epoch": 0.8237763669875077, "grad_norm": 0.16967058181762695, "learning_rate": 0.0005801006727135282, "loss": 1.0677, "step": 16090 }, { "epoch": 0.8242883473274626, "grad_norm": 0.16847650706768036, "learning_rate": 0.0005778255854124912, "loss": 1.0791, "step": 16100 }, { "epoch": 0.8248003276674176, "grad_norm": 0.17251423001289368, "learning_rate": 0.0005755594207389755, "loss": 1.0806, "step": 16110 }, { "epoch": 0.8253123080073725, "grad_norm": 0.17555896937847137, "learning_rate": 0.0005733021436994743, "loss": 1.066, "step": 16120 }, { "epoch": 0.8258242883473275, "grad_norm": 0.16997992992401123, "learning_rate": 0.000571053719437722, "loss": 1.0876, "step": 16130 }, { "epoch": 0.8263362686872824, "grad_norm": 0.17845116555690765, "learning_rate": 0.0005688141132341551, "loss": 1.085, "step": 16140 }, { "epoch": 0.8268482490272373, "grad_norm": 0.1836511194705963, "learning_rate": 0.0005665832905053756, "loss": 1.0769, "step": 16150 }, { "epoch": 0.8273602293671923, "grad_norm": 0.1753719449043274, "learning_rate": 0.0005643612168036182, "loss": 1.0742, "step": 16160 }, { "epoch": 0.8278722097071473, "grad_norm": 0.17152993381023407, "learning_rate": 0.0005621478578162176, "loss": 1.0761, "step": 16170 }, { "epoch": 0.8283841900471022, "grad_norm": 0.18273817002773285, "learning_rate": 0.0005599431793650786, "loss": 1.0803, "step": 16180 }, { "epoch": 0.8288961703870571, "grad_norm": 0.1865053027868271, "learning_rate": 0.0005577471474061485, "loss": 1.0695, "step": 16190 }, { "epoch": 0.8294081507270121, "grad_norm": 0.16600672900676727, "learning_rate": 0.0005555597280288918, "loss": 1.0844, "step": 16200 }, { "epoch": 0.829920131066967, "grad_norm": 0.1850479394197464, "learning_rate": 0.0005533808874557656, "loss": 1.0658, "step": 16210 }, { "epoch": 0.830432111406922, "grad_norm": 0.17687514424324036, "learning_rate": 0.000551210592041699, "loss": 1.072, "step": 16220 }, { "epoch": 0.8309440917468769, "grad_norm": 0.1833869218826294, "learning_rate": 0.000549048808273573, "loss": 1.0739, "step": 16230 }, { "epoch": 0.8314560720868319, "grad_norm": 0.1750813126564026, "learning_rate": 0.0005468955027697031, "loss": 1.0851, "step": 16240 }, { "epoch": 0.8319680524267868, "grad_norm": 0.18595030903816223, "learning_rate": 0.0005447506422793241, "loss": 1.0615, "step": 16250 }, { "epoch": 0.8324800327667418, "grad_norm": 0.1711542159318924, "learning_rate": 0.0005426141936820762, "loss": 1.0689, "step": 16260 }, { "epoch": 0.8329920131066967, "grad_norm": 0.18596914410591125, "learning_rate": 0.000540486123987494, "loss": 1.0574, "step": 16270 }, { "epoch": 0.8335039934466516, "grad_norm": 0.17115946114063263, "learning_rate": 0.0005383664003344964, "loss": 1.0703, "step": 16280 }, { "epoch": 0.8340159737866066, "grad_norm": 0.1802951842546463, "learning_rate": 0.0005362549899908805, "loss": 1.074, "step": 16290 }, { "epoch": 0.8345279541265616, "grad_norm": 0.18504950404167175, "learning_rate": 0.0005341518603528143, "loss": 1.0747, "step": 16300 }, { "epoch": 0.8350399344665165, "grad_norm": 0.17508040368556976, "learning_rate": 0.000532056978944335, "loss": 1.0784, "step": 16310 }, { "epoch": 0.8355519148064714, "grad_norm": 0.1866855025291443, "learning_rate": 0.0005299703134168463, "loss": 1.0799, "step": 16320 }, { "epoch": 0.8360638951464264, "grad_norm": 0.16678877174854279, "learning_rate": 0.0005278918315486196, "loss": 1.0531, "step": 16330 }, { "epoch": 0.8365758754863813, "grad_norm": 0.1872544288635254, "learning_rate": 0.000525821501244296, "loss": 1.0768, "step": 16340 }, { "epoch": 0.8370878558263363, "grad_norm": 0.17887745797634125, "learning_rate": 0.0005237592905343908, "loss": 1.0552, "step": 16350 }, { "epoch": 0.8375998361662912, "grad_norm": 0.1764066219329834, "learning_rate": 0.0005217051675748001, "loss": 1.0511, "step": 16360 }, { "epoch": 0.8381118165062461, "grad_norm": 0.17765092849731445, "learning_rate": 0.0005196591006463087, "loss": 1.0645, "step": 16370 }, { "epoch": 0.8386237968462011, "grad_norm": 0.17197942733764648, "learning_rate": 0.0005176210581541006, "loss": 1.0561, "step": 16380 }, { "epoch": 0.8391357771861561, "grad_norm": 0.1778382807970047, "learning_rate": 0.0005155910086272709, "loss": 1.0818, "step": 16390 }, { "epoch": 0.839647757526111, "grad_norm": 0.1758384257555008, "learning_rate": 0.00051356892071834, "loss": 1.0755, "step": 16400 }, { "epoch": 0.8401597378660659, "grad_norm": 0.17765450477600098, "learning_rate": 0.0005115547632027694, "loss": 1.0622, "step": 16410 }, { "epoch": 0.8406717182060209, "grad_norm": 0.1722906529903412, "learning_rate": 0.0005095485049784797, "loss": 1.0562, "step": 16420 }, { "epoch": 0.8411836985459759, "grad_norm": 0.18041284382343292, "learning_rate": 0.0005075501150653699, "loss": 1.0563, "step": 16430 }, { "epoch": 0.8416956788859308, "grad_norm": 0.1721327304840088, "learning_rate": 0.0005055595626048399, "loss": 1.0872, "step": 16440 }, { "epoch": 0.8422076592258857, "grad_norm": 0.17623233795166016, "learning_rate": 0.000503576816859313, "loss": 1.0768, "step": 16450 }, { "epoch": 0.8427196395658406, "grad_norm": 0.1824178546667099, "learning_rate": 0.000501601847211762, "loss": 1.0773, "step": 16460 }, { "epoch": 0.8432316199057956, "grad_norm": 0.17492622137069702, "learning_rate": 0.0004996346231652357, "loss": 1.0751, "step": 16470 }, { "epoch": 0.8437436002457506, "grad_norm": 0.19331291317939758, "learning_rate": 0.0004976751143423888, "loss": 1.0522, "step": 16480 }, { "epoch": 0.8442555805857055, "grad_norm": 0.17318172752857208, "learning_rate": 0.0004957232904850122, "loss": 1.0611, "step": 16490 }, { "epoch": 0.8447675609256604, "grad_norm": 0.18951846659183502, "learning_rate": 0.0004937791214535661, "loss": 1.0584, "step": 16500 }, { "epoch": 0.8452795412656154, "grad_norm": 0.17713989317417145, "learning_rate": 0.0004918425772267145, "loss": 1.0542, "step": 16510 }, { "epoch": 0.8457915216055704, "grad_norm": 0.16759324073791504, "learning_rate": 0.0004899136279008613, "loss": 1.0689, "step": 16520 }, { "epoch": 0.8463035019455253, "grad_norm": 0.18664461374282837, "learning_rate": 0.000487992243689689, "loss": 1.0732, "step": 16530 }, { "epoch": 0.8468154822854802, "grad_norm": 0.17348751425743103, "learning_rate": 0.00048607839492369886, "loss": 1.0762, "step": 16540 }, { "epoch": 0.8473274626254352, "grad_norm": 0.17233343422412872, "learning_rate": 0.0004841720520497518, "loss": 1.0579, "step": 16550 }, { "epoch": 0.8478394429653902, "grad_norm": 0.18232837319374084, "learning_rate": 0.0004822731856306133, "loss": 1.0576, "step": 16560 }, { "epoch": 0.8483514233053451, "grad_norm": 0.17330168187618256, "learning_rate": 0.000480381766344498, "loss": 1.044, "step": 16570 }, { "epoch": 0.8488634036453, "grad_norm": 0.1745171695947647, "learning_rate": 0.00047849776498461725, "loss": 1.07, "step": 16580 }, { "epoch": 0.8493753839852549, "grad_norm": 0.1749190390110016, "learning_rate": 0.00047662115245872787, "loss": 1.0666, "step": 16590 }, { "epoch": 0.84988736432521, "grad_norm": 0.17629800736904144, "learning_rate": 0.0004747518997886834, "loss": 1.0694, "step": 16600 }, { "epoch": 0.8503993446651649, "grad_norm": 0.17141848802566528, "learning_rate": 0.00047288997810998585, "loss": 1.0752, "step": 16610 }, { "epoch": 0.8509113250051198, "grad_norm": 0.16317421197891235, "learning_rate": 0.00047103535867134064, "loss": 1.0575, "step": 16620 }, { "epoch": 0.8514233053450747, "grad_norm": 0.1698952317237854, "learning_rate": 0.0004691880128342126, "loss": 1.054, "step": 16630 }, { "epoch": 0.8519352856850297, "grad_norm": 0.17862023413181305, "learning_rate": 0.00046734791207238334, "loss": 1.0578, "step": 16640 }, { "epoch": 0.8524472660249847, "grad_norm": 0.17291221022605896, "learning_rate": 0.0004655150279715109, "loss": 1.0614, "step": 16650 }, { "epoch": 0.8529592463649396, "grad_norm": 0.18683776259422302, "learning_rate": 0.0004636893322286915, "loss": 1.0587, "step": 16660 }, { "epoch": 0.8534712267048945, "grad_norm": 0.17157678306102753, "learning_rate": 0.00046187079665202144, "loss": 1.0876, "step": 16670 }, { "epoch": 0.8539832070448494, "grad_norm": 0.16680538654327393, "learning_rate": 0.0004600593931601628, "loss": 1.0608, "step": 16680 }, { "epoch": 0.8544951873848045, "grad_norm": 0.17904032766819, "learning_rate": 0.00045825509378190934, "loss": 1.0622, "step": 16690 }, { "epoch": 0.8550071677247594, "grad_norm": 0.17377473413944244, "learning_rate": 0.0004564578706557547, "loss": 1.0761, "step": 16700 }, { "epoch": 0.8555191480647143, "grad_norm": 0.17606638371944427, "learning_rate": 0.0004546676960294617, "loss": 1.0627, "step": 16710 }, { "epoch": 0.8560311284046692, "grad_norm": 0.1655128300189972, "learning_rate": 0.0004528845422596346, "loss": 1.0579, "step": 16720 }, { "epoch": 0.8565431087446242, "grad_norm": 0.185993954539299, "learning_rate": 0.0004511083818112919, "loss": 1.0604, "step": 16730 }, { "epoch": 0.8570550890845792, "grad_norm": 0.18218767642974854, "learning_rate": 0.00044933918725744066, "loss": 1.0595, "step": 16740 }, { "epoch": 0.8575670694245341, "grad_norm": 0.16947178542613983, "learning_rate": 0.000447576931278654, "loss": 1.0494, "step": 16750 }, { "epoch": 0.858079049764489, "grad_norm": 0.17753495275974274, "learning_rate": 0.00044582158666264793, "loss": 1.0522, "step": 16760 }, { "epoch": 0.858591030104444, "grad_norm": 0.1756090372800827, "learning_rate": 0.0004440731263038627, "loss": 1.074, "step": 16770 }, { "epoch": 0.859103010444399, "grad_norm": 0.18287988007068634, "learning_rate": 0.00044233152320304276, "loss": 1.0883, "step": 16780 }, { "epoch": 0.8596149907843539, "grad_norm": 0.18234935402870178, "learning_rate": 0.0004405967504668205, "loss": 1.0481, "step": 16790 }, { "epoch": 0.8601269711243088, "grad_norm": 0.17408689856529236, "learning_rate": 0.0004388687813073016, "loss": 1.0672, "step": 16800 }, { "epoch": 0.8606389514642637, "grad_norm": 0.1746188998222351, "learning_rate": 0.00043714758904165, "loss": 1.0581, "step": 16810 }, { "epoch": 0.8611509318042188, "grad_norm": 0.17414236068725586, "learning_rate": 0.0004354331470916772, "loss": 1.0296, "step": 16820 }, { "epoch": 0.8616629121441737, "grad_norm": 0.17176198959350586, "learning_rate": 0.00043372542898343074, "loss": 1.048, "step": 16830 }, { "epoch": 0.8621748924841286, "grad_norm": 0.17366254329681396, "learning_rate": 0.0004320244083467865, "loss": 1.0584, "step": 16840 }, { "epoch": 0.8626868728240835, "grad_norm": 0.17431634664535522, "learning_rate": 0.0004303300589150403, "loss": 1.0747, "step": 16850 }, { "epoch": 0.8631988531640385, "grad_norm": 0.17983673512935638, "learning_rate": 0.0004286423545245033, "loss": 1.0477, "step": 16860 }, { "epoch": 0.8637108335039935, "grad_norm": 0.17973174154758453, "learning_rate": 0.00042696126911409766, "loss": 1.0733, "step": 16870 }, { "epoch": 0.8642228138439484, "grad_norm": 0.17209124565124512, "learning_rate": 0.0004252867767249536, "loss": 1.0553, "step": 16880 }, { "epoch": 0.8647347941839033, "grad_norm": 0.17548377811908722, "learning_rate": 0.0004236188515000098, "loss": 1.0317, "step": 16890 }, { "epoch": 0.8652467745238582, "grad_norm": 0.1856032758951187, "learning_rate": 0.0004219574676836124, "loss": 1.0645, "step": 16900 }, { "epoch": 0.8657587548638133, "grad_norm": 0.171828031539917, "learning_rate": 0.0004203025996211187, "loss": 1.0468, "step": 16910 }, { "epoch": 0.8662707352037682, "grad_norm": 0.1737641543149948, "learning_rate": 0.00041865422175850074, "loss": 1.0593, "step": 16920 }, { "epoch": 0.8667827155437231, "grad_norm": 0.17497050762176514, "learning_rate": 0.00041701230864194997, "loss": 1.0558, "step": 16930 }, { "epoch": 0.867294695883678, "grad_norm": 0.1742735356092453, "learning_rate": 0.00041537683491748515, "loss": 1.0524, "step": 16940 }, { "epoch": 0.8678066762236331, "grad_norm": 0.16955190896987915, "learning_rate": 0.00041374777533055996, "loss": 1.0734, "step": 16950 }, { "epoch": 0.868318656563588, "grad_norm": 0.17131267488002777, "learning_rate": 0.00041212510472567404, "loss": 1.047, "step": 16960 }, { "epoch": 0.8688306369035429, "grad_norm": 0.18686212599277496, "learning_rate": 0.00041050879804598354, "loss": 1.0628, "step": 16970 }, { "epoch": 0.8693426172434978, "grad_norm": 0.18018223345279694, "learning_rate": 0.0004088988303329146, "loss": 1.0727, "step": 16980 }, { "epoch": 0.8698545975834528, "grad_norm": 0.17378225922584534, "learning_rate": 0.00040729517672577834, "loss": 1.0608, "step": 16990 }, { "epoch": 0.8703665779234078, "grad_norm": 0.17299434542655945, "learning_rate": 0.0004056978124613862, "loss": 1.0572, "step": 17000 }, { "epoch": 0.8708785582633627, "grad_norm": 0.17272843420505524, "learning_rate": 0.0004041067128736684, "loss": 1.068, "step": 17010 }, { "epoch": 0.8713905386033176, "grad_norm": 0.17482733726501465, "learning_rate": 0.0004025218533932921, "loss": 1.0434, "step": 17020 }, { "epoch": 0.8719025189432725, "grad_norm": 0.17604181170463562, "learning_rate": 0.00040094320954728313, "loss": 1.0473, "step": 17030 }, { "epoch": 0.8724144992832276, "grad_norm": 0.17563997209072113, "learning_rate": 0.000399370756958647, "loss": 1.0326, "step": 17040 }, { "epoch": 0.8729264796231825, "grad_norm": 0.17245963215827942, "learning_rate": 0.00039780447134599286, "loss": 1.0473, "step": 17050 }, { "epoch": 0.8734384599631374, "grad_norm": 0.1761290282011032, "learning_rate": 0.00039624432852315933, "loss": 1.0521, "step": 17060 }, { "epoch": 0.8739504403030923, "grad_norm": 0.17559461295604706, "learning_rate": 0.0003946903043988396, "loss": 1.0499, "step": 17070 }, { "epoch": 0.8744624206430474, "grad_norm": 0.16970165073871613, "learning_rate": 0.00039314237497621053, "loss": 1.0653, "step": 17080 }, { "epoch": 0.8749744009830023, "grad_norm": 0.1792786717414856, "learning_rate": 0.00039160051635256165, "loss": 1.0554, "step": 17090 }, { "epoch": 0.8754863813229572, "grad_norm": 0.16863805055618286, "learning_rate": 0.0003900647047189262, "loss": 1.0524, "step": 17100 }, { "epoch": 0.8759983616629121, "grad_norm": 0.1794777661561966, "learning_rate": 0.0003885349163597133, "loss": 1.0741, "step": 17110 }, { "epoch": 0.876510342002867, "grad_norm": 0.1949402540922165, "learning_rate": 0.0003870111276523419, "loss": 1.0458, "step": 17120 }, { "epoch": 0.8770223223428221, "grad_norm": 0.17837046086788177, "learning_rate": 0.0003854933150668761, "loss": 1.0484, "step": 17130 }, { "epoch": 0.877534302682777, "grad_norm": 0.16682222485542297, "learning_rate": 0.00038398145516566133, "loss": 1.0643, "step": 17140 }, { "epoch": 0.8780462830227319, "grad_norm": 0.17241717875003815, "learning_rate": 0.00038247552460296324, "loss": 1.0561, "step": 17150 }, { "epoch": 0.8785582633626868, "grad_norm": 0.16557161509990692, "learning_rate": 0.00038097550012460626, "loss": 1.0614, "step": 17160 }, { "epoch": 0.8790702437026419, "grad_norm": 0.17597849667072296, "learning_rate": 0.00037948135856761536, "loss": 1.0541, "step": 17170 }, { "epoch": 0.8795822240425968, "grad_norm": 0.17368751764297485, "learning_rate": 0.00037799307685985786, "loss": 1.0482, "step": 17180 }, { "epoch": 0.8800942043825517, "grad_norm": 0.17278683185577393, "learning_rate": 0.00037651063201968706, "loss": 1.0493, "step": 17190 }, { "epoch": 0.8806061847225066, "grad_norm": 0.17373493313789368, "learning_rate": 0.00037503400115558816, "loss": 1.0547, "step": 17200 }, { "epoch": 0.8811181650624617, "grad_norm": 0.1761094480752945, "learning_rate": 0.0003735631614658236, "loss": 1.0476, "step": 17210 }, { "epoch": 0.8816301454024166, "grad_norm": 0.1749420464038849, "learning_rate": 0.00037209809023808216, "loss": 1.0313, "step": 17220 }, { "epoch": 0.8821421257423715, "grad_norm": 0.1756523847579956, "learning_rate": 0.0003706387648491272, "loss": 1.0551, "step": 17230 }, { "epoch": 0.8826541060823264, "grad_norm": 0.1767933964729309, "learning_rate": 0.0003691851627644478, "loss": 1.0385, "step": 17240 }, { "epoch": 0.8831660864222813, "grad_norm": 0.17991852760314941, "learning_rate": 0.00036773726153791126, "loss": 1.0534, "step": 17250 }, { "epoch": 0.8836780667622364, "grad_norm": 0.17097926139831543, "learning_rate": 0.00036629503881141533, "loss": 1.0424, "step": 17260 }, { "epoch": 0.8841900471021913, "grad_norm": 0.1836550533771515, "learning_rate": 0.00036485847231454427, "loss": 1.0627, "step": 17270 }, { "epoch": 0.8847020274421462, "grad_norm": 0.18745499849319458, "learning_rate": 0.00036342753986422373, "loss": 1.0475, "step": 17280 }, { "epoch": 0.8852140077821011, "grad_norm": 0.17117556929588318, "learning_rate": 0.00036200221936437925, "loss": 1.0457, "step": 17290 }, { "epoch": 0.8857259881220562, "grad_norm": 0.17555800080299377, "learning_rate": 0.0003605824888055944, "loss": 1.0505, "step": 17300 }, { "epoch": 0.8862379684620111, "grad_norm": 0.17367680370807648, "learning_rate": 0.00035916832626477105, "loss": 1.0433, "step": 17310 }, { "epoch": 0.886749948801966, "grad_norm": 0.16771985590457916, "learning_rate": 0.0003577597099047911, "loss": 1.0405, "step": 17320 }, { "epoch": 0.8872619291419209, "grad_norm": 0.17749017477035522, "learning_rate": 0.00035635661797417894, "loss": 1.0326, "step": 17330 }, { "epoch": 0.8877739094818758, "grad_norm": 0.1756659597158432, "learning_rate": 0.0003549590288067658, "loss": 1.0481, "step": 17340 }, { "epoch": 0.8882858898218309, "grad_norm": 0.17804957926273346, "learning_rate": 0.00035356692082135497, "loss": 1.0348, "step": 17350 }, { "epoch": 0.8887978701617858, "grad_norm": 0.17013497650623322, "learning_rate": 0.000352180272521389, "loss": 1.0444, "step": 17360 }, { "epoch": 0.8893098505017407, "grad_norm": 0.16462627053260803, "learning_rate": 0.000350799062494617, "loss": 1.0473, "step": 17370 }, { "epoch": 0.8898218308416956, "grad_norm": 0.18292909860610962, "learning_rate": 0.00034942326941276463, "loss": 1.0548, "step": 17380 }, { "epoch": 0.8903338111816507, "grad_norm": 0.16778182983398438, "learning_rate": 0.00034805287203120474, "loss": 1.0486, "step": 17390 }, { "epoch": 0.8908457915216056, "grad_norm": 0.17783689498901367, "learning_rate": 0.0003466878491886288, "loss": 1.0422, "step": 17400 }, { "epoch": 0.8913577718615605, "grad_norm": 0.17219282686710358, "learning_rate": 0.0003453281798067208, "loss": 1.036, "step": 17410 }, { "epoch": 0.8918697522015154, "grad_norm": 0.17862632870674133, "learning_rate": 0.00034397384288983114, "loss": 1.0441, "step": 17420 }, { "epoch": 0.8923817325414705, "grad_norm": 0.17450949549674988, "learning_rate": 0.00034262481752465293, "loss": 1.0629, "step": 17430 }, { "epoch": 0.8928937128814254, "grad_norm": 0.17378470301628113, "learning_rate": 0.00034128108287989866, "loss": 1.0322, "step": 17440 }, { "epoch": 0.8934056932213803, "grad_norm": 0.17379970848560333, "learning_rate": 0.00033994261820597885, "loss": 1.0553, "step": 17450 }, { "epoch": 0.8939176735613352, "grad_norm": 0.17971958220005035, "learning_rate": 0.00033860940283468143, "loss": 1.0532, "step": 17460 }, { "epoch": 0.8944296539012901, "grad_norm": 0.17435471713542938, "learning_rate": 0.0003372814161788526, "loss": 1.0289, "step": 17470 }, { "epoch": 0.8949416342412452, "grad_norm": 0.17900234460830688, "learning_rate": 0.00033595863773207914, "loss": 1.0407, "step": 17480 }, { "epoch": 0.8954536145812001, "grad_norm": 0.1703522503376007, "learning_rate": 0.00033464104706837144, "loss": 1.0505, "step": 17490 }, { "epoch": 0.895965594921155, "grad_norm": 0.1772749274969101, "learning_rate": 0.00033332862384184833, "loss": 1.0504, "step": 17500 }, { "epoch": 0.8964775752611099, "grad_norm": 0.19156505167484283, "learning_rate": 0.0003320213477864227, "loss": 1.0537, "step": 17510 }, { "epoch": 0.896989555601065, "grad_norm": 0.17889319360256195, "learning_rate": 0.00033071919871548877, "loss": 1.0371, "step": 17520 }, { "epoch": 0.8975015359410199, "grad_norm": 0.17776621878147125, "learning_rate": 0.0003294221565216104, "loss": 1.0498, "step": 17530 }, { "epoch": 0.8980135162809748, "grad_norm": 0.1731380671262741, "learning_rate": 0.0003281302011762101, "loss": 1.048, "step": 17540 }, { "epoch": 0.8985254966209297, "grad_norm": 0.17784886062145233, "learning_rate": 0.0003268433127292607, "loss": 1.0477, "step": 17550 }, { "epoch": 0.8990374769608847, "grad_norm": 0.17313584685325623, "learning_rate": 0.00032556147130897615, "loss": 1.0323, "step": 17560 }, { "epoch": 0.8995494573008397, "grad_norm": 0.17907077074050903, "learning_rate": 0.00032428465712150536, "loss": 1.0527, "step": 17570 }, { "epoch": 0.9000614376407946, "grad_norm": 0.1737951934337616, "learning_rate": 0.0003230128504506268, "loss": 1.036, "step": 17580 }, { "epoch": 0.9005734179807495, "grad_norm": 0.17653332650661469, "learning_rate": 0.00032174603165744314, "loss": 1.0478, "step": 17590 }, { "epoch": 0.9010853983207044, "grad_norm": 0.16936801373958588, "learning_rate": 0.00032048418118007897, "loss": 1.0452, "step": 17600 }, { "epoch": 0.9015973786606595, "grad_norm": 0.17044688761234283, "learning_rate": 0.00031922727953337794, "loss": 1.0433, "step": 17610 }, { "epoch": 0.9021093590006144, "grad_norm": 0.16897530853748322, "learning_rate": 0.0003179753073086024, "loss": 1.041, "step": 17620 }, { "epoch": 0.9026213393405693, "grad_norm": 0.17904484272003174, "learning_rate": 0.00031672824517313354, "loss": 1.0562, "step": 17630 }, { "epoch": 0.9031333196805242, "grad_norm": 0.1729121208190918, "learning_rate": 0.0003154860738701725, "loss": 1.0345, "step": 17640 }, { "epoch": 0.9036453000204792, "grad_norm": 0.17275741696357727, "learning_rate": 0.00031424877421844385, "loss": 1.0494, "step": 17650 }, { "epoch": 0.9041572803604342, "grad_norm": 0.16756050288677216, "learning_rate": 0.0003130163271118985, "loss": 1.0305, "step": 17660 }, { "epoch": 0.9046692607003891, "grad_norm": 0.17867998778820038, "learning_rate": 0.00031178871351941924, "loss": 1.045, "step": 17670 }, { "epoch": 0.905181241040344, "grad_norm": 0.17364557087421417, "learning_rate": 0.00031056591448452663, "loss": 1.0407, "step": 17680 }, { "epoch": 0.905693221380299, "grad_norm": 0.18060193955898285, "learning_rate": 0.0003093479111250863, "loss": 1.0404, "step": 17690 }, { "epoch": 0.906205201720254, "grad_norm": 0.17321224510669708, "learning_rate": 0.0003081346846330176, "loss": 1.0338, "step": 17700 }, { "epoch": 0.9067171820602089, "grad_norm": 0.1827027052640915, "learning_rate": 0.0003069262162740026, "loss": 1.0513, "step": 17710 }, { "epoch": 0.9072291624001638, "grad_norm": 0.17330406606197357, "learning_rate": 0.0003057224873871977, "loss": 1.0537, "step": 17720 }, { "epoch": 0.9077411427401187, "grad_norm": 0.1664852797985077, "learning_rate": 0.00030452347938494435, "loss": 1.0385, "step": 17730 }, { "epoch": 0.9082531230800738, "grad_norm": 0.1791536808013916, "learning_rate": 0.00030332917375248324, "loss": 1.0205, "step": 17740 }, { "epoch": 0.9087651034200287, "grad_norm": 0.168918177485466, "learning_rate": 0.0003021395520476674, "loss": 1.0278, "step": 17750 }, { "epoch": 0.9092770837599836, "grad_norm": 0.17502665519714355, "learning_rate": 0.00030095459590067796, "loss": 1.0533, "step": 17760 }, { "epoch": 0.9097890640999385, "grad_norm": 0.17242580652236938, "learning_rate": 0.00029977428701374024, "loss": 1.0465, "step": 17770 }, { "epoch": 0.9103010444398935, "grad_norm": 0.16884900629520416, "learning_rate": 0.0002985986071608414, "loss": 1.0553, "step": 17780 }, { "epoch": 0.9108130247798485, "grad_norm": 0.17999139428138733, "learning_rate": 0.00029742753818744894, "loss": 1.052, "step": 17790 }, { "epoch": 0.9113250051198034, "grad_norm": 0.19205188751220703, "learning_rate": 0.0002962610620102301, "loss": 1.0386, "step": 17800 }, { "epoch": 0.9118369854597583, "grad_norm": 0.17089873552322388, "learning_rate": 0.00029509916061677314, "loss": 1.0519, "step": 17810 }, { "epoch": 0.9123489657997133, "grad_norm": 0.1669624298810959, "learning_rate": 0.0002939418160653087, "loss": 1.045, "step": 17820 }, { "epoch": 0.9128609461396683, "grad_norm": 0.1757606416940689, "learning_rate": 0.000292789010484433, "loss": 1.0311, "step": 17830 }, { "epoch": 0.9133729264796232, "grad_norm": 0.1726016104221344, "learning_rate": 0.00029164072607283187, "loss": 1.0302, "step": 17840 }, { "epoch": 0.9138849068195781, "grad_norm": 0.17893843352794647, "learning_rate": 0.0002904969450990057, "loss": 1.0236, "step": 17850 }, { "epoch": 0.914396887159533, "grad_norm": 0.17613349854946136, "learning_rate": 0.00028935764990099594, "loss": 1.0467, "step": 17860 }, { "epoch": 0.914908867499488, "grad_norm": 0.1762663722038269, "learning_rate": 0.00028822282288611204, "loss": 1.0143, "step": 17870 }, { "epoch": 0.915420847839443, "grad_norm": 0.17385472357273102, "learning_rate": 0.00028709244653066, "loss": 1.0373, "step": 17880 }, { "epoch": 0.9159328281793979, "grad_norm": 0.173353374004364, "learning_rate": 0.0002859665033796716, "loss": 1.0231, "step": 17890 }, { "epoch": 0.9164448085193528, "grad_norm": 0.1739385724067688, "learning_rate": 0.0002848449760466353, "loss": 1.0174, "step": 17900 }, { "epoch": 0.9169567888593078, "grad_norm": 0.17758533358573914, "learning_rate": 0.000283727847213227, "loss": 1.0271, "step": 17910 }, { "epoch": 0.9174687691992628, "grad_norm": 0.17424450814723969, "learning_rate": 0.00028261509962904325, "loss": 1.0464, "step": 17920 }, { "epoch": 0.9179807495392177, "grad_norm": 0.18018485605716705, "learning_rate": 0.0002815067161113347, "loss": 1.0379, "step": 17930 }, { "epoch": 0.9184927298791726, "grad_norm": 0.18166567385196686, "learning_rate": 0.0002804026795447407, "loss": 1.0364, "step": 17940 }, { "epoch": 0.9190047102191276, "grad_norm": 0.17235900461673737, "learning_rate": 0.00027930297288102513, "loss": 1.052, "step": 17950 }, { "epoch": 0.9195166905590826, "grad_norm": 0.17493902146816254, "learning_rate": 0.000278207579138813, "loss": 1.0377, "step": 17960 }, { "epoch": 0.9200286708990375, "grad_norm": 0.17957419157028198, "learning_rate": 0.0002771164814033282, "loss": 1.0392, "step": 17970 }, { "epoch": 0.9205406512389924, "grad_norm": 0.178439199924469, "learning_rate": 0.00027602966282613264, "loss": 1.0333, "step": 17980 }, { "epoch": 0.9210526315789473, "grad_norm": 0.17528565227985382, "learning_rate": 0.0002749471066248655, "loss": 1.035, "step": 17990 }, { "epoch": 0.9215646119189023, "grad_norm": 0.18786676228046417, "learning_rate": 0.0002738687960829849, "loss": 1.0263, "step": 18000 }, { "epoch": 0.9220765922588573, "grad_norm": 0.18565250933170319, "learning_rate": 0.00027279471454950873, "loss": 1.0266, "step": 18010 }, { "epoch": 0.9225885725988122, "grad_norm": 0.17576780915260315, "learning_rate": 0.00027172484543875865, "loss": 1.0472, "step": 18020 }, { "epoch": 0.9231005529387671, "grad_norm": 0.17549046874046326, "learning_rate": 0.00027065917223010303, "loss": 1.0357, "step": 18030 }, { "epoch": 0.9236125332787221, "grad_norm": 0.17524850368499756, "learning_rate": 0.00026959767846770227, "loss": 1.0194, "step": 18040 }, { "epoch": 0.9241245136186771, "grad_norm": 0.18681474030017853, "learning_rate": 0.00026854034776025495, "loss": 1.0406, "step": 18050 }, { "epoch": 0.924636493958632, "grad_norm": 0.1830626130104065, "learning_rate": 0.000267487163780744, "loss": 1.0445, "step": 18060 }, { "epoch": 0.9251484742985869, "grad_norm": 0.1787140816450119, "learning_rate": 0.00026643811026618537, "loss": 1.0365, "step": 18070 }, { "epoch": 0.9256604546385419, "grad_norm": 0.1781841665506363, "learning_rate": 0.00026539317101737637, "loss": 1.0278, "step": 18080 }, { "epoch": 0.9261724349784968, "grad_norm": 0.18114568293094635, "learning_rate": 0.00026435232989864576, "loss": 1.0273, "step": 18090 }, { "epoch": 0.9266844153184518, "grad_norm": 0.18065612018108368, "learning_rate": 0.0002633155708376045, "loss": 1.0435, "step": 18100 }, { "epoch": 0.9271963956584067, "grad_norm": 0.17828424274921417, "learning_rate": 0.0002622828778248974, "loss": 1.0103, "step": 18110 }, { "epoch": 0.9277083759983616, "grad_norm": 0.17807289958000183, "learning_rate": 0.0002612542349139565, "loss": 1.0437, "step": 18120 }, { "epoch": 0.9282203563383166, "grad_norm": 0.17496445775032043, "learning_rate": 0.0002602296262207541, "loss": 1.0219, "step": 18130 }, { "epoch": 0.9287323366782716, "grad_norm": 0.17806415259838104, "learning_rate": 0.00025920903592355785, "loss": 1.0256, "step": 18140 }, { "epoch": 0.9292443170182265, "grad_norm": 0.17231720685958862, "learning_rate": 0.00025819244826268654, "loss": 1.0487, "step": 18150 }, { "epoch": 0.9297562973581814, "grad_norm": 0.18158575892448425, "learning_rate": 0.00025717984754026655, "loss": 1.0258, "step": 18160 }, { "epoch": 0.9302682776981364, "grad_norm": 0.17217537760734558, "learning_rate": 0.0002561712181199894, "loss": 1.012, "step": 18170 }, { "epoch": 0.9307802580380914, "grad_norm": 0.16844135522842407, "learning_rate": 0.0002551665444268703, "loss": 1.0449, "step": 18180 }, { "epoch": 0.9312922383780463, "grad_norm": 0.17478111386299133, "learning_rate": 0.0002541658109470081, "loss": 1.0357, "step": 18190 }, { "epoch": 0.9318042187180012, "grad_norm": 0.17291343212127686, "learning_rate": 0.00025316900222734496, "loss": 1.0406, "step": 18200 }, { "epoch": 0.9323161990579562, "grad_norm": 0.17205969989299774, "learning_rate": 0.00025217610287542845, "loss": 1.0263, "step": 18210 }, { "epoch": 0.9328281793979111, "grad_norm": 0.17579463124275208, "learning_rate": 0.0002511870975591733, "loss": 1.0487, "step": 18220 }, { "epoch": 0.9333401597378661, "grad_norm": 0.185591459274292, "learning_rate": 0.00025020197100662507, "loss": 1.0289, "step": 18230 }, { "epoch": 0.933852140077821, "grad_norm": 0.18697933852672577, "learning_rate": 0.0002492207080057241, "loss": 1.0445, "step": 18240 }, { "epoch": 0.934364120417776, "grad_norm": 0.1702352613210678, "learning_rate": 0.00024824329340407056, "loss": 1.017, "step": 18250 }, { "epoch": 0.9348761007577309, "grad_norm": 0.17386525869369507, "learning_rate": 0.0002472697121086907, "loss": 1.0265, "step": 18260 }, { "epoch": 0.9353880810976859, "grad_norm": 0.17194058001041412, "learning_rate": 0.0002462999490858035, "loss": 1.0305, "step": 18270 }, { "epoch": 0.9359000614376408, "grad_norm": 0.17600733041763306, "learning_rate": 0.00024533398936058893, "loss": 1.0161, "step": 18280 }, { "epoch": 0.9364120417775957, "grad_norm": 0.17031820118427277, "learning_rate": 0.0002443718180169563, "loss": 1.0435, "step": 18290 }, { "epoch": 0.9369240221175507, "grad_norm": 0.17277632653713226, "learning_rate": 0.00024341342019731398, "loss": 1.0321, "step": 18300 }, { "epoch": 0.9374360024575056, "grad_norm": 0.17314958572387695, "learning_rate": 0.00024245878110234033, "loss": 1.0419, "step": 18310 }, { "epoch": 0.9379479827974606, "grad_norm": 0.17943693697452545, "learning_rate": 0.0002415078859907547, "loss": 1.0455, "step": 18320 }, { "epoch": 0.9384599631374155, "grad_norm": 0.17218518257141113, "learning_rate": 0.00024056072017909026, "loss": 1.0174, "step": 18330 }, { "epoch": 0.9389719434773705, "grad_norm": 0.1672009378671646, "learning_rate": 0.0002396172690414667, "loss": 1.0304, "step": 18340 }, { "epoch": 0.9394839238173254, "grad_norm": 0.16872192919254303, "learning_rate": 0.00023867751800936513, "loss": 1.0334, "step": 18350 }, { "epoch": 0.9399959041572804, "grad_norm": 0.17709334194660187, "learning_rate": 0.0002377414525714023, "loss": 1.043, "step": 18360 }, { "epoch": 0.9405078844972353, "grad_norm": 0.17235656082630157, "learning_rate": 0.00023680905827310717, "loss": 1.0296, "step": 18370 }, { "epoch": 0.9410198648371902, "grad_norm": 0.17677216231822968, "learning_rate": 0.0002358803207166974, "loss": 1.0304, "step": 18380 }, { "epoch": 0.9415318451771452, "grad_norm": 0.17921361327171326, "learning_rate": 0.00023495522556085693, "loss": 1.0287, "step": 18390 }, { "epoch": 0.9420438255171002, "grad_norm": 0.18774552643299103, "learning_rate": 0.0002340337585205149, "loss": 1.0303, "step": 18400 }, { "epoch": 0.9425558058570551, "grad_norm": 0.1885557323694229, "learning_rate": 0.00023311590536662463, "loss": 1.0225, "step": 18410 }, { "epoch": 0.94306778619701, "grad_norm": 0.17091277241706848, "learning_rate": 0.00023220165192594432, "loss": 1.0216, "step": 18420 }, { "epoch": 0.943579766536965, "grad_norm": 0.17530862987041473, "learning_rate": 0.00023129098408081777, "loss": 1.0303, "step": 18430 }, { "epoch": 0.9440917468769199, "grad_norm": 0.17937549948692322, "learning_rate": 0.00023038388776895662, "loss": 1.0234, "step": 18440 }, { "epoch": 0.9446037272168749, "grad_norm": 0.1720314472913742, "learning_rate": 0.00022948034898322335, "loss": 1.0304, "step": 18450 }, { "epoch": 0.9451157075568298, "grad_norm": 0.1731894463300705, "learning_rate": 0.00022858035377141452, "loss": 1.021, "step": 18460 }, { "epoch": 0.9456276878967848, "grad_norm": 0.17468558251857758, "learning_rate": 0.00022768388823604584, "loss": 1.0224, "step": 18470 }, { "epoch": 0.9461396682367397, "grad_norm": 0.17135438323020935, "learning_rate": 0.00022679093853413717, "loss": 1.0392, "step": 18480 }, { "epoch": 0.9466516485766947, "grad_norm": 0.1784532517194748, "learning_rate": 0.00022590149087699918, "loss": 1.0183, "step": 18490 }, { "epoch": 0.9471636289166496, "grad_norm": 0.18522332608699799, "learning_rate": 0.00022501553153001985, "loss": 1.0361, "step": 18500 }, { "epoch": 0.9476756092566045, "grad_norm": 0.18401268124580383, "learning_rate": 0.00022413304681245284, "loss": 1.0329, "step": 18510 }, { "epoch": 0.9481875895965595, "grad_norm": 0.16760528087615967, "learning_rate": 0.00022325402309720624, "loss": 1.0199, "step": 18520 }, { "epoch": 0.9486995699365144, "grad_norm": 0.18120263516902924, "learning_rate": 0.00022237844681063175, "loss": 1.0252, "step": 18530 }, { "epoch": 0.9492115502764694, "grad_norm": 0.1899506002664566, "learning_rate": 0.00022150630443231562, "loss": 1.0064, "step": 18540 }, { "epoch": 0.9497235306164243, "grad_norm": 0.1819719672203064, "learning_rate": 0.00022063758249486932, "loss": 1.0246, "step": 18550 }, { "epoch": 0.9502355109563793, "grad_norm": 0.17660754919052124, "learning_rate": 0.00021977226758372213, "loss": 1.0305, "step": 18560 }, { "epoch": 0.9507474912963342, "grad_norm": 0.17415086925029755, "learning_rate": 0.00021891034633691347, "loss": 1.0369, "step": 18570 }, { "epoch": 0.9512594716362892, "grad_norm": 0.17310403287410736, "learning_rate": 0.00021805180544488684, "loss": 1.0272, "step": 18580 }, { "epoch": 0.9517714519762441, "grad_norm": 0.17484420537948608, "learning_rate": 0.0002171966316502845, "loss": 1.028, "step": 18590 }, { "epoch": 0.952283432316199, "grad_norm": 0.18543212115764618, "learning_rate": 0.00021634481174774217, "loss": 1.0296, "step": 18600 }, { "epoch": 0.952795412656154, "grad_norm": 0.1763850450515747, "learning_rate": 0.00021549633258368582, "loss": 1.0307, "step": 18610 }, { "epoch": 0.953307392996109, "grad_norm": 0.16824059188365936, "learning_rate": 0.00021465118105612805, "loss": 1.0206, "step": 18620 }, { "epoch": 0.9538193733360639, "grad_norm": 0.17931176722049713, "learning_rate": 0.00021380934411446574, "loss": 1.016, "step": 18630 }, { "epoch": 0.9543313536760188, "grad_norm": 0.18147091567516327, "learning_rate": 0.00021297080875927913, "loss": 1.0211, "step": 18640 }, { "epoch": 0.9548433340159738, "grad_norm": 0.18163631856441498, "learning_rate": 0.00021213556204213033, "loss": 1.0263, "step": 18650 }, { "epoch": 0.9553553143559287, "grad_norm": 0.17591601610183716, "learning_rate": 0.00021130359106536384, "loss": 1.0417, "step": 18660 }, { "epoch": 0.9558672946958837, "grad_norm": 0.17677730321884155, "learning_rate": 0.00021047488298190723, "loss": 1.0299, "step": 18670 }, { "epoch": 0.9563792750358386, "grad_norm": 0.17326125502586365, "learning_rate": 0.0002096494249950729, "loss": 1.0268, "step": 18680 }, { "epoch": 0.9568912553757936, "grad_norm": 0.1793946474790573, "learning_rate": 0.00020882720435836026, "loss": 1.0355, "step": 18690 }, { "epoch": 0.9574032357157485, "grad_norm": 0.1703524887561798, "learning_rate": 0.00020800820837525892, "loss": 1.005, "step": 18700 }, { "epoch": 0.9579152160557035, "grad_norm": 0.17965586483478546, "learning_rate": 0.000207192424399053, "loss": 1.0182, "step": 18710 }, { "epoch": 0.9584271963956584, "grad_norm": 0.16650822758674622, "learning_rate": 0.00020637983983262526, "loss": 1.0304, "step": 18720 }, { "epoch": 0.9589391767356134, "grad_norm": 0.1700984239578247, "learning_rate": 0.00020557044212826323, "loss": 1.0103, "step": 18730 }, { "epoch": 0.9594511570755683, "grad_norm": 0.18094299733638763, "learning_rate": 0.0002047642187874647, "loss": 1.0247, "step": 18740 }, { "epoch": 0.9599631374155232, "grad_norm": 0.16972561180591583, "learning_rate": 0.0002039611573607455, "loss": 1.0328, "step": 18750 }, { "epoch": 0.9604751177554782, "grad_norm": 0.1718764752149582, "learning_rate": 0.0002031612454474467, "loss": 1.0015, "step": 18760 }, { "epoch": 0.9609870980954331, "grad_norm": 0.17211291193962097, "learning_rate": 0.00020236447069554324, "loss": 1.0485, "step": 18770 }, { "epoch": 0.9614990784353881, "grad_norm": 0.17325459420681, "learning_rate": 0.00020157082080145356, "loss": 1.0122, "step": 18780 }, { "epoch": 0.962011058775343, "grad_norm": 0.1677115559577942, "learning_rate": 0.00020078028350984888, "loss": 1.0144, "step": 18790 }, { "epoch": 0.962523039115298, "grad_norm": 0.17302511632442474, "learning_rate": 0.00019999284661346487, "loss": 1.0247, "step": 18800 }, { "epoch": 0.9630350194552529, "grad_norm": 0.1713932901620865, "learning_rate": 0.00019920849795291223, "loss": 1.0135, "step": 18810 }, { "epoch": 0.9635469997952079, "grad_norm": 0.1779249906539917, "learning_rate": 0.00019842722541648977, "loss": 1.0166, "step": 18820 }, { "epoch": 0.9640589801351628, "grad_norm": 0.17072229087352753, "learning_rate": 0.00019764901693999665, "loss": 1.0214, "step": 18830 }, { "epoch": 0.9645709604751177, "grad_norm": 0.17682915925979614, "learning_rate": 0.00019687386050654655, "loss": 1.0412, "step": 18840 }, { "epoch": 0.9650829408150727, "grad_norm": 0.17209376394748688, "learning_rate": 0.00019610174414638203, "loss": 1.0139, "step": 18850 }, { "epoch": 0.9655949211550277, "grad_norm": 0.16988667845726013, "learning_rate": 0.0001953326559366896, "loss": 1.03, "step": 18860 }, { "epoch": 0.9661069014949826, "grad_norm": 0.17056208848953247, "learning_rate": 0.0001945665840014157, "loss": 1.0335, "step": 18870 }, { "epoch": 0.9666188818349375, "grad_norm": 0.17054276168346405, "learning_rate": 0.0001938035165110831, "loss": 1.0281, "step": 18880 }, { "epoch": 0.9671308621748925, "grad_norm": 0.17490647733211517, "learning_rate": 0.00019304344168260865, "loss": 1.0401, "step": 18890 }, { "epoch": 0.9676428425148474, "grad_norm": 0.17823657393455505, "learning_rate": 0.00019228634777912089, "loss": 1.0225, "step": 18900 }, { "epoch": 0.9681548228548024, "grad_norm": 0.1651022583246231, "learning_rate": 0.00019153222310977906, "loss": 1.0088, "step": 18910 }, { "epoch": 0.9686668031947573, "grad_norm": 0.18135780096054077, "learning_rate": 0.00019078105602959264, "loss": 1.0289, "step": 18920 }, { "epoch": 0.9691787835347123, "grad_norm": 0.17016355693340302, "learning_rate": 0.00019003283493924117, "loss": 1.0111, "step": 18930 }, { "epoch": 0.9696907638746672, "grad_norm": 0.17754383385181427, "learning_rate": 0.00018928754828489555, "loss": 1.0291, "step": 18940 }, { "epoch": 0.9702027442146222, "grad_norm": 0.16962246596813202, "learning_rate": 0.00018854518455803946, "loss": 1.0228, "step": 18950 }, { "epoch": 0.9707147245545771, "grad_norm": 0.17820075154304504, "learning_rate": 0.00018780573229529142, "loss": 1.0231, "step": 18960 }, { "epoch": 0.971226704894532, "grad_norm": 0.16597416996955872, "learning_rate": 0.00018706918007822834, "loss": 1.0327, "step": 18970 }, { "epoch": 0.971738685234487, "grad_norm": 0.17721499502658844, "learning_rate": 0.00018633551653320852, "loss": 1.0084, "step": 18980 }, { "epoch": 0.972250665574442, "grad_norm": 0.17141114175319672, "learning_rate": 0.0001856047303311967, "loss": 1.0361, "step": 18990 }, { "epoch": 0.9727626459143969, "grad_norm": 0.17473644018173218, "learning_rate": 0.0001848768101875884, "loss": 1.0051, "step": 19000 }, { "epoch": 0.9732746262543518, "grad_norm": 0.17746561765670776, "learning_rate": 0.00018415174486203638, "loss": 1.0266, "step": 19010 }, { "epoch": 0.9737866065943068, "grad_norm": 0.16750702261924744, "learning_rate": 0.00018342952315827656, "loss": 1.0282, "step": 19020 }, { "epoch": 0.9742985869342617, "grad_norm": 0.1748443841934204, "learning_rate": 0.00018271013392395522, "loss": 1.0183, "step": 19030 }, { "epoch": 0.9748105672742167, "grad_norm": 0.17715822160243988, "learning_rate": 0.0001819935660504572, "loss": 1.0145, "step": 19040 }, { "epoch": 0.9753225476141716, "grad_norm": 0.17972363531589508, "learning_rate": 0.0001812798084727336, "loss": 1.0069, "step": 19050 }, { "epoch": 0.9758345279541265, "grad_norm": 0.17496472597122192, "learning_rate": 0.00018056885016913175, "loss": 1.0074, "step": 19060 }, { "epoch": 0.9763465082940815, "grad_norm": 0.18323951959609985, "learning_rate": 0.00017986068016122433, "loss": 1.0487, "step": 19070 }, { "epoch": 0.9768584886340365, "grad_norm": 0.16890741884708405, "learning_rate": 0.00017915528751364033, "loss": 1.0153, "step": 19080 }, { "epoch": 0.9773704689739914, "grad_norm": 0.17116831243038177, "learning_rate": 0.0001784526613338959, "loss": 1.0132, "step": 19090 }, { "epoch": 0.9778824493139463, "grad_norm": 0.17036503553390503, "learning_rate": 0.00017775279077222617, "loss": 1.0228, "step": 19100 }, { "epoch": 0.9783944296539013, "grad_norm": 0.17859075963497162, "learning_rate": 0.00017705566502141802, "loss": 1.0123, "step": 19110 }, { "epoch": 0.9789064099938563, "grad_norm": 0.17719532549381256, "learning_rate": 0.00017636127331664266, "loss": 1.0385, "step": 19120 }, { "epoch": 0.9794183903338112, "grad_norm": 0.17673194408416748, "learning_rate": 0.00017566960493528995, "loss": 1.0224, "step": 19130 }, { "epoch": 0.9799303706737661, "grad_norm": 0.1806950718164444, "learning_rate": 0.00017498064919680242, "loss": 1.0111, "step": 19140 }, { "epoch": 0.9804423510137211, "grad_norm": 0.16843082010746002, "learning_rate": 0.00017429439546251066, "loss": 1.0059, "step": 19150 }, { "epoch": 0.980954331353676, "grad_norm": 0.17275168001651764, "learning_rate": 0.00017361083313546875, "loss": 1.037, "step": 19160 }, { "epoch": 0.981466311693631, "grad_norm": 0.17250047624111176, "learning_rate": 0.0001729299516602907, "loss": 1.0193, "step": 19170 }, { "epoch": 0.9819782920335859, "grad_norm": 0.17009197175502777, "learning_rate": 0.00017225174052298777, "loss": 1.0412, "step": 19180 }, { "epoch": 0.9824902723735408, "grad_norm": 0.16845643520355225, "learning_rate": 0.0001715761892508056, "loss": 1.0268, "step": 19190 }, { "epoch": 0.9830022527134958, "grad_norm": 0.16763417422771454, "learning_rate": 0.0001709032874120629, "loss": 1.0425, "step": 19200 }, { "epoch": 0.9835142330534508, "grad_norm": 0.1747148334980011, "learning_rate": 0.00017023302461599015, "loss": 1.0228, "step": 19210 }, { "epoch": 0.9840262133934057, "grad_norm": 0.17626087367534637, "learning_rate": 0.0001695653905125693, "loss": 1.0142, "step": 19220 }, { "epoch": 0.9845381937333606, "grad_norm": 0.17711155116558075, "learning_rate": 0.00016890037479237377, "loss": 1.0238, "step": 19230 }, { "epoch": 0.9850501740733156, "grad_norm": 0.1858174353837967, "learning_rate": 0.00016823796718640937, "loss": 1.033, "step": 19240 }, { "epoch": 0.9855621544132706, "grad_norm": 0.1855236142873764, "learning_rate": 0.0001675781574659558, "loss": 1.0276, "step": 19250 }, { "epoch": 0.9860741347532255, "grad_norm": 0.16916634142398834, "learning_rate": 0.0001669209354424084, "loss": 1.0208, "step": 19260 }, { "epoch": 0.9865861150931804, "grad_norm": 0.18142545223236084, "learning_rate": 0.00016626629096712137, "loss": 1.0302, "step": 19270 }, { "epoch": 0.9870980954331353, "grad_norm": 0.16748617589473724, "learning_rate": 0.00016561421393125036, "loss": 1.0244, "step": 19280 }, { "epoch": 0.9876100757730903, "grad_norm": 0.180519700050354, "learning_rate": 0.000164964694265597, "loss": 1.0009, "step": 19290 }, { "epoch": 0.9881220561130453, "grad_norm": 0.16856172680854797, "learning_rate": 0.00016431772194045298, "loss": 1.009, "step": 19300 }, { "epoch": 0.9886340364530002, "grad_norm": 0.17907920479774475, "learning_rate": 0.00016367328696544536, "loss": 1.0182, "step": 19310 }, { "epoch": 0.9891460167929551, "grad_norm": 0.18012414872646332, "learning_rate": 0.00016303137938938238, "loss": 1.0238, "step": 19320 }, { "epoch": 0.9896579971329101, "grad_norm": 0.17940422892570496, "learning_rate": 0.0001623919893000996, "loss": 1.035, "step": 19330 }, { "epoch": 0.9901699774728651, "grad_norm": 0.17534732818603516, "learning_rate": 0.00016175510682430694, "loss": 1.0282, "step": 19340 }, { "epoch": 0.99068195781282, "grad_norm": 0.17742076516151428, "learning_rate": 0.0001611207221274363, "loss": 1.0308, "step": 19350 }, { "epoch": 0.9911939381527749, "grad_norm": 0.174584299325943, "learning_rate": 0.00016048882541348943, "loss": 1.0433, "step": 19360 }, { "epoch": 0.9917059184927299, "grad_norm": 0.17817029356956482, "learning_rate": 0.00015985940692488709, "loss": 1.0088, "step": 19370 }, { "epoch": 0.9922178988326849, "grad_norm": 0.1764860898256302, "learning_rate": 0.00015923245694231792, "loss": 1.0051, "step": 19380 }, { "epoch": 0.9927298791726398, "grad_norm": 0.1679990142583847, "learning_rate": 0.00015860796578458873, "loss": 1.0383, "step": 19390 }, { "epoch": 0.9932418595125947, "grad_norm": 0.17141203582286835, "learning_rate": 0.00015798592380847468, "loss": 1.0367, "step": 19400 }, { "epoch": 0.9937538398525496, "grad_norm": 0.17301303148269653, "learning_rate": 0.00015736632140857067, "loss": 1.0227, "step": 19410 }, { "epoch": 0.9942658201925046, "grad_norm": 0.17585515975952148, "learning_rate": 0.00015674914901714278, "loss": 1.0373, "step": 19420 }, { "epoch": 0.9947778005324596, "grad_norm": 0.17036980390548706, "learning_rate": 0.0001561343971039807, "loss": 1.0025, "step": 19430 }, { "epoch": 0.9952897808724145, "grad_norm": 0.1802191138267517, "learning_rate": 0.00015552205617625053, "loss": 1.0378, "step": 19440 }, { "epoch": 0.9958017612123694, "grad_norm": 0.17641904950141907, "learning_rate": 0.000154912116778348, "loss": 1.0317, "step": 19450 }, { "epoch": 0.9963137415523244, "grad_norm": 0.18595443665981293, "learning_rate": 0.0001543045694917528, "loss": 1.0081, "step": 19460 }, { "epoch": 0.9968257218922794, "grad_norm": 0.17444072663784027, "learning_rate": 0.0001536994049348828, "loss": 1.0242, "step": 19470 }, { "epoch": 0.9973377022322343, "grad_norm": 0.17894035577774048, "learning_rate": 0.00015309661376294953, "loss": 1.0269, "step": 19480 }, { "epoch": 0.9978496825721892, "grad_norm": 0.17125560343265533, "learning_rate": 0.00015249618666781352, "loss": 1.0189, "step": 19490 }, { "epoch": 0.9983616629121441, "grad_norm": 0.1681634485721588, "learning_rate": 0.0001518981143778408, "loss": 1.0014, "step": 19500 }, { "epoch": 0.9988736432520992, "grad_norm": 0.17360231280326843, "learning_rate": 0.0001513023876577597, "loss": 1.0033, "step": 19510 }, { "epoch": 0.9993856235920541, "grad_norm": 0.17242667078971863, "learning_rate": 0.00015070899730851815, "loss": 1.0236, "step": 19520 }, { "epoch": 0.999897603932009, "grad_norm": 0.16095665097236633, "learning_rate": 0.0001501179341671418, "loss": 1.0393, "step": 19530 } ], "logging_steps": 10, "max_steps": 19532, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8288897328545792e+17, "train_batch_size": 512, "trial_name": null, "trial_params": null }