{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9998487674547563, "eval_steps": 500, "global_step": 29754, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1782.354232835545, "learning_rate": 3.3602150537634413e-09, "loss": 17.1486, "step": 1 }, { "epoch": 0.0, "grad_norm": 1724.0339589676632, "learning_rate": 1.6801075268817205e-08, "loss": 16.7835, "step": 5 }, { "epoch": 0.0, "grad_norm": 1700.514961570837, "learning_rate": 3.360215053763441e-08, "loss": 16.6795, "step": 10 }, { "epoch": 0.0, "grad_norm": 1570.7438431775615, "learning_rate": 5.040322580645161e-08, "loss": 16.2985, "step": 15 }, { "epoch": 0.0, "grad_norm": 1602.1277018435649, "learning_rate": 6.720430107526882e-08, "loss": 16.4868, "step": 20 }, { "epoch": 0.0, "grad_norm": 1499.0619348435034, "learning_rate": 8.400537634408603e-08, "loss": 15.7543, "step": 25 }, { "epoch": 0.0, "grad_norm": 1438.121413693714, "learning_rate": 1.0080645161290322e-07, "loss": 14.2784, "step": 30 }, { "epoch": 0.0, "grad_norm": 1205.847939284157, "learning_rate": 1.1760752688172043e-07, "loss": 13.4497, "step": 35 }, { "epoch": 0.0, "grad_norm": 1081.138946537131, "learning_rate": 1.3440860215053764e-07, "loss": 11.4945, "step": 40 }, { "epoch": 0.0, "grad_norm": 1122.9535780227677, "learning_rate": 1.5120967741935485e-07, "loss": 10.4741, "step": 45 }, { "epoch": 0.01, "grad_norm": 915.8074736571381, "learning_rate": 1.6801075268817206e-07, "loss": 9.3298, "step": 50 }, { "epoch": 0.01, "grad_norm": 485.2026272196928, "learning_rate": 1.8481182795698927e-07, "loss": 7.5624, "step": 55 }, { "epoch": 0.01, "grad_norm": 413.4948329618694, "learning_rate": 2.0161290322580645e-07, "loss": 6.5282, "step": 60 }, { "epoch": 0.01, "grad_norm": 341.22189353250485, "learning_rate": 2.1841397849462368e-07, "loss": 5.8548, "step": 65 }, { "epoch": 0.01, "grad_norm": 314.30197693693736, "learning_rate": 2.3521505376344087e-07, "loss": 5.4052, "step": 70 }, { "epoch": 0.01, "grad_norm": 219.09034450096308, "learning_rate": 2.520161290322581e-07, "loss": 4.9707, "step": 75 }, { "epoch": 0.01, "grad_norm": 243.8000015899658, "learning_rate": 2.688172043010753e-07, "loss": 4.7539, "step": 80 }, { "epoch": 0.01, "grad_norm": 158.48368358483174, "learning_rate": 2.856182795698925e-07, "loss": 4.4491, "step": 85 }, { "epoch": 0.01, "grad_norm": 177.99221919611344, "learning_rate": 3.024193548387097e-07, "loss": 4.0891, "step": 90 }, { "epoch": 0.01, "grad_norm": 147.33085776107916, "learning_rate": 3.192204301075269e-07, "loss": 3.8303, "step": 95 }, { "epoch": 0.01, "grad_norm": 118.84914037685353, "learning_rate": 3.360215053763441e-07, "loss": 3.5773, "step": 100 }, { "epoch": 0.01, "grad_norm": 115.65208604223525, "learning_rate": 3.528225806451614e-07, "loss": 3.2914, "step": 105 }, { "epoch": 0.01, "grad_norm": 117.31300158152648, "learning_rate": 3.6962365591397853e-07, "loss": 2.9765, "step": 110 }, { "epoch": 0.01, "grad_norm": 105.60650363440705, "learning_rate": 3.8642473118279574e-07, "loss": 3.0645, "step": 115 }, { "epoch": 0.01, "grad_norm": 97.2173603475482, "learning_rate": 4.032258064516129e-07, "loss": 2.6624, "step": 120 }, { "epoch": 0.01, "grad_norm": 77.426266243747, "learning_rate": 4.200268817204301e-07, "loss": 2.689, "step": 125 }, { "epoch": 0.01, "grad_norm": 76.25148368706107, "learning_rate": 4.3682795698924737e-07, "loss": 2.581, "step": 130 }, { "epoch": 0.01, "grad_norm": 60.52769003302683, "learning_rate": 4.536290322580646e-07, "loss": 2.49, "step": 135 }, { "epoch": 0.01, "grad_norm": 50.57180850107596, "learning_rate": 4.7043010752688173e-07, "loss": 2.4519, "step": 140 }, { "epoch": 0.01, "grad_norm": 62.69948504309535, "learning_rate": 4.872311827956989e-07, "loss": 2.3989, "step": 145 }, { "epoch": 0.02, "grad_norm": 47.631212194728114, "learning_rate": 5.040322580645161e-07, "loss": 2.2853, "step": 150 }, { "epoch": 0.02, "grad_norm": 42.17283021141972, "learning_rate": 5.208333333333334e-07, "loss": 2.3543, "step": 155 }, { "epoch": 0.02, "grad_norm": 34.08359958874836, "learning_rate": 5.376344086021506e-07, "loss": 2.1276, "step": 160 }, { "epoch": 0.02, "grad_norm": 40.674430325440596, "learning_rate": 5.544354838709678e-07, "loss": 2.1955, "step": 165 }, { "epoch": 0.02, "grad_norm": 31.948809198091773, "learning_rate": 5.71236559139785e-07, "loss": 2.0281, "step": 170 }, { "epoch": 0.02, "grad_norm": 40.27205465031299, "learning_rate": 5.880376344086022e-07, "loss": 1.9945, "step": 175 }, { "epoch": 0.02, "grad_norm": 30.44956418233564, "learning_rate": 6.048387096774194e-07, "loss": 2.0563, "step": 180 }, { "epoch": 0.02, "grad_norm": 25.864985022985085, "learning_rate": 6.216397849462366e-07, "loss": 2.0562, "step": 185 }, { "epoch": 0.02, "grad_norm": 48.57307017843553, "learning_rate": 6.384408602150538e-07, "loss": 2.0814, "step": 190 }, { "epoch": 0.02, "grad_norm": 34.41894528967244, "learning_rate": 6.55241935483871e-07, "loss": 2.0217, "step": 195 }, { "epoch": 0.02, "grad_norm": 25.687554758258678, "learning_rate": 6.720430107526882e-07, "loss": 1.9102, "step": 200 }, { "epoch": 0.02, "grad_norm": 22.2588126225637, "learning_rate": 6.888440860215053e-07, "loss": 1.8607, "step": 205 }, { "epoch": 0.02, "grad_norm": 26.021175684312663, "learning_rate": 7.056451612903228e-07, "loss": 1.8601, "step": 210 }, { "epoch": 0.02, "grad_norm": 24.55833963633963, "learning_rate": 7.224462365591399e-07, "loss": 1.8857, "step": 215 }, { "epoch": 0.02, "grad_norm": 24.853385573955414, "learning_rate": 7.392473118279571e-07, "loss": 1.9314, "step": 220 }, { "epoch": 0.02, "grad_norm": 24.241953706581974, "learning_rate": 7.560483870967743e-07, "loss": 1.8315, "step": 225 }, { "epoch": 0.02, "grad_norm": 30.963286237241633, "learning_rate": 7.728494623655915e-07, "loss": 1.9002, "step": 230 }, { "epoch": 0.02, "grad_norm": 22.03379591891388, "learning_rate": 7.896505376344087e-07, "loss": 1.884, "step": 235 }, { "epoch": 0.02, "grad_norm": 24.578647797403292, "learning_rate": 8.064516129032258e-07, "loss": 1.8904, "step": 240 }, { "epoch": 0.02, "grad_norm": 21.216487036162114, "learning_rate": 8.23252688172043e-07, "loss": 1.8219, "step": 245 }, { "epoch": 0.03, "grad_norm": 20.861193581911664, "learning_rate": 8.400537634408602e-07, "loss": 1.6311, "step": 250 }, { "epoch": 0.03, "grad_norm": 24.299865558645777, "learning_rate": 8.568548387096774e-07, "loss": 1.8558, "step": 255 }, { "epoch": 0.03, "grad_norm": 24.746355187719427, "learning_rate": 8.736559139784947e-07, "loss": 1.7711, "step": 260 }, { "epoch": 0.03, "grad_norm": 21.940626478094718, "learning_rate": 8.904569892473119e-07, "loss": 1.6859, "step": 265 }, { "epoch": 0.03, "grad_norm": 19.6483613759701, "learning_rate": 9.072580645161292e-07, "loss": 1.7343, "step": 270 }, { "epoch": 0.03, "grad_norm": 19.89611142642267, "learning_rate": 9.240591397849464e-07, "loss": 1.6938, "step": 275 }, { "epoch": 0.03, "grad_norm": 25.001859350266486, "learning_rate": 9.408602150537635e-07, "loss": 1.738, "step": 280 }, { "epoch": 0.03, "grad_norm": 17.066884810710853, "learning_rate": 9.576612903225808e-07, "loss": 1.783, "step": 285 }, { "epoch": 0.03, "grad_norm": 24.174269785302588, "learning_rate": 9.744623655913979e-07, "loss": 1.8442, "step": 290 }, { "epoch": 0.03, "grad_norm": 18.12810335166148, "learning_rate": 9.91263440860215e-07, "loss": 1.7999, "step": 295 }, { "epoch": 0.03, "grad_norm": 20.191857447339, "learning_rate": 1.0080645161290323e-06, "loss": 1.7081, "step": 300 }, { "epoch": 0.03, "grad_norm": 19.815036999646132, "learning_rate": 1.0248655913978496e-06, "loss": 1.6522, "step": 305 }, { "epoch": 0.03, "grad_norm": 15.835894404829006, "learning_rate": 1.0416666666666667e-06, "loss": 1.6562, "step": 310 }, { "epoch": 0.03, "grad_norm": 17.63269929975477, "learning_rate": 1.058467741935484e-06, "loss": 1.6752, "step": 315 }, { "epoch": 0.03, "grad_norm": 24.828660268995748, "learning_rate": 1.0752688172043011e-06, "loss": 1.5975, "step": 320 }, { "epoch": 0.03, "grad_norm": 22.533688396632005, "learning_rate": 1.0920698924731184e-06, "loss": 1.6904, "step": 325 }, { "epoch": 0.03, "grad_norm": 19.855998449745552, "learning_rate": 1.1088709677419356e-06, "loss": 1.6313, "step": 330 }, { "epoch": 0.03, "grad_norm": 33.592956121234906, "learning_rate": 1.1256720430107527e-06, "loss": 1.7334, "step": 335 }, { "epoch": 0.03, "grad_norm": 17.355880614952422, "learning_rate": 1.14247311827957e-06, "loss": 1.6071, "step": 340 }, { "epoch": 0.03, "grad_norm": 17.581379694577958, "learning_rate": 1.159274193548387e-06, "loss": 1.6236, "step": 345 }, { "epoch": 0.04, "grad_norm": 14.977879814802337, "learning_rate": 1.1760752688172044e-06, "loss": 1.5829, "step": 350 }, { "epoch": 0.04, "grad_norm": 14.8664963134277, "learning_rate": 1.1928763440860217e-06, "loss": 1.695, "step": 355 }, { "epoch": 0.04, "grad_norm": 20.709276914746113, "learning_rate": 1.2096774193548388e-06, "loss": 1.6185, "step": 360 }, { "epoch": 0.04, "grad_norm": 17.65158447171455, "learning_rate": 1.2264784946236561e-06, "loss": 1.5907, "step": 365 }, { "epoch": 0.04, "grad_norm": 20.428125598199816, "learning_rate": 1.2432795698924732e-06, "loss": 1.5583, "step": 370 }, { "epoch": 0.04, "grad_norm": 17.84519660574699, "learning_rate": 1.2600806451612903e-06, "loss": 1.5405, "step": 375 }, { "epoch": 0.04, "grad_norm": 13.767453102040555, "learning_rate": 1.2768817204301076e-06, "loss": 1.5337, "step": 380 }, { "epoch": 0.04, "grad_norm": 14.818777203371633, "learning_rate": 1.2936827956989247e-06, "loss": 1.6202, "step": 385 }, { "epoch": 0.04, "grad_norm": 15.367040069929933, "learning_rate": 1.310483870967742e-06, "loss": 1.5684, "step": 390 }, { "epoch": 0.04, "grad_norm": 14.598127290678107, "learning_rate": 1.3272849462365592e-06, "loss": 1.5376, "step": 395 }, { "epoch": 0.04, "grad_norm": 17.719109719831028, "learning_rate": 1.3440860215053765e-06, "loss": 1.4921, "step": 400 }, { "epoch": 0.04, "grad_norm": 13.86472842753203, "learning_rate": 1.3608870967741936e-06, "loss": 1.5219, "step": 405 }, { "epoch": 0.04, "grad_norm": 16.638436674699083, "learning_rate": 1.3776881720430107e-06, "loss": 1.5141, "step": 410 }, { "epoch": 0.04, "grad_norm": 16.282678581255507, "learning_rate": 1.394489247311828e-06, "loss": 1.4588, "step": 415 }, { "epoch": 0.04, "grad_norm": 13.90193197057512, "learning_rate": 1.4112903225806455e-06, "loss": 1.5744, "step": 420 }, { "epoch": 0.04, "grad_norm": 14.65628343176628, "learning_rate": 1.4280913978494626e-06, "loss": 1.4718, "step": 425 }, { "epoch": 0.04, "grad_norm": 14.634571109448922, "learning_rate": 1.4448924731182797e-06, "loss": 1.5239, "step": 430 }, { "epoch": 0.04, "grad_norm": 13.749287700582828, "learning_rate": 1.461693548387097e-06, "loss": 1.4566, "step": 435 }, { "epoch": 0.04, "grad_norm": 14.547106603930775, "learning_rate": 1.4784946236559141e-06, "loss": 1.5024, "step": 440 }, { "epoch": 0.04, "grad_norm": 17.773314653888498, "learning_rate": 1.4952956989247315e-06, "loss": 1.6064, "step": 445 }, { "epoch": 0.05, "grad_norm": 15.139400485945263, "learning_rate": 1.5120967741935486e-06, "loss": 1.5204, "step": 450 }, { "epoch": 0.05, "grad_norm": 25.575817089248652, "learning_rate": 1.5288978494623657e-06, "loss": 1.4468, "step": 455 }, { "epoch": 0.05, "grad_norm": 16.012505506084903, "learning_rate": 1.545698924731183e-06, "loss": 1.4547, "step": 460 }, { "epoch": 0.05, "grad_norm": 17.410132583317328, "learning_rate": 1.5625e-06, "loss": 1.446, "step": 465 }, { "epoch": 0.05, "grad_norm": 14.868370038078499, "learning_rate": 1.5793010752688174e-06, "loss": 1.5959, "step": 470 }, { "epoch": 0.05, "grad_norm": 14.812418592555467, "learning_rate": 1.5961021505376345e-06, "loss": 1.4709, "step": 475 }, { "epoch": 0.05, "grad_norm": 15.905386132006193, "learning_rate": 1.6129032258064516e-06, "loss": 1.5097, "step": 480 }, { "epoch": 0.05, "grad_norm": 14.946674581803094, "learning_rate": 1.629704301075269e-06, "loss": 1.4466, "step": 485 }, { "epoch": 0.05, "grad_norm": 13.214144758770525, "learning_rate": 1.646505376344086e-06, "loss": 1.4626, "step": 490 }, { "epoch": 0.05, "grad_norm": 20.39110267796856, "learning_rate": 1.6633064516129033e-06, "loss": 1.5832, "step": 495 }, { "epoch": 0.05, "grad_norm": 14.768822449697835, "learning_rate": 1.6801075268817204e-06, "loss": 1.4171, "step": 500 }, { "epoch": 0.05, "grad_norm": 15.346879819966862, "learning_rate": 1.6969086021505377e-06, "loss": 1.4682, "step": 505 }, { "epoch": 0.05, "grad_norm": 11.776476285237981, "learning_rate": 1.7137096774193548e-06, "loss": 1.4112, "step": 510 }, { "epoch": 0.05, "grad_norm": 14.468416925660526, "learning_rate": 1.7305107526881724e-06, "loss": 1.3957, "step": 515 }, { "epoch": 0.05, "grad_norm": 16.81879140532043, "learning_rate": 1.7473118279569895e-06, "loss": 1.4286, "step": 520 }, { "epoch": 0.05, "grad_norm": 22.268339884303696, "learning_rate": 1.7641129032258068e-06, "loss": 1.4274, "step": 525 }, { "epoch": 0.05, "grad_norm": 16.974470923302114, "learning_rate": 1.7809139784946239e-06, "loss": 1.5043, "step": 530 }, { "epoch": 0.05, "grad_norm": 16.01147151337686, "learning_rate": 1.797715053763441e-06, "loss": 1.4032, "step": 535 }, { "epoch": 0.05, "grad_norm": 22.10572306421554, "learning_rate": 1.8145161290322583e-06, "loss": 1.4333, "step": 540 }, { "epoch": 0.05, "grad_norm": 12.640677257163176, "learning_rate": 1.8313172043010754e-06, "loss": 1.4681, "step": 545 }, { "epoch": 0.06, "grad_norm": 26.564187554827637, "learning_rate": 1.8481182795698927e-06, "loss": 1.4594, "step": 550 }, { "epoch": 0.06, "grad_norm": 15.08193320266031, "learning_rate": 1.8649193548387098e-06, "loss": 1.4362, "step": 555 }, { "epoch": 0.06, "grad_norm": 34.685212861993335, "learning_rate": 1.881720430107527e-06, "loss": 1.471, "step": 560 }, { "epoch": 0.06, "grad_norm": 43.100869084203225, "learning_rate": 1.8985215053763442e-06, "loss": 1.4642, "step": 565 }, { "epoch": 0.06, "grad_norm": 15.559894937589828, "learning_rate": 1.9153225806451616e-06, "loss": 1.3963, "step": 570 }, { "epoch": 0.06, "grad_norm": 38.2485410412665, "learning_rate": 1.9321236559139787e-06, "loss": 1.4386, "step": 575 }, { "epoch": 0.06, "grad_norm": 30.95124448720418, "learning_rate": 1.9489247311827958e-06, "loss": 1.4151, "step": 580 }, { "epoch": 0.06, "grad_norm": 51.70839528402327, "learning_rate": 1.965725806451613e-06, "loss": 1.4209, "step": 585 }, { "epoch": 0.06, "grad_norm": 23.196005284612998, "learning_rate": 1.98252688172043e-06, "loss": 1.4012, "step": 590 }, { "epoch": 0.06, "grad_norm": 27.1622839764402, "learning_rate": 1.9993279569892475e-06, "loss": 1.3261, "step": 595 }, { "epoch": 0.06, "grad_norm": 13.43452977396187, "learning_rate": 2.0161290322580646e-06, "loss": 1.3766, "step": 600 }, { "epoch": 0.06, "grad_norm": 12.033787281630863, "learning_rate": 2.032930107526882e-06, "loss": 1.4099, "step": 605 }, { "epoch": 0.06, "grad_norm": 13.955820585990788, "learning_rate": 2.0497311827956992e-06, "loss": 1.3794, "step": 610 }, { "epoch": 0.06, "grad_norm": 15.461768151074137, "learning_rate": 2.0665322580645163e-06, "loss": 1.4485, "step": 615 }, { "epoch": 0.06, "grad_norm": 19.64480417285225, "learning_rate": 2.0833333333333334e-06, "loss": 1.3828, "step": 620 }, { "epoch": 0.06, "grad_norm": 13.057917363440225, "learning_rate": 2.100134408602151e-06, "loss": 1.4158, "step": 625 }, { "epoch": 0.06, "grad_norm": 23.390563850096793, "learning_rate": 2.116935483870968e-06, "loss": 1.4126, "step": 630 }, { "epoch": 0.06, "grad_norm": 27.856058877726337, "learning_rate": 2.133736559139785e-06, "loss": 1.3153, "step": 635 }, { "epoch": 0.06, "grad_norm": 15.627432697331793, "learning_rate": 2.1505376344086023e-06, "loss": 1.3693, "step": 640 }, { "epoch": 0.07, "grad_norm": 17.018250112690833, "learning_rate": 2.1673387096774194e-06, "loss": 1.3771, "step": 645 }, { "epoch": 0.07, "grad_norm": 24.363765549799407, "learning_rate": 2.184139784946237e-06, "loss": 1.4001, "step": 650 }, { "epoch": 0.07, "grad_norm": 16.8886205337284, "learning_rate": 2.200940860215054e-06, "loss": 1.3851, "step": 655 }, { "epoch": 0.07, "grad_norm": 12.200499403924693, "learning_rate": 2.217741935483871e-06, "loss": 1.3914, "step": 660 }, { "epoch": 0.07, "grad_norm": 22.506933751518403, "learning_rate": 2.234543010752688e-06, "loss": 1.3736, "step": 665 }, { "epoch": 0.07, "grad_norm": 21.168953266789856, "learning_rate": 2.2513440860215053e-06, "loss": 1.349, "step": 670 }, { "epoch": 0.07, "grad_norm": 58.48769280628009, "learning_rate": 2.268145161290323e-06, "loss": 1.4582, "step": 675 }, { "epoch": 0.07, "grad_norm": 34.01636176936607, "learning_rate": 2.28494623655914e-06, "loss": 1.4758, "step": 680 }, { "epoch": 0.07, "grad_norm": 25.025355540731983, "learning_rate": 2.301747311827957e-06, "loss": 1.3476, "step": 685 }, { "epoch": 0.07, "grad_norm": 14.751839601928431, "learning_rate": 2.318548387096774e-06, "loss": 1.3785, "step": 690 }, { "epoch": 0.07, "grad_norm": 12.071805821551914, "learning_rate": 2.3353494623655917e-06, "loss": 1.3635, "step": 695 }, { "epoch": 0.07, "grad_norm": 20.602958992356196, "learning_rate": 2.3521505376344088e-06, "loss": 1.3845, "step": 700 }, { "epoch": 0.07, "grad_norm": 17.385890285083427, "learning_rate": 2.3689516129032263e-06, "loss": 1.2857, "step": 705 }, { "epoch": 0.07, "grad_norm": 12.215016267194583, "learning_rate": 2.3857526881720434e-06, "loss": 1.3015, "step": 710 }, { "epoch": 0.07, "grad_norm": 33.48226663382944, "learning_rate": 2.4025537634408605e-06, "loss": 1.4045, "step": 715 }, { "epoch": 0.07, "grad_norm": 23.942731192443755, "learning_rate": 2.4193548387096776e-06, "loss": 1.3649, "step": 720 }, { "epoch": 0.07, "grad_norm": 11.558052499204832, "learning_rate": 2.4361559139784947e-06, "loss": 1.3811, "step": 725 }, { "epoch": 0.07, "grad_norm": 18.69964334303326, "learning_rate": 2.4529569892473122e-06, "loss": 1.3416, "step": 730 }, { "epoch": 0.07, "grad_norm": 13.177544261859945, "learning_rate": 2.4697580645161293e-06, "loss": 1.3243, "step": 735 }, { "epoch": 0.07, "grad_norm": 12.803122016869308, "learning_rate": 2.4865591397849464e-06, "loss": 1.371, "step": 740 }, { "epoch": 0.08, "grad_norm": 23.51671956073884, "learning_rate": 2.503360215053764e-06, "loss": 1.3652, "step": 745 }, { "epoch": 0.08, "grad_norm": 15.72972602267457, "learning_rate": 2.5201612903225806e-06, "loss": 1.3826, "step": 750 }, { "epoch": 0.08, "grad_norm": 11.865572453001107, "learning_rate": 2.536962365591398e-06, "loss": 1.387, "step": 755 }, { "epoch": 0.08, "grad_norm": 21.938389282777255, "learning_rate": 2.5537634408602153e-06, "loss": 1.3031, "step": 760 }, { "epoch": 0.08, "grad_norm": 14.036068148740473, "learning_rate": 2.570564516129033e-06, "loss": 1.392, "step": 765 }, { "epoch": 0.08, "grad_norm": 15.200040828281905, "learning_rate": 2.5873655913978495e-06, "loss": 1.4077, "step": 770 }, { "epoch": 0.08, "grad_norm": 16.776949710823484, "learning_rate": 2.604166666666667e-06, "loss": 1.3833, "step": 775 }, { "epoch": 0.08, "grad_norm": 14.947572172197686, "learning_rate": 2.620967741935484e-06, "loss": 1.4015, "step": 780 }, { "epoch": 0.08, "grad_norm": 76.93992013037158, "learning_rate": 2.6377688172043016e-06, "loss": 1.4051, "step": 785 }, { "epoch": 0.08, "grad_norm": 12.942371317725181, "learning_rate": 2.6545698924731183e-06, "loss": 1.348, "step": 790 }, { "epoch": 0.08, "grad_norm": 13.086369840086844, "learning_rate": 2.671370967741936e-06, "loss": 1.3453, "step": 795 }, { "epoch": 0.08, "grad_norm": 35.89028186175731, "learning_rate": 2.688172043010753e-06, "loss": 1.3807, "step": 800 }, { "epoch": 0.08, "grad_norm": 29.63422412243611, "learning_rate": 2.70497311827957e-06, "loss": 1.3625, "step": 805 }, { "epoch": 0.08, "grad_norm": 55.32993232323523, "learning_rate": 2.721774193548387e-06, "loss": 1.4005, "step": 810 }, { "epoch": 0.08, "grad_norm": 15.984066997357118, "learning_rate": 2.7385752688172047e-06, "loss": 1.3381, "step": 815 }, { "epoch": 0.08, "grad_norm": 25.472146032597063, "learning_rate": 2.7553763440860214e-06, "loss": 1.4245, "step": 820 }, { "epoch": 0.08, "grad_norm": 15.457358515963906, "learning_rate": 2.772177419354839e-06, "loss": 1.3625, "step": 825 }, { "epoch": 0.08, "grad_norm": 16.759070196363787, "learning_rate": 2.788978494623656e-06, "loss": 1.448, "step": 830 }, { "epoch": 0.08, "grad_norm": 40.79061945114763, "learning_rate": 2.8057795698924735e-06, "loss": 1.3464, "step": 835 }, { "epoch": 0.08, "grad_norm": 25.161756708332057, "learning_rate": 2.822580645161291e-06, "loss": 1.3722, "step": 840 }, { "epoch": 0.09, "grad_norm": 25.188019881756727, "learning_rate": 2.8393817204301077e-06, "loss": 1.2966, "step": 845 }, { "epoch": 0.09, "grad_norm": 20.235137203610662, "learning_rate": 2.8561827956989252e-06, "loss": 1.3905, "step": 850 }, { "epoch": 0.09, "grad_norm": 36.617132909309895, "learning_rate": 2.872983870967742e-06, "loss": 1.3609, "step": 855 }, { "epoch": 0.09, "grad_norm": 29.49169037802908, "learning_rate": 2.8897849462365594e-06, "loss": 1.3619, "step": 860 }, { "epoch": 0.09, "grad_norm": 46.26563028512804, "learning_rate": 2.9065860215053765e-06, "loss": 1.3855, "step": 865 }, { "epoch": 0.09, "grad_norm": 22.0915541742763, "learning_rate": 2.923387096774194e-06, "loss": 1.4284, "step": 870 }, { "epoch": 0.09, "grad_norm": 35.015078004933514, "learning_rate": 2.9401881720430108e-06, "loss": 1.3365, "step": 875 }, { "epoch": 0.09, "grad_norm": 95.54066941374312, "learning_rate": 2.9569892473118283e-06, "loss": 1.3615, "step": 880 }, { "epoch": 0.09, "grad_norm": 69.29704172941422, "learning_rate": 2.9737903225806454e-06, "loss": 1.3591, "step": 885 }, { "epoch": 0.09, "grad_norm": 28.783068659669784, "learning_rate": 2.990591397849463e-06, "loss": 1.3271, "step": 890 }, { "epoch": 0.09, "grad_norm": 18.165110584313982, "learning_rate": 3.0073924731182796e-06, "loss": 1.2884, "step": 895 }, { "epoch": 0.09, "grad_norm": 41.205307526496874, "learning_rate": 3.024193548387097e-06, "loss": 1.3995, "step": 900 }, { "epoch": 0.09, "grad_norm": 40.001454194930204, "learning_rate": 3.0409946236559142e-06, "loss": 1.3513, "step": 905 }, { "epoch": 0.09, "grad_norm": 15.56121470148507, "learning_rate": 3.0577956989247313e-06, "loss": 1.2726, "step": 910 }, { "epoch": 0.09, "grad_norm": 13.766228304604502, "learning_rate": 3.0745967741935484e-06, "loss": 1.3304, "step": 915 }, { "epoch": 0.09, "grad_norm": 16.262090744904206, "learning_rate": 3.091397849462366e-06, "loss": 1.3634, "step": 920 }, { "epoch": 0.09, "grad_norm": 20.491180802304278, "learning_rate": 3.1081989247311826e-06, "loss": 1.3582, "step": 925 }, { "epoch": 0.09, "grad_norm": 44.89752185269729, "learning_rate": 3.125e-06, "loss": 1.3173, "step": 930 }, { "epoch": 0.09, "grad_norm": 35.91291644151354, "learning_rate": 3.1418010752688177e-06, "loss": 1.3348, "step": 935 }, { "epoch": 0.09, "grad_norm": 11.238116458693074, "learning_rate": 3.1586021505376348e-06, "loss": 1.3507, "step": 940 }, { "epoch": 0.1, "grad_norm": 66.27238623554678, "learning_rate": 3.1754032258064523e-06, "loss": 1.4242, "step": 945 }, { "epoch": 0.1, "grad_norm": 23.33319086370242, "learning_rate": 3.192204301075269e-06, "loss": 1.3167, "step": 950 }, { "epoch": 0.1, "grad_norm": 27.96170215869396, "learning_rate": 3.2090053763440865e-06, "loss": 1.3044, "step": 955 }, { "epoch": 0.1, "grad_norm": 24.96629022725591, "learning_rate": 3.225806451612903e-06, "loss": 1.3122, "step": 960 }, { "epoch": 0.1, "grad_norm": 31.97296302595778, "learning_rate": 3.2426075268817207e-06, "loss": 1.3263, "step": 965 }, { "epoch": 0.1, "grad_norm": 40.56869211119147, "learning_rate": 3.259408602150538e-06, "loss": 1.3216, "step": 970 }, { "epoch": 0.1, "grad_norm": 48.86733327981514, "learning_rate": 3.2762096774193553e-06, "loss": 1.3852, "step": 975 }, { "epoch": 0.1, "grad_norm": 48.8589827825722, "learning_rate": 3.293010752688172e-06, "loss": 1.3942, "step": 980 }, { "epoch": 0.1, "grad_norm": 105.92172275966337, "learning_rate": 3.3098118279569895e-06, "loss": 1.331, "step": 985 }, { "epoch": 0.1, "grad_norm": 79.25673823625935, "learning_rate": 3.3266129032258067e-06, "loss": 1.4146, "step": 990 }, { "epoch": 0.1, "grad_norm": 22.753954756085893, "learning_rate": 3.343413978494624e-06, "loss": 1.3425, "step": 995 }, { "epoch": 0.1, "grad_norm": 17.938951839801916, "learning_rate": 3.360215053763441e-06, "loss": 1.2828, "step": 1000 }, { "epoch": 0.1, "grad_norm": 40.93281890650386, "learning_rate": 3.3770161290322584e-06, "loss": 1.4161, "step": 1005 }, { "epoch": 0.1, "grad_norm": 23.016726816711316, "learning_rate": 3.3938172043010755e-06, "loss": 1.342, "step": 1010 }, { "epoch": 0.1, "grad_norm": 19.694780338872466, "learning_rate": 3.4106182795698926e-06, "loss": 1.2934, "step": 1015 }, { "epoch": 0.1, "grad_norm": 12.232488558864302, "learning_rate": 3.4274193548387097e-06, "loss": 1.4007, "step": 1020 }, { "epoch": 0.1, "grad_norm": 14.56739533203392, "learning_rate": 3.4442204301075272e-06, "loss": 1.2597, "step": 1025 }, { "epoch": 0.1, "grad_norm": 17.55384322451927, "learning_rate": 3.4610215053763447e-06, "loss": 1.3139, "step": 1030 }, { "epoch": 0.1, "grad_norm": 14.51771613755045, "learning_rate": 3.4778225806451614e-06, "loss": 1.2369, "step": 1035 }, { "epoch": 0.1, "grad_norm": 12.547628388697412, "learning_rate": 3.494623655913979e-06, "loss": 1.3077, "step": 1040 }, { "epoch": 0.11, "grad_norm": 21.253857468288704, "learning_rate": 3.511424731182796e-06, "loss": 1.2945, "step": 1045 }, { "epoch": 0.11, "grad_norm": 32.2978178380615, "learning_rate": 3.5282258064516136e-06, "loss": 1.3445, "step": 1050 }, { "epoch": 0.11, "grad_norm": 11.768307513613498, "learning_rate": 3.5450268817204303e-06, "loss": 1.3081, "step": 1055 }, { "epoch": 0.11, "grad_norm": 15.387095517003354, "learning_rate": 3.5618279569892478e-06, "loss": 1.2361, "step": 1060 }, { "epoch": 0.11, "grad_norm": 14.17899433065435, "learning_rate": 3.578629032258065e-06, "loss": 1.307, "step": 1065 }, { "epoch": 0.11, "grad_norm": 19.757988892716817, "learning_rate": 3.595430107526882e-06, "loss": 1.3318, "step": 1070 }, { "epoch": 0.11, "grad_norm": 14.618417336002926, "learning_rate": 3.612231182795699e-06, "loss": 1.311, "step": 1075 }, { "epoch": 0.11, "grad_norm": 11.71596301767083, "learning_rate": 3.6290322580645166e-06, "loss": 1.226, "step": 1080 }, { "epoch": 0.11, "grad_norm": 11.068963240572275, "learning_rate": 3.6458333333333333e-06, "loss": 1.2774, "step": 1085 }, { "epoch": 0.11, "grad_norm": 12.802355495211414, "learning_rate": 3.662634408602151e-06, "loss": 1.3052, "step": 1090 }, { "epoch": 0.11, "grad_norm": 15.684098983890863, "learning_rate": 3.679435483870968e-06, "loss": 1.2614, "step": 1095 }, { "epoch": 0.11, "grad_norm": 11.099864032228766, "learning_rate": 3.6962365591397855e-06, "loss": 1.2375, "step": 1100 }, { "epoch": 0.11, "grad_norm": 44.01005527420453, "learning_rate": 3.713037634408602e-06, "loss": 1.269, "step": 1105 }, { "epoch": 0.11, "grad_norm": 13.69498598023566, "learning_rate": 3.7298387096774197e-06, "loss": 1.3259, "step": 1110 }, { "epoch": 0.11, "grad_norm": 22.87277001637021, "learning_rate": 3.7466397849462368e-06, "loss": 1.2521, "step": 1115 }, { "epoch": 0.11, "grad_norm": 12.733526400008255, "learning_rate": 3.763440860215054e-06, "loss": 1.3009, "step": 1120 }, { "epoch": 0.11, "grad_norm": 17.430350713728973, "learning_rate": 3.7802419354838714e-06, "loss": 1.2388, "step": 1125 }, { "epoch": 0.11, "grad_norm": 15.85124486151365, "learning_rate": 3.7970430107526885e-06, "loss": 1.3255, "step": 1130 }, { "epoch": 0.11, "grad_norm": 11.2973639532908, "learning_rate": 3.813844086021506e-06, "loss": 1.2868, "step": 1135 }, { "epoch": 0.11, "grad_norm": 16.044475090556347, "learning_rate": 3.830645161290323e-06, "loss": 1.2921, "step": 1140 }, { "epoch": 0.12, "grad_norm": 11.419443385517743, "learning_rate": 3.84744623655914e-06, "loss": 1.3027, "step": 1145 }, { "epoch": 0.12, "grad_norm": 9.408727766295128, "learning_rate": 3.864247311827957e-06, "loss": 1.2636, "step": 1150 }, { "epoch": 0.12, "grad_norm": 28.714229113569143, "learning_rate": 3.8810483870967744e-06, "loss": 1.3094, "step": 1155 }, { "epoch": 0.12, "grad_norm": 19.468610643519202, "learning_rate": 3.8978494623655915e-06, "loss": 1.295, "step": 1160 }, { "epoch": 0.12, "grad_norm": 22.147649353738434, "learning_rate": 3.914650537634409e-06, "loss": 1.268, "step": 1165 }, { "epoch": 0.12, "grad_norm": 21.815639864910924, "learning_rate": 3.931451612903226e-06, "loss": 1.3022, "step": 1170 }, { "epoch": 0.12, "grad_norm": 10.131219829800257, "learning_rate": 3.948252688172044e-06, "loss": 1.3033, "step": 1175 }, { "epoch": 0.12, "grad_norm": 40.422268139360455, "learning_rate": 3.96505376344086e-06, "loss": 1.2916, "step": 1180 }, { "epoch": 0.12, "grad_norm": 13.18996572855082, "learning_rate": 3.981854838709678e-06, "loss": 1.2089, "step": 1185 }, { "epoch": 0.12, "grad_norm": 34.97991531805703, "learning_rate": 3.998655913978495e-06, "loss": 1.25, "step": 1190 }, { "epoch": 0.12, "grad_norm": 24.84307661290493, "learning_rate": 4.015456989247312e-06, "loss": 1.3503, "step": 1195 }, { "epoch": 0.12, "grad_norm": 23.814660056097257, "learning_rate": 4.032258064516129e-06, "loss": 1.2976, "step": 1200 }, { "epoch": 0.12, "grad_norm": 48.66684356663973, "learning_rate": 4.049059139784946e-06, "loss": 1.278, "step": 1205 }, { "epoch": 0.12, "grad_norm": 16.743111074822284, "learning_rate": 4.065860215053764e-06, "loss": 1.2592, "step": 1210 }, { "epoch": 0.12, "grad_norm": 24.74596115728974, "learning_rate": 4.082661290322581e-06, "loss": 1.2825, "step": 1215 }, { "epoch": 0.12, "grad_norm": 34.36014978790517, "learning_rate": 4.0994623655913985e-06, "loss": 1.2459, "step": 1220 }, { "epoch": 0.12, "grad_norm": 20.999547137931017, "learning_rate": 4.1162634408602156e-06, "loss": 1.2121, "step": 1225 }, { "epoch": 0.12, "grad_norm": 12.868497455108407, "learning_rate": 4.133064516129033e-06, "loss": 1.2768, "step": 1230 }, { "epoch": 0.12, "grad_norm": 30.434809468480765, "learning_rate": 4.14986559139785e-06, "loss": 1.2675, "step": 1235 }, { "epoch": 0.13, "grad_norm": 11.616266135748583, "learning_rate": 4.166666666666667e-06, "loss": 1.2608, "step": 1240 }, { "epoch": 0.13, "grad_norm": 12.532446205798287, "learning_rate": 4.183467741935484e-06, "loss": 1.3375, "step": 1245 }, { "epoch": 0.13, "grad_norm": 21.560371038383625, "learning_rate": 4.200268817204302e-06, "loss": 1.2329, "step": 1250 }, { "epoch": 0.13, "grad_norm": 13.724212892297027, "learning_rate": 4.217069892473118e-06, "loss": 1.2697, "step": 1255 }, { "epoch": 0.13, "grad_norm": 22.2529932746503, "learning_rate": 4.233870967741936e-06, "loss": 1.2645, "step": 1260 }, { "epoch": 0.13, "grad_norm": 13.558716472596469, "learning_rate": 4.250672043010753e-06, "loss": 1.2197, "step": 1265 }, { "epoch": 0.13, "grad_norm": 29.247532948847013, "learning_rate": 4.26747311827957e-06, "loss": 1.2919, "step": 1270 }, { "epoch": 0.13, "grad_norm": 11.804159591275033, "learning_rate": 4.2842741935483874e-06, "loss": 1.3064, "step": 1275 }, { "epoch": 0.13, "grad_norm": 21.409715922052758, "learning_rate": 4.3010752688172045e-06, "loss": 1.2718, "step": 1280 }, { "epoch": 0.13, "grad_norm": 14.77842104466719, "learning_rate": 4.317876344086022e-06, "loss": 1.3095, "step": 1285 }, { "epoch": 0.13, "grad_norm": 15.685031119948517, "learning_rate": 4.334677419354839e-06, "loss": 1.2832, "step": 1290 }, { "epoch": 0.13, "grad_norm": 21.385894858494375, "learning_rate": 4.351478494623656e-06, "loss": 1.3237, "step": 1295 }, { "epoch": 0.13, "grad_norm": 13.723338453835758, "learning_rate": 4.368279569892474e-06, "loss": 1.2666, "step": 1300 }, { "epoch": 0.13, "grad_norm": 12.439591311170961, "learning_rate": 4.385080645161291e-06, "loss": 1.2909, "step": 1305 }, { "epoch": 0.13, "grad_norm": 10.514215609608113, "learning_rate": 4.401881720430108e-06, "loss": 1.2327, "step": 1310 }, { "epoch": 0.13, "grad_norm": 17.845827804092984, "learning_rate": 4.418682795698925e-06, "loss": 1.2617, "step": 1315 }, { "epoch": 0.13, "grad_norm": 9.823360940605603, "learning_rate": 4.435483870967742e-06, "loss": 1.2915, "step": 1320 }, { "epoch": 0.13, "grad_norm": 11.472975718436162, "learning_rate": 4.452284946236559e-06, "loss": 1.302, "step": 1325 }, { "epoch": 0.13, "grad_norm": 11.284002557459615, "learning_rate": 4.469086021505376e-06, "loss": 1.2788, "step": 1330 }, { "epoch": 0.13, "grad_norm": 38.27911520285089, "learning_rate": 4.485887096774194e-06, "loss": 1.2552, "step": 1335 }, { "epoch": 0.14, "grad_norm": 9.432535268516432, "learning_rate": 4.502688172043011e-06, "loss": 1.2759, "step": 1340 }, { "epoch": 0.14, "grad_norm": 10.98747211347199, "learning_rate": 4.5194892473118286e-06, "loss": 1.2844, "step": 1345 }, { "epoch": 0.14, "grad_norm": 12.511423262582975, "learning_rate": 4.536290322580646e-06, "loss": 1.2283, "step": 1350 }, { "epoch": 0.14, "grad_norm": 20.514439149634352, "learning_rate": 4.553091397849463e-06, "loss": 1.2353, "step": 1355 }, { "epoch": 0.14, "grad_norm": 16.015991119714123, "learning_rate": 4.56989247311828e-06, "loss": 1.2959, "step": 1360 }, { "epoch": 0.14, "grad_norm": 10.106660893981587, "learning_rate": 4.586693548387097e-06, "loss": 1.2239, "step": 1365 }, { "epoch": 0.14, "grad_norm": 29.853730062938347, "learning_rate": 4.603494623655914e-06, "loss": 1.3194, "step": 1370 }, { "epoch": 0.14, "grad_norm": 14.685737996052852, "learning_rate": 4.620295698924732e-06, "loss": 1.2148, "step": 1375 }, { "epoch": 0.14, "grad_norm": 10.877331149990967, "learning_rate": 4.637096774193548e-06, "loss": 1.2045, "step": 1380 }, { "epoch": 0.14, "grad_norm": 11.008218814296683, "learning_rate": 4.653897849462366e-06, "loss": 1.2477, "step": 1385 }, { "epoch": 0.14, "grad_norm": 22.743570797984013, "learning_rate": 4.670698924731183e-06, "loss": 1.2775, "step": 1390 }, { "epoch": 0.14, "grad_norm": 29.524974297593605, "learning_rate": 4.6875000000000004e-06, "loss": 1.299, "step": 1395 }, { "epoch": 0.14, "grad_norm": 19.37930037376844, "learning_rate": 4.7043010752688175e-06, "loss": 1.3329, "step": 1400 }, { "epoch": 0.14, "grad_norm": 12.598571626831996, "learning_rate": 4.721102150537635e-06, "loss": 1.2942, "step": 1405 }, { "epoch": 0.14, "grad_norm": 12.752457322963501, "learning_rate": 4.737903225806453e-06, "loss": 1.2408, "step": 1410 }, { "epoch": 0.14, "grad_norm": 12.210288312721424, "learning_rate": 4.754704301075269e-06, "loss": 1.2762, "step": 1415 }, { "epoch": 0.14, "grad_norm": 15.028431784981668, "learning_rate": 4.771505376344087e-06, "loss": 1.2817, "step": 1420 }, { "epoch": 0.14, "grad_norm": 14.993762858927266, "learning_rate": 4.788306451612904e-06, "loss": 1.2521, "step": 1425 }, { "epoch": 0.14, "grad_norm": 26.11557096134634, "learning_rate": 4.805107526881721e-06, "loss": 1.2275, "step": 1430 }, { "epoch": 0.14, "grad_norm": 23.808664429913215, "learning_rate": 4.821908602150538e-06, "loss": 1.2729, "step": 1435 }, { "epoch": 0.15, "grad_norm": 13.214083146280673, "learning_rate": 4.838709677419355e-06, "loss": 1.2103, "step": 1440 }, { "epoch": 0.15, "grad_norm": 14.950068491419977, "learning_rate": 4.855510752688172e-06, "loss": 1.2407, "step": 1445 }, { "epoch": 0.15, "grad_norm": 49.77700236183305, "learning_rate": 4.872311827956989e-06, "loss": 1.2599, "step": 1450 }, { "epoch": 0.15, "grad_norm": 12.2907307607193, "learning_rate": 4.8891129032258065e-06, "loss": 1.2365, "step": 1455 }, { "epoch": 0.15, "grad_norm": 35.24775519440709, "learning_rate": 4.9059139784946245e-06, "loss": 1.2727, "step": 1460 }, { "epoch": 0.15, "grad_norm": 19.209125211904805, "learning_rate": 4.922715053763441e-06, "loss": 1.2516, "step": 1465 }, { "epoch": 0.15, "grad_norm": 13.378215907461524, "learning_rate": 4.939516129032259e-06, "loss": 1.2488, "step": 1470 }, { "epoch": 0.15, "grad_norm": 17.6633476819897, "learning_rate": 4.956317204301076e-06, "loss": 1.2668, "step": 1475 }, { "epoch": 0.15, "grad_norm": 10.740924281703688, "learning_rate": 4.973118279569893e-06, "loss": 1.249, "step": 1480 }, { "epoch": 0.15, "grad_norm": 14.726705345231204, "learning_rate": 4.98991935483871e-06, "loss": 1.3307, "step": 1485 }, { "epoch": 0.15, "grad_norm": 14.59599163185456, "learning_rate": 5.006720430107528e-06, "loss": 1.2854, "step": 1490 }, { "epoch": 0.15, "grad_norm": 41.090146378069534, "learning_rate": 5.023521505376344e-06, "loss": 1.291, "step": 1495 }, { "epoch": 0.15, "grad_norm": 11.10786448778689, "learning_rate": 5.040322580645161e-06, "loss": 1.2843, "step": 1500 }, { "epoch": 0.15, "grad_norm": 16.353102002707647, "learning_rate": 5.057123655913979e-06, "loss": 1.2556, "step": 1505 }, { "epoch": 0.15, "grad_norm": 15.89417888688282, "learning_rate": 5.073924731182796e-06, "loss": 1.2582, "step": 1510 }, { "epoch": 0.15, "grad_norm": 38.41649367763352, "learning_rate": 5.090725806451613e-06, "loss": 1.3358, "step": 1515 }, { "epoch": 0.15, "grad_norm": 13.107954803233616, "learning_rate": 5.1075268817204305e-06, "loss": 1.28, "step": 1520 }, { "epoch": 0.15, "grad_norm": 11.825179318943986, "learning_rate": 5.124327956989248e-06, "loss": 1.2516, "step": 1525 }, { "epoch": 0.15, "grad_norm": 26.492711678315192, "learning_rate": 5.141129032258066e-06, "loss": 1.2652, "step": 1530 }, { "epoch": 0.15, "grad_norm": 17.328853885466696, "learning_rate": 5.157930107526882e-06, "loss": 1.2965, "step": 1535 }, { "epoch": 0.16, "grad_norm": 17.50828288344384, "learning_rate": 5.174731182795699e-06, "loss": 1.2399, "step": 1540 }, { "epoch": 0.16, "grad_norm": 11.950092920119129, "learning_rate": 5.191532258064517e-06, "loss": 1.3137, "step": 1545 }, { "epoch": 0.16, "grad_norm": 20.052669655145927, "learning_rate": 5.208333333333334e-06, "loss": 1.3046, "step": 1550 }, { "epoch": 0.16, "grad_norm": 53.370358081765325, "learning_rate": 5.22513440860215e-06, "loss": 1.2716, "step": 1555 }, { "epoch": 0.16, "grad_norm": 23.64171905195358, "learning_rate": 5.241935483870968e-06, "loss": 1.2413, "step": 1560 }, { "epoch": 0.16, "grad_norm": 18.501262717248586, "learning_rate": 5.258736559139785e-06, "loss": 1.3243, "step": 1565 }, { "epoch": 0.16, "grad_norm": 46.31409454126866, "learning_rate": 5.275537634408603e-06, "loss": 1.2501, "step": 1570 }, { "epoch": 0.16, "grad_norm": 24.436697787509363, "learning_rate": 5.2923387096774195e-06, "loss": 1.2446, "step": 1575 }, { "epoch": 0.16, "grad_norm": 15.211955398526683, "learning_rate": 5.309139784946237e-06, "loss": 1.2485, "step": 1580 }, { "epoch": 0.16, "grad_norm": 51.17545641781786, "learning_rate": 5.3259408602150546e-06, "loss": 1.2664, "step": 1585 }, { "epoch": 0.16, "grad_norm": 22.73729158273849, "learning_rate": 5.342741935483872e-06, "loss": 1.2712, "step": 1590 }, { "epoch": 0.16, "grad_norm": 14.40335244214581, "learning_rate": 5.359543010752689e-06, "loss": 1.2636, "step": 1595 }, { "epoch": 0.16, "grad_norm": 10.214516983760586, "learning_rate": 5.376344086021506e-06, "loss": 1.2158, "step": 1600 }, { "epoch": 0.16, "grad_norm": 10.34966113388582, "learning_rate": 5.393145161290323e-06, "loss": 1.2513, "step": 1605 }, { "epoch": 0.16, "grad_norm": 27.012632212205652, "learning_rate": 5.40994623655914e-06, "loss": 1.2153, "step": 1610 }, { "epoch": 0.16, "grad_norm": 66.26220201569467, "learning_rate": 5.426747311827958e-06, "loss": 1.2941, "step": 1615 }, { "epoch": 0.16, "grad_norm": 14.447517864939536, "learning_rate": 5.443548387096774e-06, "loss": 1.2104, "step": 1620 }, { "epoch": 0.16, "grad_norm": 53.03873905700024, "learning_rate": 5.460349462365591e-06, "loss": 1.2912, "step": 1625 }, { "epoch": 0.16, "grad_norm": 82.47953703104959, "learning_rate": 5.477150537634409e-06, "loss": 1.2963, "step": 1630 }, { "epoch": 0.16, "grad_norm": 59.35832189434322, "learning_rate": 5.4939516129032264e-06, "loss": 1.299, "step": 1635 }, { "epoch": 0.17, "grad_norm": 21.23456282221694, "learning_rate": 5.510752688172043e-06, "loss": 1.2955, "step": 1640 }, { "epoch": 0.17, "grad_norm": 17.208909499987218, "learning_rate": 5.527553763440861e-06, "loss": 1.2827, "step": 1645 }, { "epoch": 0.17, "grad_norm": 17.814579537509662, "learning_rate": 5.544354838709678e-06, "loss": 1.2782, "step": 1650 }, { "epoch": 0.17, "grad_norm": 44.84766228129635, "learning_rate": 5.561155913978496e-06, "loss": 1.3388, "step": 1655 }, { "epoch": 0.17, "grad_norm": 80.02157305544218, "learning_rate": 5.577956989247312e-06, "loss": 1.3387, "step": 1660 }, { "epoch": 0.17, "grad_norm": 47.30693371027271, "learning_rate": 5.594758064516129e-06, "loss": 1.2853, "step": 1665 }, { "epoch": 0.17, "grad_norm": 31.111971822450105, "learning_rate": 5.611559139784947e-06, "loss": 1.3207, "step": 1670 }, { "epoch": 0.17, "grad_norm": 29.323472551959778, "learning_rate": 5.628360215053764e-06, "loss": 1.2522, "step": 1675 }, { "epoch": 0.17, "grad_norm": 40.92036524504599, "learning_rate": 5.645161290322582e-06, "loss": 1.3008, "step": 1680 }, { "epoch": 0.17, "grad_norm": 33.7231465176679, "learning_rate": 5.661962365591398e-06, "loss": 1.2796, "step": 1685 }, { "epoch": 0.17, "grad_norm": 11.506027765246968, "learning_rate": 5.678763440860215e-06, "loss": 1.2689, "step": 1690 }, { "epoch": 0.17, "grad_norm": 23.309943493618878, "learning_rate": 5.6955645161290325e-06, "loss": 1.2803, "step": 1695 }, { "epoch": 0.17, "grad_norm": 14.260279103971566, "learning_rate": 5.7123655913978505e-06, "loss": 1.2479, "step": 1700 }, { "epoch": 0.17, "grad_norm": 17.417965761132848, "learning_rate": 5.729166666666667e-06, "loss": 1.2659, "step": 1705 }, { "epoch": 0.17, "grad_norm": 19.069359196115766, "learning_rate": 5.745967741935484e-06, "loss": 1.258, "step": 1710 }, { "epoch": 0.17, "grad_norm": 31.862010501733916, "learning_rate": 5.762768817204302e-06, "loss": 1.2745, "step": 1715 }, { "epoch": 0.17, "grad_norm": 14.171126284799604, "learning_rate": 5.779569892473119e-06, "loss": 1.2201, "step": 1720 }, { "epoch": 0.17, "grad_norm": 10.839283951323784, "learning_rate": 5.796370967741935e-06, "loss": 1.2394, "step": 1725 }, { "epoch": 0.17, "grad_norm": 28.093728374613843, "learning_rate": 5.813172043010753e-06, "loss": 1.2638, "step": 1730 }, { "epoch": 0.17, "grad_norm": 44.58712437099615, "learning_rate": 5.82997311827957e-06, "loss": 1.263, "step": 1735 }, { "epoch": 0.18, "grad_norm": 11.611237724562416, "learning_rate": 5.846774193548388e-06, "loss": 1.2396, "step": 1740 }, { "epoch": 0.18, "grad_norm": 10.721134129684025, "learning_rate": 5.863575268817204e-06, "loss": 1.2612, "step": 1745 }, { "epoch": 0.18, "grad_norm": 28.526923438499946, "learning_rate": 5.8803763440860215e-06, "loss": 1.2348, "step": 1750 }, { "epoch": 0.18, "grad_norm": 28.344913656755676, "learning_rate": 5.8971774193548394e-06, "loss": 1.2826, "step": 1755 }, { "epoch": 0.18, "grad_norm": 11.471422006525902, "learning_rate": 5.9139784946236566e-06, "loss": 1.2943, "step": 1760 }, { "epoch": 0.18, "grad_norm": 10.434390641845038, "learning_rate": 5.930779569892473e-06, "loss": 1.2229, "step": 1765 }, { "epoch": 0.18, "grad_norm": 16.75539696310857, "learning_rate": 5.947580645161291e-06, "loss": 1.2526, "step": 1770 }, { "epoch": 0.18, "grad_norm": 12.3925293626586, "learning_rate": 5.964381720430108e-06, "loss": 1.2394, "step": 1775 }, { "epoch": 0.18, "grad_norm": 12.786185248449616, "learning_rate": 5.981182795698926e-06, "loss": 1.2927, "step": 1780 }, { "epoch": 0.18, "grad_norm": 28.326800741035534, "learning_rate": 5.997983870967743e-06, "loss": 1.3106, "step": 1785 }, { "epoch": 0.18, "grad_norm": 13.276235173795225, "learning_rate": 6.014784946236559e-06, "loss": 1.2011, "step": 1790 }, { "epoch": 0.18, "grad_norm": 19.043491768486806, "learning_rate": 6.031586021505377e-06, "loss": 1.31, "step": 1795 }, { "epoch": 0.18, "grad_norm": 12.982912118702751, "learning_rate": 6.048387096774194e-06, "loss": 1.1834, "step": 1800 }, { "epoch": 0.18, "grad_norm": 23.46029378615589, "learning_rate": 6.065188172043011e-06, "loss": 1.2058, "step": 1805 }, { "epoch": 0.18, "grad_norm": 9.56457697533073, "learning_rate": 6.0819892473118284e-06, "loss": 1.2209, "step": 1810 }, { "epoch": 0.18, "grad_norm": 20.007818323470907, "learning_rate": 6.0987903225806455e-06, "loss": 1.2084, "step": 1815 }, { "epoch": 0.18, "grad_norm": 30.888852345480156, "learning_rate": 6.115591397849463e-06, "loss": 1.2878, "step": 1820 }, { "epoch": 0.18, "grad_norm": 11.206797920592592, "learning_rate": 6.132392473118281e-06, "loss": 1.256, "step": 1825 }, { "epoch": 0.18, "grad_norm": 41.77653719952749, "learning_rate": 6.149193548387097e-06, "loss": 1.2143, "step": 1830 }, { "epoch": 0.19, "grad_norm": 22.773443987679865, "learning_rate": 6.165994623655914e-06, "loss": 1.2212, "step": 1835 }, { "epoch": 0.19, "grad_norm": 15.003527818874442, "learning_rate": 6.182795698924732e-06, "loss": 1.1451, "step": 1840 }, { "epoch": 0.19, "grad_norm": 10.832295518728609, "learning_rate": 6.199596774193549e-06, "loss": 1.2371, "step": 1845 }, { "epoch": 0.19, "grad_norm": 14.106996556562182, "learning_rate": 6.216397849462365e-06, "loss": 1.2564, "step": 1850 }, { "epoch": 0.19, "grad_norm": 11.6321798945971, "learning_rate": 6.233198924731183e-06, "loss": 1.235, "step": 1855 }, { "epoch": 0.19, "grad_norm": 12.246030421588882, "learning_rate": 6.25e-06, "loss": 1.3035, "step": 1860 }, { "epoch": 0.19, "grad_norm": 14.37717478205693, "learning_rate": 6.266801075268818e-06, "loss": 1.2778, "step": 1865 }, { "epoch": 0.19, "grad_norm": 15.24027128069132, "learning_rate": 6.283602150537635e-06, "loss": 1.272, "step": 1870 }, { "epoch": 0.19, "grad_norm": 34.70380358345755, "learning_rate": 6.300403225806452e-06, "loss": 1.2512, "step": 1875 }, { "epoch": 0.19, "grad_norm": 46.86598098882556, "learning_rate": 6.3172043010752696e-06, "loss": 1.2413, "step": 1880 }, { "epoch": 0.19, "grad_norm": 24.5265225085656, "learning_rate": 6.334005376344087e-06, "loss": 1.2549, "step": 1885 }, { "epoch": 0.19, "grad_norm": 12.176692968830055, "learning_rate": 6.350806451612905e-06, "loss": 1.2243, "step": 1890 }, { "epoch": 0.19, "grad_norm": 12.623066798462252, "learning_rate": 6.367607526881721e-06, "loss": 1.2203, "step": 1895 }, { "epoch": 0.19, "grad_norm": 20.506001827294856, "learning_rate": 6.384408602150538e-06, "loss": 1.2448, "step": 1900 }, { "epoch": 0.19, "grad_norm": 24.346786364555726, "learning_rate": 6.401209677419356e-06, "loss": 1.1681, "step": 1905 }, { "epoch": 0.19, "grad_norm": 22.37322556152353, "learning_rate": 6.418010752688173e-06, "loss": 1.2679, "step": 1910 }, { "epoch": 0.19, "grad_norm": 18.538859354210924, "learning_rate": 6.434811827956989e-06, "loss": 1.225, "step": 1915 }, { "epoch": 0.19, "grad_norm": 34.54548609395492, "learning_rate": 6.451612903225806e-06, "loss": 1.2446, "step": 1920 }, { "epoch": 0.19, "grad_norm": 10.82382489549032, "learning_rate": 6.468413978494624e-06, "loss": 1.2507, "step": 1925 }, { "epoch": 0.19, "grad_norm": 19.144353601820267, "learning_rate": 6.4852150537634414e-06, "loss": 1.2938, "step": 1930 }, { "epoch": 0.2, "grad_norm": 29.58265058763573, "learning_rate": 6.502016129032258e-06, "loss": 1.2952, "step": 1935 }, { "epoch": 0.2, "grad_norm": 9.721343926876305, "learning_rate": 6.518817204301076e-06, "loss": 1.1828, "step": 1940 }, { "epoch": 0.2, "grad_norm": 10.514261400823013, "learning_rate": 6.535618279569893e-06, "loss": 1.2053, "step": 1945 }, { "epoch": 0.2, "grad_norm": 15.533730854979197, "learning_rate": 6.552419354838711e-06, "loss": 1.2568, "step": 1950 }, { "epoch": 0.2, "grad_norm": 16.386926044621365, "learning_rate": 6.569220430107528e-06, "loss": 1.2712, "step": 1955 }, { "epoch": 0.2, "grad_norm": 9.035649299385856, "learning_rate": 6.586021505376344e-06, "loss": 1.1855, "step": 1960 }, { "epoch": 0.2, "grad_norm": 19.69163264701267, "learning_rate": 6.602822580645162e-06, "loss": 1.2761, "step": 1965 }, { "epoch": 0.2, "grad_norm": 37.0944311318195, "learning_rate": 6.619623655913979e-06, "loss": 1.2577, "step": 1970 }, { "epoch": 0.2, "grad_norm": 9.865249614590498, "learning_rate": 6.636424731182797e-06, "loss": 1.26, "step": 1975 }, { "epoch": 0.2, "grad_norm": 11.904200524254145, "learning_rate": 6.653225806451613e-06, "loss": 1.2764, "step": 1980 }, { "epoch": 0.2, "grad_norm": 12.933867176670313, "learning_rate": 6.67002688172043e-06, "loss": 1.2721, "step": 1985 }, { "epoch": 0.2, "grad_norm": 29.639862278436, "learning_rate": 6.686827956989248e-06, "loss": 1.2208, "step": 1990 }, { "epoch": 0.2, "grad_norm": 25.54302227116381, "learning_rate": 6.7036290322580655e-06, "loss": 1.2142, "step": 1995 }, { "epoch": 0.2, "grad_norm": 13.655438913627023, "learning_rate": 6.720430107526882e-06, "loss": 1.2235, "step": 2000 }, { "epoch": 0.2, "grad_norm": 18.24343043881259, "learning_rate": 6.7372311827957e-06, "loss": 1.2186, "step": 2005 }, { "epoch": 0.2, "grad_norm": 12.287545958285396, "learning_rate": 6.754032258064517e-06, "loss": 1.2351, "step": 2010 }, { "epoch": 0.2, "grad_norm": 27.61706184500475, "learning_rate": 6.770833333333334e-06, "loss": 1.219, "step": 2015 }, { "epoch": 0.2, "grad_norm": 12.006823598473522, "learning_rate": 6.787634408602151e-06, "loss": 1.2106, "step": 2020 }, { "epoch": 0.2, "grad_norm": 24.899259234998013, "learning_rate": 6.804435483870968e-06, "loss": 1.2471, "step": 2025 }, { "epoch": 0.2, "grad_norm": 108.13820601181524, "learning_rate": 6.821236559139785e-06, "loss": 1.3069, "step": 2030 }, { "epoch": 0.21, "grad_norm": 51.41482122176862, "learning_rate": 6.838037634408603e-06, "loss": 1.274, "step": 2035 }, { "epoch": 0.21, "grad_norm": 27.969160983681736, "learning_rate": 6.854838709677419e-06, "loss": 1.2421, "step": 2040 }, { "epoch": 0.21, "grad_norm": 116.52262746448919, "learning_rate": 6.8716397849462365e-06, "loss": 1.209, "step": 2045 }, { "epoch": 0.21, "grad_norm": 28.9605266075955, "learning_rate": 6.8884408602150544e-06, "loss": 1.2727, "step": 2050 }, { "epoch": 0.21, "grad_norm": 20.262447992796503, "learning_rate": 6.9052419354838715e-06, "loss": 1.2644, "step": 2055 }, { "epoch": 0.21, "grad_norm": 12.79893422071457, "learning_rate": 6.9220430107526895e-06, "loss": 1.2442, "step": 2060 }, { "epoch": 0.21, "grad_norm": 23.597737721499232, "learning_rate": 6.938844086021506e-06, "loss": 1.241, "step": 2065 }, { "epoch": 0.21, "grad_norm": 24.518983087782463, "learning_rate": 6.955645161290323e-06, "loss": 1.2307, "step": 2070 }, { "epoch": 0.21, "grad_norm": 20.560253477289546, "learning_rate": 6.972446236559141e-06, "loss": 1.2691, "step": 2075 }, { "epoch": 0.21, "grad_norm": 25.848118116745248, "learning_rate": 6.989247311827958e-06, "loss": 1.2724, "step": 2080 }, { "epoch": 0.21, "grad_norm": 9.914019924510955, "learning_rate": 7.006048387096774e-06, "loss": 1.2748, "step": 2085 }, { "epoch": 0.21, "grad_norm": 10.925737866573701, "learning_rate": 7.022849462365592e-06, "loss": 1.2468, "step": 2090 }, { "epoch": 0.21, "grad_norm": 10.000718814137622, "learning_rate": 7.039650537634409e-06, "loss": 1.2004, "step": 2095 }, { "epoch": 0.21, "grad_norm": 14.595339324971437, "learning_rate": 7.056451612903227e-06, "loss": 1.2976, "step": 2100 }, { "epoch": 0.21, "grad_norm": 15.463529208058105, "learning_rate": 7.073252688172043e-06, "loss": 1.2366, "step": 2105 }, { "epoch": 0.21, "grad_norm": 29.45323360640321, "learning_rate": 7.0900537634408605e-06, "loss": 1.2691, "step": 2110 }, { "epoch": 0.21, "grad_norm": 12.03636155828545, "learning_rate": 7.1068548387096785e-06, "loss": 1.2626, "step": 2115 }, { "epoch": 0.21, "grad_norm": 41.205485411736404, "learning_rate": 7.1236559139784956e-06, "loss": 1.2255, "step": 2120 }, { "epoch": 0.21, "grad_norm": 19.8354928734948, "learning_rate": 7.140456989247312e-06, "loss": 1.2627, "step": 2125 }, { "epoch": 0.21, "grad_norm": 13.143130418752412, "learning_rate": 7.15725806451613e-06, "loss": 1.2636, "step": 2130 }, { "epoch": 0.22, "grad_norm": 26.329639735822322, "learning_rate": 7.174059139784947e-06, "loss": 1.2913, "step": 2135 }, { "epoch": 0.22, "grad_norm": 10.53974742117649, "learning_rate": 7.190860215053764e-06, "loss": 1.1796, "step": 2140 }, { "epoch": 0.22, "grad_norm": 19.81150442809384, "learning_rate": 7.207661290322582e-06, "loss": 1.2697, "step": 2145 }, { "epoch": 0.22, "grad_norm": 41.27431702908246, "learning_rate": 7.224462365591398e-06, "loss": 1.2155, "step": 2150 }, { "epoch": 0.22, "grad_norm": 17.502773503666184, "learning_rate": 7.241263440860215e-06, "loss": 1.2091, "step": 2155 }, { "epoch": 0.22, "grad_norm": 9.51563458004467, "learning_rate": 7.258064516129033e-06, "loss": 1.2635, "step": 2160 }, { "epoch": 0.22, "grad_norm": 15.575762923082294, "learning_rate": 7.27486559139785e-06, "loss": 1.221, "step": 2165 }, { "epoch": 0.22, "grad_norm": 21.278627675604866, "learning_rate": 7.291666666666667e-06, "loss": 1.2411, "step": 2170 }, { "epoch": 0.22, "grad_norm": 13.04465876557275, "learning_rate": 7.3084677419354845e-06, "loss": 1.2134, "step": 2175 }, { "epoch": 0.22, "grad_norm": 11.011104679604054, "learning_rate": 7.325268817204302e-06, "loss": 1.2422, "step": 2180 }, { "epoch": 0.22, "grad_norm": 16.353363056360596, "learning_rate": 7.34206989247312e-06, "loss": 1.2202, "step": 2185 }, { "epoch": 0.22, "grad_norm": 12.87880319538541, "learning_rate": 7.358870967741936e-06, "loss": 1.2097, "step": 2190 }, { "epoch": 0.22, "grad_norm": 11.533815983023235, "learning_rate": 7.375672043010753e-06, "loss": 1.2266, "step": 2195 }, { "epoch": 0.22, "grad_norm": 55.96590550959414, "learning_rate": 7.392473118279571e-06, "loss": 1.2446, "step": 2200 }, { "epoch": 0.22, "grad_norm": 27.6433561291497, "learning_rate": 7.409274193548388e-06, "loss": 1.2762, "step": 2205 }, { "epoch": 0.22, "grad_norm": 49.82007468136901, "learning_rate": 7.426075268817204e-06, "loss": 1.2283, "step": 2210 }, { "epoch": 0.22, "grad_norm": 14.84247480602059, "learning_rate": 7.442876344086022e-06, "loss": 1.2353, "step": 2215 }, { "epoch": 0.22, "grad_norm": 13.413311841272398, "learning_rate": 7.459677419354839e-06, "loss": 1.2622, "step": 2220 }, { "epoch": 0.22, "grad_norm": 40.40763721338431, "learning_rate": 7.476478494623656e-06, "loss": 1.2936, "step": 2225 }, { "epoch": 0.22, "grad_norm": 82.94010690130378, "learning_rate": 7.4932795698924735e-06, "loss": 1.2799, "step": 2230 }, { "epoch": 0.23, "grad_norm": 40.39126638765991, "learning_rate": 7.510080645161291e-06, "loss": 1.3345, "step": 2235 }, { "epoch": 0.23, "grad_norm": 47.39627347260159, "learning_rate": 7.526881720430108e-06, "loss": 1.228, "step": 2240 }, { "epoch": 0.23, "grad_norm": 11.629121665795358, "learning_rate": 7.543682795698926e-06, "loss": 1.2331, "step": 2245 }, { "epoch": 0.23, "grad_norm": 15.306221861015311, "learning_rate": 7.560483870967743e-06, "loss": 1.2713, "step": 2250 }, { "epoch": 0.23, "grad_norm": 24.542736233239204, "learning_rate": 7.577284946236559e-06, "loss": 1.2892, "step": 2255 }, { "epoch": 0.23, "grad_norm": 21.07591070120843, "learning_rate": 7.594086021505377e-06, "loss": 1.2692, "step": 2260 }, { "epoch": 0.23, "grad_norm": 29.56250640158638, "learning_rate": 7.610887096774194e-06, "loss": 1.2271, "step": 2265 }, { "epoch": 0.23, "grad_norm": 46.59319817819226, "learning_rate": 7.627688172043012e-06, "loss": 1.2268, "step": 2270 }, { "epoch": 0.23, "grad_norm": 84.07647878086206, "learning_rate": 7.644489247311827e-06, "loss": 1.2491, "step": 2275 }, { "epoch": 0.23, "grad_norm": 42.82187183604961, "learning_rate": 7.661290322580646e-06, "loss": 1.2686, "step": 2280 }, { "epoch": 0.23, "grad_norm": 63.93417339646263, "learning_rate": 7.678091397849463e-06, "loss": 1.2721, "step": 2285 }, { "epoch": 0.23, "grad_norm": 33.91029033440181, "learning_rate": 7.69489247311828e-06, "loss": 1.2616, "step": 2290 }, { "epoch": 0.23, "grad_norm": 9.582134518943782, "learning_rate": 7.711693548387098e-06, "loss": 1.2684, "step": 2295 }, { "epoch": 0.23, "grad_norm": 24.52154465326118, "learning_rate": 7.728494623655915e-06, "loss": 1.268, "step": 2300 }, { "epoch": 0.23, "grad_norm": 39.68127286315339, "learning_rate": 7.745295698924732e-06, "loss": 1.2984, "step": 2305 }, { "epoch": 0.23, "grad_norm": 64.21209850893193, "learning_rate": 7.762096774193549e-06, "loss": 1.3044, "step": 2310 }, { "epoch": 0.23, "grad_norm": 51.574901468650154, "learning_rate": 7.778897849462366e-06, "loss": 1.2065, "step": 2315 }, { "epoch": 0.23, "grad_norm": 29.1347166115766, "learning_rate": 7.795698924731183e-06, "loss": 1.2155, "step": 2320 }, { "epoch": 0.23, "grad_norm": 14.815362147149694, "learning_rate": 7.8125e-06, "loss": 1.2377, "step": 2325 }, { "epoch": 0.23, "grad_norm": 31.023853624949705, "learning_rate": 7.829301075268817e-06, "loss": 1.2757, "step": 2330 }, { "epoch": 0.24, "grad_norm": 11.259718591127633, "learning_rate": 7.846102150537636e-06, "loss": 1.2178, "step": 2335 }, { "epoch": 0.24, "grad_norm": 10.77824590902307, "learning_rate": 7.862903225806451e-06, "loss": 1.27, "step": 2340 }, { "epoch": 0.24, "grad_norm": 26.383993695953727, "learning_rate": 7.879704301075269e-06, "loss": 1.2079, "step": 2345 }, { "epoch": 0.24, "grad_norm": 26.736318983340148, "learning_rate": 7.896505376344087e-06, "loss": 1.2078, "step": 2350 }, { "epoch": 0.24, "grad_norm": 27.43805184607868, "learning_rate": 7.913306451612904e-06, "loss": 1.2061, "step": 2355 }, { "epoch": 0.24, "grad_norm": 10.12846401107268, "learning_rate": 7.93010752688172e-06, "loss": 1.2931, "step": 2360 }, { "epoch": 0.24, "grad_norm": 8.723056959950561, "learning_rate": 7.946908602150539e-06, "loss": 1.1951, "step": 2365 }, { "epoch": 0.24, "grad_norm": 8.066035570374572, "learning_rate": 7.963709677419356e-06, "loss": 1.1917, "step": 2370 }, { "epoch": 0.24, "grad_norm": 16.15126653319653, "learning_rate": 7.980510752688173e-06, "loss": 1.2501, "step": 2375 }, { "epoch": 0.24, "grad_norm": 9.825376500667879, "learning_rate": 7.99731182795699e-06, "loss": 1.2303, "step": 2380 }, { "epoch": 0.24, "grad_norm": 15.19258369455789, "learning_rate": 8.014112903225807e-06, "loss": 1.2147, "step": 2385 }, { "epoch": 0.24, "grad_norm": 18.237466445781678, "learning_rate": 8.030913978494624e-06, "loss": 1.1802, "step": 2390 }, { "epoch": 0.24, "grad_norm": 28.40527681526738, "learning_rate": 8.047715053763441e-06, "loss": 1.262, "step": 2395 }, { "epoch": 0.24, "grad_norm": 9.153614338557704, "learning_rate": 8.064516129032258e-06, "loss": 1.2535, "step": 2400 }, { "epoch": 0.24, "grad_norm": 18.013184380961132, "learning_rate": 8.081317204301075e-06, "loss": 1.3406, "step": 2405 }, { "epoch": 0.24, "grad_norm": 11.551706178969816, "learning_rate": 8.098118279569893e-06, "loss": 1.2798, "step": 2410 }, { "epoch": 0.24, "grad_norm": 29.301817149313784, "learning_rate": 8.114919354838711e-06, "loss": 1.2021, "step": 2415 }, { "epoch": 0.24, "grad_norm": 14.604552659024082, "learning_rate": 8.131720430107529e-06, "loss": 1.2193, "step": 2420 }, { "epoch": 0.24, "grad_norm": 13.661579135545585, "learning_rate": 8.148521505376344e-06, "loss": 1.1753, "step": 2425 }, { "epoch": 0.24, "grad_norm": 17.732871735188713, "learning_rate": 8.165322580645163e-06, "loss": 1.1896, "step": 2430 }, { "epoch": 0.25, "grad_norm": 21.94264730965134, "learning_rate": 8.18212365591398e-06, "loss": 1.294, "step": 2435 }, { "epoch": 0.25, "grad_norm": 8.98862683040616, "learning_rate": 8.198924731182797e-06, "loss": 1.3597, "step": 2440 }, { "epoch": 0.25, "grad_norm": 9.348562191116157, "learning_rate": 8.215725806451614e-06, "loss": 1.1981, "step": 2445 }, { "epoch": 0.25, "grad_norm": 12.382309784539512, "learning_rate": 8.232526881720431e-06, "loss": 1.197, "step": 2450 }, { "epoch": 0.25, "grad_norm": 12.802639889690079, "learning_rate": 8.249327956989248e-06, "loss": 1.2518, "step": 2455 }, { "epoch": 0.25, "grad_norm": 9.105120008248589, "learning_rate": 8.266129032258065e-06, "loss": 1.2412, "step": 2460 }, { "epoch": 0.25, "grad_norm": 22.5005191369109, "learning_rate": 8.282930107526882e-06, "loss": 1.2095, "step": 2465 }, { "epoch": 0.25, "grad_norm": 55.06496286963062, "learning_rate": 8.2997311827957e-06, "loss": 1.2292, "step": 2470 }, { "epoch": 0.25, "grad_norm": 25.40056685381794, "learning_rate": 8.316532258064517e-06, "loss": 1.2229, "step": 2475 }, { "epoch": 0.25, "grad_norm": 19.75732050971706, "learning_rate": 8.333333333333334e-06, "loss": 1.276, "step": 2480 }, { "epoch": 0.25, "grad_norm": 19.259188424158435, "learning_rate": 8.35013440860215e-06, "loss": 1.2295, "step": 2485 }, { "epoch": 0.25, "grad_norm": 48.034449754252755, "learning_rate": 8.366935483870968e-06, "loss": 1.2694, "step": 2490 }, { "epoch": 0.25, "grad_norm": 27.913204135569178, "learning_rate": 8.383736559139785e-06, "loss": 1.2747, "step": 2495 }, { "epoch": 0.25, "grad_norm": 34.71178286331895, "learning_rate": 8.400537634408604e-06, "loss": 1.2141, "step": 2500 }, { "epoch": 0.25, "grad_norm": 19.01748344569644, "learning_rate": 8.41733870967742e-06, "loss": 1.2222, "step": 2505 }, { "epoch": 0.25, "grad_norm": 10.43993883230521, "learning_rate": 8.434139784946236e-06, "loss": 1.258, "step": 2510 }, { "epoch": 0.25, "grad_norm": 38.56997728761633, "learning_rate": 8.450940860215055e-06, "loss": 1.2001, "step": 2515 }, { "epoch": 0.25, "grad_norm": 22.647653948196986, "learning_rate": 8.467741935483872e-06, "loss": 1.2565, "step": 2520 }, { "epoch": 0.25, "grad_norm": 31.804400232690394, "learning_rate": 8.48454301075269e-06, "loss": 1.2916, "step": 2525 }, { "epoch": 0.26, "grad_norm": 10.568518363297464, "learning_rate": 8.501344086021506e-06, "loss": 1.2009, "step": 2530 }, { "epoch": 0.26, "grad_norm": 13.26372201231781, "learning_rate": 8.518145161290324e-06, "loss": 1.209, "step": 2535 }, { "epoch": 0.26, "grad_norm": 46.54550273923697, "learning_rate": 8.53494623655914e-06, "loss": 1.245, "step": 2540 }, { "epoch": 0.26, "grad_norm": 9.219462432954233, "learning_rate": 8.551747311827958e-06, "loss": 1.2544, "step": 2545 }, { "epoch": 0.26, "grad_norm": 44.110924602079656, "learning_rate": 8.568548387096775e-06, "loss": 1.2368, "step": 2550 }, { "epoch": 0.26, "grad_norm": 43.35815365202579, "learning_rate": 8.585349462365592e-06, "loss": 1.2318, "step": 2555 }, { "epoch": 0.26, "grad_norm": 11.019119762296853, "learning_rate": 8.602150537634409e-06, "loss": 1.2579, "step": 2560 }, { "epoch": 0.26, "grad_norm": 13.597771671863768, "learning_rate": 8.618951612903226e-06, "loss": 1.255, "step": 2565 }, { "epoch": 0.26, "grad_norm": 50.13693425901624, "learning_rate": 8.635752688172043e-06, "loss": 1.256, "step": 2570 }, { "epoch": 0.26, "grad_norm": 7.904065398841818, "learning_rate": 8.65255376344086e-06, "loss": 1.2039, "step": 2575 }, { "epoch": 0.26, "grad_norm": 37.45772381933171, "learning_rate": 8.669354838709677e-06, "loss": 1.2229, "step": 2580 }, { "epoch": 0.26, "grad_norm": 39.27057013113243, "learning_rate": 8.686155913978496e-06, "loss": 1.2191, "step": 2585 }, { "epoch": 0.26, "grad_norm": 70.60558760616007, "learning_rate": 8.702956989247312e-06, "loss": 1.3142, "step": 2590 }, { "epoch": 0.26, "grad_norm": 28.19659398732191, "learning_rate": 8.719758064516129e-06, "loss": 1.2259, "step": 2595 }, { "epoch": 0.26, "grad_norm": 23.9566064510682, "learning_rate": 8.736559139784948e-06, "loss": 1.2543, "step": 2600 }, { "epoch": 0.26, "grad_norm": 15.671051560916663, "learning_rate": 8.753360215053765e-06, "loss": 1.2193, "step": 2605 }, { "epoch": 0.26, "grad_norm": 29.838824406491774, "learning_rate": 8.770161290322582e-06, "loss": 1.2308, "step": 2610 }, { "epoch": 0.26, "grad_norm": 8.987977176453866, "learning_rate": 8.786962365591399e-06, "loss": 1.2298, "step": 2615 }, { "epoch": 0.26, "grad_norm": 16.52133772623778, "learning_rate": 8.803763440860216e-06, "loss": 1.2272, "step": 2620 }, { "epoch": 0.26, "grad_norm": 15.693819052666122, "learning_rate": 8.820564516129033e-06, "loss": 1.2678, "step": 2625 }, { "epoch": 0.27, "grad_norm": 20.076796566784243, "learning_rate": 8.83736559139785e-06, "loss": 1.1967, "step": 2630 }, { "epoch": 0.27, "grad_norm": 29.791845399978822, "learning_rate": 8.854166666666667e-06, "loss": 1.2163, "step": 2635 }, { "epoch": 0.27, "grad_norm": 47.53203127521539, "learning_rate": 8.870967741935484e-06, "loss": 1.2407, "step": 2640 }, { "epoch": 0.27, "grad_norm": 16.00670809300897, "learning_rate": 8.887768817204302e-06, "loss": 1.238, "step": 2645 }, { "epoch": 0.27, "grad_norm": 13.108573334947934, "learning_rate": 8.904569892473119e-06, "loss": 1.2901, "step": 2650 }, { "epoch": 0.27, "grad_norm": 16.094412129400425, "learning_rate": 8.921370967741936e-06, "loss": 1.1964, "step": 2655 }, { "epoch": 0.27, "grad_norm": 11.235105946719376, "learning_rate": 8.938172043010753e-06, "loss": 1.245, "step": 2660 }, { "epoch": 0.27, "grad_norm": 41.31431089609605, "learning_rate": 8.95497311827957e-06, "loss": 1.2624, "step": 2665 }, { "epoch": 0.27, "grad_norm": 16.115421164459278, "learning_rate": 8.971774193548389e-06, "loss": 1.231, "step": 2670 }, { "epoch": 0.27, "grad_norm": 55.68637807130677, "learning_rate": 8.988575268817204e-06, "loss": 1.2537, "step": 2675 }, { "epoch": 0.27, "grad_norm": 44.42132523032244, "learning_rate": 9.005376344086021e-06, "loss": 1.1876, "step": 2680 }, { "epoch": 0.27, "grad_norm": 27.949890360806133, "learning_rate": 9.02217741935484e-06, "loss": 1.2117, "step": 2685 }, { "epoch": 0.27, "grad_norm": 40.761220118311314, "learning_rate": 9.038978494623657e-06, "loss": 1.2633, "step": 2690 }, { "epoch": 0.27, "grad_norm": 20.224082082802013, "learning_rate": 9.055779569892473e-06, "loss": 1.2541, "step": 2695 }, { "epoch": 0.27, "grad_norm": 17.094096668262384, "learning_rate": 9.072580645161291e-06, "loss": 1.2348, "step": 2700 }, { "epoch": 0.27, "grad_norm": 60.1962371836327, "learning_rate": 9.089381720430108e-06, "loss": 1.2331, "step": 2705 }, { "epoch": 0.27, "grad_norm": 55.94215746637798, "learning_rate": 9.106182795698926e-06, "loss": 1.2352, "step": 2710 }, { "epoch": 0.27, "grad_norm": 32.57124388066799, "learning_rate": 9.122983870967743e-06, "loss": 1.2323, "step": 2715 }, { "epoch": 0.27, "grad_norm": 16.941148434081548, "learning_rate": 9.13978494623656e-06, "loss": 1.2388, "step": 2720 }, { "epoch": 0.27, "grad_norm": 18.389045889903844, "learning_rate": 9.156586021505377e-06, "loss": 1.2326, "step": 2725 }, { "epoch": 0.28, "grad_norm": 16.3662470293946, "learning_rate": 9.173387096774194e-06, "loss": 1.2977, "step": 2730 }, { "epoch": 0.28, "grad_norm": 14.143682290674668, "learning_rate": 9.190188172043013e-06, "loss": 1.2299, "step": 2735 }, { "epoch": 0.28, "grad_norm": 10.488199757167923, "learning_rate": 9.206989247311828e-06, "loss": 1.1949, "step": 2740 }, { "epoch": 0.28, "grad_norm": 15.543690391067857, "learning_rate": 9.223790322580645e-06, "loss": 1.285, "step": 2745 }, { "epoch": 0.28, "grad_norm": 37.70309866093175, "learning_rate": 9.240591397849464e-06, "loss": 1.2336, "step": 2750 }, { "epoch": 0.28, "grad_norm": 62.688716746409945, "learning_rate": 9.257392473118281e-06, "loss": 1.2581, "step": 2755 }, { "epoch": 0.28, "grad_norm": 37.72106543644531, "learning_rate": 9.274193548387097e-06, "loss": 1.2729, "step": 2760 }, { "epoch": 0.28, "grad_norm": 56.0751796884353, "learning_rate": 9.290994623655915e-06, "loss": 1.2609, "step": 2765 }, { "epoch": 0.28, "grad_norm": 25.60819582600334, "learning_rate": 9.307795698924732e-06, "loss": 1.22, "step": 2770 }, { "epoch": 0.28, "grad_norm": 15.017430243129686, "learning_rate": 9.32459677419355e-06, "loss": 1.2838, "step": 2775 }, { "epoch": 0.28, "grad_norm": 21.030906077378877, "learning_rate": 9.341397849462367e-06, "loss": 1.2826, "step": 2780 }, { "epoch": 0.28, "grad_norm": 28.879404465573838, "learning_rate": 9.358198924731184e-06, "loss": 1.252, "step": 2785 }, { "epoch": 0.28, "grad_norm": 19.44653324996414, "learning_rate": 9.375000000000001e-06, "loss": 1.2714, "step": 2790 }, { "epoch": 0.28, "grad_norm": 10.247990145821044, "learning_rate": 9.391801075268818e-06, "loss": 1.2259, "step": 2795 }, { "epoch": 0.28, "grad_norm": 24.325834288611475, "learning_rate": 9.408602150537635e-06, "loss": 1.2544, "step": 2800 }, { "epoch": 0.28, "grad_norm": 14.962373850237869, "learning_rate": 9.425403225806452e-06, "loss": 1.2342, "step": 2805 }, { "epoch": 0.28, "grad_norm": 20.103796795195294, "learning_rate": 9.44220430107527e-06, "loss": 1.2428, "step": 2810 }, { "epoch": 0.28, "grad_norm": 65.72278478898748, "learning_rate": 9.459005376344086e-06, "loss": 1.2779, "step": 2815 }, { "epoch": 0.28, "grad_norm": 34.57590700018222, "learning_rate": 9.475806451612905e-06, "loss": 1.2214, "step": 2820 }, { "epoch": 0.28, "grad_norm": 29.87845279871248, "learning_rate": 9.49260752688172e-06, "loss": 1.2607, "step": 2825 }, { "epoch": 0.29, "grad_norm": 15.99531248344474, "learning_rate": 9.509408602150538e-06, "loss": 1.215, "step": 2830 }, { "epoch": 0.29, "grad_norm": 40.937307170959855, "learning_rate": 9.526209677419356e-06, "loss": 1.2376, "step": 2835 }, { "epoch": 0.29, "grad_norm": 24.801802543062614, "learning_rate": 9.543010752688174e-06, "loss": 1.2416, "step": 2840 }, { "epoch": 0.29, "grad_norm": 14.843620697232266, "learning_rate": 9.559811827956989e-06, "loss": 1.2758, "step": 2845 }, { "epoch": 0.29, "grad_norm": 25.767959275553633, "learning_rate": 9.576612903225808e-06, "loss": 1.2924, "step": 2850 }, { "epoch": 0.29, "grad_norm": 24.835210656953024, "learning_rate": 9.593413978494625e-06, "loss": 1.3021, "step": 2855 }, { "epoch": 0.29, "grad_norm": 14.811252228116862, "learning_rate": 9.610215053763442e-06, "loss": 1.1958, "step": 2860 }, { "epoch": 0.29, "grad_norm": 8.84580463225306, "learning_rate": 9.627016129032259e-06, "loss": 1.2299, "step": 2865 }, { "epoch": 0.29, "grad_norm": 35.50525850048963, "learning_rate": 9.643817204301076e-06, "loss": 1.2543, "step": 2870 }, { "epoch": 0.29, "grad_norm": 17.821644675146317, "learning_rate": 9.660618279569893e-06, "loss": 1.2512, "step": 2875 }, { "epoch": 0.29, "grad_norm": 25.16120140943554, "learning_rate": 9.67741935483871e-06, "loss": 1.2644, "step": 2880 }, { "epoch": 0.29, "grad_norm": 26.443037158380964, "learning_rate": 9.694220430107528e-06, "loss": 1.2873, "step": 2885 }, { "epoch": 0.29, "grad_norm": 32.58614186448375, "learning_rate": 9.711021505376345e-06, "loss": 1.2056, "step": 2890 }, { "epoch": 0.29, "grad_norm": 28.009823108590062, "learning_rate": 9.727822580645162e-06, "loss": 1.2312, "step": 2895 }, { "epoch": 0.29, "grad_norm": 29.593172996117087, "learning_rate": 9.744623655913979e-06, "loss": 1.2385, "step": 2900 }, { "epoch": 0.29, "grad_norm": 46.463353284382364, "learning_rate": 9.761424731182798e-06, "loss": 1.2203, "step": 2905 }, { "epoch": 0.29, "grad_norm": 14.522172617728398, "learning_rate": 9.778225806451613e-06, "loss": 1.2851, "step": 2910 }, { "epoch": 0.29, "grad_norm": 21.8644130205428, "learning_rate": 9.79502688172043e-06, "loss": 1.3051, "step": 2915 }, { "epoch": 0.29, "grad_norm": 60.03291649051756, "learning_rate": 9.811827956989249e-06, "loss": 1.2196, "step": 2920 }, { "epoch": 0.29, "grad_norm": 60.594682600646706, "learning_rate": 9.828629032258066e-06, "loss": 1.2187, "step": 2925 }, { "epoch": 0.3, "grad_norm": 8.381452691629116, "learning_rate": 9.845430107526881e-06, "loss": 1.2154, "step": 2930 }, { "epoch": 0.3, "grad_norm": 10.090385181932243, "learning_rate": 9.8622311827957e-06, "loss": 1.2283, "step": 2935 }, { "epoch": 0.3, "grad_norm": 18.85732363275535, "learning_rate": 9.879032258064517e-06, "loss": 1.3106, "step": 2940 }, { "epoch": 0.3, "grad_norm": 39.278581764228036, "learning_rate": 9.895833333333334e-06, "loss": 1.2291, "step": 2945 }, { "epoch": 0.3, "grad_norm": 44.68729775499523, "learning_rate": 9.912634408602152e-06, "loss": 1.2338, "step": 2950 }, { "epoch": 0.3, "grad_norm": 43.45808222006517, "learning_rate": 9.929435483870969e-06, "loss": 1.3069, "step": 2955 }, { "epoch": 0.3, "grad_norm": 17.02806886416131, "learning_rate": 9.946236559139786e-06, "loss": 1.2378, "step": 2960 }, { "epoch": 0.3, "grad_norm": 35.73834294646412, "learning_rate": 9.963037634408603e-06, "loss": 1.2714, "step": 2965 }, { "epoch": 0.3, "grad_norm": 24.782915964923706, "learning_rate": 9.97983870967742e-06, "loss": 1.2359, "step": 2970 }, { "epoch": 0.3, "grad_norm": 70.72793777002627, "learning_rate": 9.996639784946237e-06, "loss": 1.3251, "step": 2975 }, { "epoch": 0.3, "grad_norm": 57.16942627102183, "learning_rate": 9.999999449441523e-06, "loss": 1.3049, "step": 2980 }, { "epoch": 0.3, "grad_norm": 94.53740449854857, "learning_rate": 9.999997212797916e-06, "loss": 1.2459, "step": 2985 }, { "epoch": 0.3, "grad_norm": 111.95977469538862, "learning_rate": 9.999993255660043e-06, "loss": 1.3163, "step": 2990 }, { "epoch": 0.3, "grad_norm": 15.031804726431162, "learning_rate": 9.999987578029268e-06, "loss": 1.3024, "step": 2995 }, { "epoch": 0.3, "grad_norm": 71.96322221490063, "learning_rate": 9.999980179907541e-06, "loss": 1.299, "step": 3000 }, { "epoch": 0.3, "grad_norm": 51.906863459767855, "learning_rate": 9.999971061297411e-06, "loss": 1.3545, "step": 3005 }, { "epoch": 0.3, "grad_norm": 41.68798761739742, "learning_rate": 9.999960222202014e-06, "loss": 1.2937, "step": 3010 }, { "epoch": 0.3, "grad_norm": 46.196693267941605, "learning_rate": 9.99994766262508e-06, "loss": 1.291, "step": 3015 }, { "epoch": 0.3, "grad_norm": 83.24411008845641, "learning_rate": 9.99993338257093e-06, "loss": 1.3184, "step": 3020 }, { "epoch": 0.3, "grad_norm": 79.47916536696067, "learning_rate": 9.999917382044479e-06, "loss": 1.3597, "step": 3025 }, { "epoch": 0.31, "grad_norm": 47.364807117651814, "learning_rate": 9.999899661051232e-06, "loss": 1.2771, "step": 3030 }, { "epoch": 0.31, "grad_norm": 8.949929395733275, "learning_rate": 9.999880219597287e-06, "loss": 1.2681, "step": 3035 }, { "epoch": 0.31, "grad_norm": 36.05513683071444, "learning_rate": 9.999859057689336e-06, "loss": 1.2577, "step": 3040 }, { "epoch": 0.31, "grad_norm": 49.1723693913278, "learning_rate": 9.999836175334657e-06, "loss": 1.2624, "step": 3045 }, { "epoch": 0.31, "grad_norm": 19.818863595628407, "learning_rate": 9.999811572541125e-06, "loss": 1.252, "step": 3050 }, { "epoch": 0.31, "grad_norm": 65.3095406563796, "learning_rate": 9.999785249317207e-06, "loss": 1.2392, "step": 3055 }, { "epoch": 0.31, "grad_norm": 33.44323503436429, "learning_rate": 9.999757205671959e-06, "loss": 1.2445, "step": 3060 }, { "epoch": 0.31, "grad_norm": 54.9536579150963, "learning_rate": 9.999727441615032e-06, "loss": 1.3022, "step": 3065 }, { "epoch": 0.31, "grad_norm": 21.625118717845428, "learning_rate": 9.99969595715667e-06, "loss": 1.2172, "step": 3070 }, { "epoch": 0.31, "grad_norm": 53.04058016138585, "learning_rate": 9.999662752307702e-06, "loss": 1.2891, "step": 3075 }, { "epoch": 0.31, "grad_norm": 41.0790648049123, "learning_rate": 9.999627827079557e-06, "loss": 1.2606, "step": 3080 }, { "epoch": 0.31, "grad_norm": 12.949311683171679, "learning_rate": 9.999591181484251e-06, "loss": 1.2433, "step": 3085 }, { "epoch": 0.31, "grad_norm": 13.606680721081727, "learning_rate": 9.999552815534396e-06, "loss": 1.2767, "step": 3090 }, { "epoch": 0.31, "grad_norm": 23.548287110125504, "learning_rate": 9.999512729243191e-06, "loss": 1.2329, "step": 3095 }, { "epoch": 0.31, "grad_norm": 11.659213956234366, "learning_rate": 9.999470922624432e-06, "loss": 1.2118, "step": 3100 }, { "epoch": 0.31, "grad_norm": 11.109026198379695, "learning_rate": 9.999427395692502e-06, "loss": 1.271, "step": 3105 }, { "epoch": 0.31, "grad_norm": 16.205675455931562, "learning_rate": 9.999382148462382e-06, "loss": 1.2228, "step": 3110 }, { "epoch": 0.31, "grad_norm": 13.84615719190485, "learning_rate": 9.99933518094964e-06, "loss": 1.2587, "step": 3115 }, { "epoch": 0.31, "grad_norm": 12.473676379339645, "learning_rate": 9.999286493170435e-06, "loss": 1.2854, "step": 3120 }, { "epoch": 0.32, "grad_norm": 9.029530584732344, "learning_rate": 9.999236085141523e-06, "loss": 1.2422, "step": 3125 }, { "epoch": 0.32, "grad_norm": 8.574237532934099, "learning_rate": 9.999183956880252e-06, "loss": 1.1645, "step": 3130 }, { "epoch": 0.32, "grad_norm": 9.224399893537099, "learning_rate": 9.999130108404553e-06, "loss": 1.2224, "step": 3135 }, { "epoch": 0.32, "grad_norm": 8.540594477353714, "learning_rate": 9.99907453973296e-06, "loss": 1.238, "step": 3140 }, { "epoch": 0.32, "grad_norm": 25.11320923926726, "learning_rate": 9.999017250884591e-06, "loss": 1.2602, "step": 3145 }, { "epoch": 0.32, "grad_norm": 14.434342173799088, "learning_rate": 9.998958241879163e-06, "loss": 1.2327, "step": 3150 }, { "epoch": 0.32, "grad_norm": 23.92510726176057, "learning_rate": 9.998897512736977e-06, "loss": 1.2771, "step": 3155 }, { "epoch": 0.32, "grad_norm": 11.27586016590783, "learning_rate": 9.998835063478928e-06, "loss": 1.2186, "step": 3160 }, { "epoch": 0.32, "grad_norm": 17.206208499134267, "learning_rate": 9.998770894126513e-06, "loss": 1.244, "step": 3165 }, { "epoch": 0.32, "grad_norm": 9.071587368592734, "learning_rate": 9.998705004701805e-06, "loss": 1.2746, "step": 3170 }, { "epoch": 0.32, "grad_norm": 8.851985458784839, "learning_rate": 9.998637395227481e-06, "loss": 1.2293, "step": 3175 }, { "epoch": 0.32, "grad_norm": 11.125235158488461, "learning_rate": 9.998568065726804e-06, "loss": 1.2395, "step": 3180 }, { "epoch": 0.32, "grad_norm": 14.459385418108235, "learning_rate": 9.998497016223628e-06, "loss": 1.2507, "step": 3185 }, { "epoch": 0.32, "grad_norm": 8.723762738270437, "learning_rate": 9.998424246742403e-06, "loss": 1.2461, "step": 3190 }, { "epoch": 0.32, "grad_norm": 9.560316300456964, "learning_rate": 9.99834975730817e-06, "loss": 1.1793, "step": 3195 }, { "epoch": 0.32, "grad_norm": 17.330824182062663, "learning_rate": 9.998273547946557e-06, "loss": 1.2471, "step": 3200 }, { "epoch": 0.32, "grad_norm": 8.256225012715516, "learning_rate": 9.998195618683793e-06, "loss": 1.2439, "step": 3205 }, { "epoch": 0.32, "grad_norm": 13.292485899881653, "learning_rate": 9.99811596954669e-06, "loss": 1.1854, "step": 3210 }, { "epoch": 0.32, "grad_norm": 8.509099993425679, "learning_rate": 9.998034600562654e-06, "loss": 1.2467, "step": 3215 }, { "epoch": 0.32, "grad_norm": 12.822666642474209, "learning_rate": 9.997951511759686e-06, "loss": 1.321, "step": 3220 }, { "epoch": 0.33, "grad_norm": 8.367649827222804, "learning_rate": 9.997866703166376e-06, "loss": 1.2055, "step": 3225 }, { "epoch": 0.33, "grad_norm": 7.915246360335449, "learning_rate": 9.997780174811908e-06, "loss": 1.2468, "step": 3230 }, { "epoch": 0.33, "grad_norm": 7.3235819296260605, "learning_rate": 9.997691926726055e-06, "loss": 1.2301, "step": 3235 }, { "epoch": 0.33, "grad_norm": 7.881549658254508, "learning_rate": 9.997601958939185e-06, "loss": 1.2476, "step": 3240 }, { "epoch": 0.33, "grad_norm": 7.774689870813962, "learning_rate": 9.997510271482252e-06, "loss": 1.2587, "step": 3245 }, { "epoch": 0.33, "grad_norm": 23.119656881210187, "learning_rate": 9.997416864386808e-06, "loss": 1.2944, "step": 3250 }, { "epoch": 0.33, "grad_norm": 44.43248294796607, "learning_rate": 9.997321737684994e-06, "loss": 1.2861, "step": 3255 }, { "epoch": 0.33, "grad_norm": 47.999482127159396, "learning_rate": 9.997224891409542e-06, "loss": 1.2578, "step": 3260 }, { "epoch": 0.33, "grad_norm": 70.59257075033224, "learning_rate": 9.99712632559378e-06, "loss": 1.2717, "step": 3265 }, { "epoch": 0.33, "grad_norm": 23.71641327422162, "learning_rate": 9.997026040271623e-06, "loss": 1.2947, "step": 3270 }, { "epoch": 0.33, "grad_norm": 22.5621081905156, "learning_rate": 9.996924035477575e-06, "loss": 1.2861, "step": 3275 }, { "epoch": 0.33, "grad_norm": 11.337136579692809, "learning_rate": 9.996820311246741e-06, "loss": 1.2452, "step": 3280 }, { "epoch": 0.33, "grad_norm": 38.92682377208474, "learning_rate": 9.99671486761481e-06, "loss": 1.227, "step": 3285 }, { "epoch": 0.33, "grad_norm": 39.12070470753764, "learning_rate": 9.996607704618067e-06, "loss": 1.2175, "step": 3290 }, { "epoch": 0.33, "grad_norm": 55.06863641189487, "learning_rate": 9.996498822293383e-06, "loss": 1.261, "step": 3295 }, { "epoch": 0.33, "grad_norm": 53.5917064046568, "learning_rate": 9.996388220678226e-06, "loss": 1.267, "step": 3300 }, { "epoch": 0.33, "grad_norm": 22.7001638237988, "learning_rate": 9.996275899810657e-06, "loss": 1.2679, "step": 3305 }, { "epoch": 0.33, "grad_norm": 26.240695732587145, "learning_rate": 9.996161859729322e-06, "loss": 1.2276, "step": 3310 }, { "epoch": 0.33, "grad_norm": 28.864109624837717, "learning_rate": 9.99604610047346e-06, "loss": 1.2622, "step": 3315 }, { "epoch": 0.33, "grad_norm": 14.181745192592954, "learning_rate": 9.99592862208291e-06, "loss": 1.2353, "step": 3320 }, { "epoch": 0.34, "grad_norm": 31.135738640073715, "learning_rate": 9.99580942459809e-06, "loss": 1.2259, "step": 3325 }, { "epoch": 0.34, "grad_norm": 14.694094175623361, "learning_rate": 9.995688508060022e-06, "loss": 1.2638, "step": 3330 }, { "epoch": 0.34, "grad_norm": 14.336896391555797, "learning_rate": 9.995565872510306e-06, "loss": 1.2791, "step": 3335 }, { "epoch": 0.34, "grad_norm": 67.85777726137141, "learning_rate": 9.995441517991149e-06, "loss": 1.2656, "step": 3340 }, { "epoch": 0.34, "grad_norm": 49.402079926275604, "learning_rate": 9.995315444545332e-06, "loss": 1.2348, "step": 3345 }, { "epoch": 0.34, "grad_norm": 104.59190696680739, "learning_rate": 9.995187652216245e-06, "loss": 1.3082, "step": 3350 }, { "epoch": 0.34, "grad_norm": 48.01718012292942, "learning_rate": 9.995058141047857e-06, "loss": 1.2673, "step": 3355 }, { "epoch": 0.34, "grad_norm": 33.64789850910811, "learning_rate": 9.994926911084734e-06, "loss": 1.2607, "step": 3360 }, { "epoch": 0.34, "grad_norm": 31.926457677825184, "learning_rate": 9.994793962372032e-06, "loss": 1.3047, "step": 3365 }, { "epoch": 0.34, "grad_norm": 36.43634702069511, "learning_rate": 9.994659294955496e-06, "loss": 1.2635, "step": 3370 }, { "epoch": 0.34, "grad_norm": 22.716948822292796, "learning_rate": 9.994522908881468e-06, "loss": 1.2718, "step": 3375 }, { "epoch": 0.34, "grad_norm": 15.93812477910786, "learning_rate": 9.994384804196877e-06, "loss": 1.2071, "step": 3380 }, { "epoch": 0.34, "grad_norm": 7.4307138152681285, "learning_rate": 9.994244980949246e-06, "loss": 1.2283, "step": 3385 }, { "epoch": 0.34, "grad_norm": 26.716814730179262, "learning_rate": 9.994103439186686e-06, "loss": 1.2311, "step": 3390 }, { "epoch": 0.34, "grad_norm": 13.57535376565661, "learning_rate": 9.993960178957904e-06, "loss": 1.2416, "step": 3395 }, { "epoch": 0.34, "grad_norm": 9.489410995507601, "learning_rate": 9.993815200312194e-06, "loss": 1.2401, "step": 3400 }, { "epoch": 0.34, "grad_norm": 8.963757789326618, "learning_rate": 9.993668503299443e-06, "loss": 1.2223, "step": 3405 }, { "epoch": 0.34, "grad_norm": 11.731516475386249, "learning_rate": 9.993520087970128e-06, "loss": 1.2622, "step": 3410 }, { "epoch": 0.34, "grad_norm": 15.199217760337424, "learning_rate": 9.993369954375322e-06, "loss": 1.2187, "step": 3415 }, { "epoch": 0.34, "grad_norm": 20.61144183377057, "learning_rate": 9.993218102566683e-06, "loss": 1.2648, "step": 3420 }, { "epoch": 0.35, "grad_norm": 7.007496284136545, "learning_rate": 9.993064532596464e-06, "loss": 1.2809, "step": 3425 }, { "epoch": 0.35, "grad_norm": 8.051987396263192, "learning_rate": 9.992909244517507e-06, "loss": 1.2683, "step": 3430 }, { "epoch": 0.35, "grad_norm": 6.8353867234768675, "learning_rate": 9.99275223838325e-06, "loss": 1.2712, "step": 3435 }, { "epoch": 0.35, "grad_norm": 10.769209591212741, "learning_rate": 9.992593514247716e-06, "loss": 1.2244, "step": 3440 }, { "epoch": 0.35, "grad_norm": 9.723137752045876, "learning_rate": 9.992433072165521e-06, "loss": 1.2055, "step": 3445 }, { "epoch": 0.35, "grad_norm": 7.580879719381715, "learning_rate": 9.992270912191875e-06, "loss": 1.214, "step": 3450 }, { "epoch": 0.35, "grad_norm": 8.348028440545775, "learning_rate": 9.992107034382576e-06, "loss": 1.2512, "step": 3455 }, { "epoch": 0.35, "grad_norm": 9.119279921782843, "learning_rate": 9.991941438794016e-06, "loss": 1.2072, "step": 3460 }, { "epoch": 0.35, "grad_norm": 11.524058585005298, "learning_rate": 9.991774125483173e-06, "loss": 1.2263, "step": 3465 }, { "epoch": 0.35, "grad_norm": 9.444283873220677, "learning_rate": 9.991605094507621e-06, "loss": 1.2625, "step": 3470 }, { "epoch": 0.35, "grad_norm": 7.837669847558407, "learning_rate": 9.991434345925525e-06, "loss": 1.2565, "step": 3475 }, { "epoch": 0.35, "grad_norm": 27.594282860281425, "learning_rate": 9.991261879795637e-06, "loss": 1.2006, "step": 3480 }, { "epoch": 0.35, "grad_norm": 9.791241576434466, "learning_rate": 9.991087696177304e-06, "loss": 1.2963, "step": 3485 }, { "epoch": 0.35, "grad_norm": 30.203839734682212, "learning_rate": 9.990911795130461e-06, "loss": 1.2561, "step": 3490 }, { "epoch": 0.35, "grad_norm": 98.23200362278807, "learning_rate": 9.990734176715635e-06, "loss": 1.2461, "step": 3495 }, { "epoch": 0.35, "grad_norm": 27.271840312622874, "learning_rate": 9.990554840993948e-06, "loss": 1.3156, "step": 3500 }, { "epoch": 0.35, "grad_norm": 61.93216058953879, "learning_rate": 9.990373788027105e-06, "loss": 1.3108, "step": 3505 }, { "epoch": 0.35, "grad_norm": 15.847753788646035, "learning_rate": 9.990191017877408e-06, "loss": 1.2783, "step": 3510 }, { "epoch": 0.35, "grad_norm": 16.410406057080326, "learning_rate": 9.99000653060775e-06, "loss": 1.2319, "step": 3515 }, { "epoch": 0.35, "grad_norm": 17.31825759414051, "learning_rate": 9.989820326281608e-06, "loss": 1.2944, "step": 3520 }, { "epoch": 0.36, "grad_norm": 24.686023979520524, "learning_rate": 9.989632404963058e-06, "loss": 1.2311, "step": 3525 }, { "epoch": 0.36, "grad_norm": 11.056066379950447, "learning_rate": 9.989442766716766e-06, "loss": 1.2642, "step": 3530 }, { "epoch": 0.36, "grad_norm": 8.193503192165913, "learning_rate": 9.98925141160798e-06, "loss": 1.229, "step": 3535 }, { "epoch": 0.36, "grad_norm": 12.672413486305313, "learning_rate": 9.98905833970255e-06, "loss": 1.2523, "step": 3540 }, { "epoch": 0.36, "grad_norm": 9.013438018268829, "learning_rate": 9.98886355106691e-06, "loss": 1.2177, "step": 3545 }, { "epoch": 0.36, "grad_norm": 9.739984602942702, "learning_rate": 9.988667045768088e-06, "loss": 1.3048, "step": 3550 }, { "epoch": 0.36, "grad_norm": 14.313811608656064, "learning_rate": 9.988468823873701e-06, "loss": 1.2297, "step": 3555 }, { "epoch": 0.36, "grad_norm": 13.080105167095407, "learning_rate": 9.988268885451954e-06, "loss": 1.2716, "step": 3560 }, { "epoch": 0.36, "grad_norm": 12.052177241976224, "learning_rate": 9.988067230571648e-06, "loss": 1.2084, "step": 3565 }, { "epoch": 0.36, "grad_norm": 43.26819159375231, "learning_rate": 9.987863859302174e-06, "loss": 1.2811, "step": 3570 }, { "epoch": 0.36, "grad_norm": 31.906852111691876, "learning_rate": 9.987658771713508e-06, "loss": 1.2278, "step": 3575 }, { "epoch": 0.36, "grad_norm": 32.0926758251754, "learning_rate": 9.987451967876222e-06, "loss": 1.1654, "step": 3580 }, { "epoch": 0.36, "grad_norm": 12.778055781974132, "learning_rate": 9.987243447861479e-06, "loss": 1.2623, "step": 3585 }, { "epoch": 0.36, "grad_norm": 26.73272187671943, "learning_rate": 9.987033211741028e-06, "loss": 1.1936, "step": 3590 }, { "epoch": 0.36, "grad_norm": 23.712272553679398, "learning_rate": 9.986821259587214e-06, "loss": 1.2293, "step": 3595 }, { "epoch": 0.36, "grad_norm": 25.72299647520127, "learning_rate": 9.986607591472966e-06, "loss": 1.2311, "step": 3600 }, { "epoch": 0.36, "grad_norm": 13.615736190580805, "learning_rate": 9.98639220747181e-06, "loss": 1.3012, "step": 3605 }, { "epoch": 0.36, "grad_norm": 17.95268313491177, "learning_rate": 9.986175107657855e-06, "loss": 1.2166, "step": 3610 }, { "epoch": 0.36, "grad_norm": 19.109081494313262, "learning_rate": 9.985956292105809e-06, "loss": 1.21, "step": 3615 }, { "epoch": 0.36, "grad_norm": 10.46050026278034, "learning_rate": 9.985735760890966e-06, "loss": 1.2424, "step": 3620 }, { "epoch": 0.37, "grad_norm": 9.548313419301463, "learning_rate": 9.985513514089209e-06, "loss": 1.2031, "step": 3625 }, { "epoch": 0.37, "grad_norm": 11.367090000488474, "learning_rate": 9.985289551777014e-06, "loss": 1.2292, "step": 3630 }, { "epoch": 0.37, "grad_norm": 11.359290128056417, "learning_rate": 9.985063874031444e-06, "loss": 1.2018, "step": 3635 }, { "epoch": 0.37, "grad_norm": 25.69776468028513, "learning_rate": 9.984836480930157e-06, "loss": 1.246, "step": 3640 }, { "epoch": 0.37, "grad_norm": 26.738006219526483, "learning_rate": 9.9846073725514e-06, "loss": 1.232, "step": 3645 }, { "epoch": 0.37, "grad_norm": 27.98023302821429, "learning_rate": 9.984376548974005e-06, "loss": 1.1696, "step": 3650 }, { "epoch": 0.37, "grad_norm": 14.861073552576952, "learning_rate": 9.984144010277402e-06, "loss": 1.2144, "step": 3655 }, { "epoch": 0.37, "grad_norm": 7.373906157122731, "learning_rate": 9.983909756541603e-06, "loss": 1.2201, "step": 3660 }, { "epoch": 0.37, "grad_norm": 14.88970548441138, "learning_rate": 9.983673787847218e-06, "loss": 1.2754, "step": 3665 }, { "epoch": 0.37, "grad_norm": 11.941159431577093, "learning_rate": 9.983436104275443e-06, "loss": 1.1831, "step": 3670 }, { "epoch": 0.37, "grad_norm": 10.114563134319768, "learning_rate": 9.983196705908063e-06, "loss": 1.3016, "step": 3675 }, { "epoch": 0.37, "grad_norm": 7.319121830353334, "learning_rate": 9.982955592827456e-06, "loss": 1.1637, "step": 3680 }, { "epoch": 0.37, "grad_norm": 15.652938083479674, "learning_rate": 9.98271276511659e-06, "loss": 1.2296, "step": 3685 }, { "epoch": 0.37, "grad_norm": 16.455205422152094, "learning_rate": 9.982468222859021e-06, "loss": 1.2656, "step": 3690 }, { "epoch": 0.37, "grad_norm": 13.94661713597918, "learning_rate": 9.982221966138895e-06, "loss": 1.2311, "step": 3695 }, { "epoch": 0.37, "grad_norm": 12.960873556702204, "learning_rate": 9.981973995040948e-06, "loss": 1.2198, "step": 3700 }, { "epoch": 0.37, "grad_norm": 22.09218480259108, "learning_rate": 9.981724309650507e-06, "loss": 1.323, "step": 3705 }, { "epoch": 0.37, "grad_norm": 8.0080144446023, "learning_rate": 9.981472910053492e-06, "loss": 1.2343, "step": 3710 }, { "epoch": 0.37, "grad_norm": 21.145227700843645, "learning_rate": 9.981219796336403e-06, "loss": 1.2497, "step": 3715 }, { "epoch": 0.38, "grad_norm": 19.79162580341351, "learning_rate": 9.980964968586342e-06, "loss": 1.2426, "step": 3720 }, { "epoch": 0.38, "grad_norm": 9.497505379032013, "learning_rate": 9.980708426890993e-06, "loss": 1.1945, "step": 3725 }, { "epoch": 0.38, "grad_norm": 10.618953280914997, "learning_rate": 9.98045017133863e-06, "loss": 1.2648, "step": 3730 }, { "epoch": 0.38, "grad_norm": 18.349019252093978, "learning_rate": 9.980190202018121e-06, "loss": 1.2245, "step": 3735 }, { "epoch": 0.38, "grad_norm": 12.344019064583094, "learning_rate": 9.979928519018919e-06, "loss": 1.2064, "step": 3740 }, { "epoch": 0.38, "grad_norm": 14.945729051308529, "learning_rate": 9.97966512243107e-06, "loss": 1.1961, "step": 3745 }, { "epoch": 0.38, "grad_norm": 9.531189055193323, "learning_rate": 9.97940001234521e-06, "loss": 1.2092, "step": 3750 }, { "epoch": 0.38, "grad_norm": 23.213046385730106, "learning_rate": 9.97913318885256e-06, "loss": 1.2197, "step": 3755 }, { "epoch": 0.38, "grad_norm": 32.725060083495215, "learning_rate": 9.978864652044936e-06, "loss": 1.2902, "step": 3760 }, { "epoch": 0.38, "grad_norm": 43.98473480275498, "learning_rate": 9.97859440201474e-06, "loss": 1.2233, "step": 3765 }, { "epoch": 0.38, "grad_norm": 42.96571294466646, "learning_rate": 9.978322438854966e-06, "loss": 1.2434, "step": 3770 }, { "epoch": 0.38, "grad_norm": 67.18105533450216, "learning_rate": 9.978048762659195e-06, "loss": 1.2261, "step": 3775 }, { "epoch": 0.38, "grad_norm": 6.791734699201156, "learning_rate": 9.977773373521601e-06, "loss": 1.2142, "step": 3780 }, { "epoch": 0.38, "grad_norm": 10.897995141075134, "learning_rate": 9.977496271536943e-06, "loss": 1.2249, "step": 3785 }, { "epoch": 0.38, "grad_norm": 14.497407187298952, "learning_rate": 9.977217456800572e-06, "loss": 1.2123, "step": 3790 }, { "epoch": 0.38, "grad_norm": 7.736211949879045, "learning_rate": 9.976936929408427e-06, "loss": 1.2156, "step": 3795 }, { "epoch": 0.38, "grad_norm": 18.303746441739822, "learning_rate": 9.976654689457038e-06, "loss": 1.2863, "step": 3800 }, { "epoch": 0.38, "grad_norm": 9.829497062940318, "learning_rate": 9.976370737043525e-06, "loss": 1.2618, "step": 3805 }, { "epoch": 0.38, "grad_norm": 25.263446092793245, "learning_rate": 9.976085072265593e-06, "loss": 1.2526, "step": 3810 }, { "epoch": 0.38, "grad_norm": 35.3377923043493, "learning_rate": 9.975797695221542e-06, "loss": 1.2453, "step": 3815 }, { "epoch": 0.39, "grad_norm": 17.464297585102695, "learning_rate": 9.975508606010254e-06, "loss": 1.2531, "step": 3820 }, { "epoch": 0.39, "grad_norm": 23.692828808280154, "learning_rate": 9.975217804731208e-06, "loss": 1.2235, "step": 3825 }, { "epoch": 0.39, "grad_norm": 11.668925213714026, "learning_rate": 9.974925291484468e-06, "loss": 1.2151, "step": 3830 }, { "epoch": 0.39, "grad_norm": 12.866655427075113, "learning_rate": 9.974631066370685e-06, "loss": 1.2726, "step": 3835 }, { "epoch": 0.39, "grad_norm": 25.841814770895773, "learning_rate": 9.974335129491107e-06, "loss": 1.27, "step": 3840 }, { "epoch": 0.39, "grad_norm": 23.100541426504538, "learning_rate": 9.974037480947558e-06, "loss": 1.2535, "step": 3845 }, { "epoch": 0.39, "grad_norm": 64.29530761193791, "learning_rate": 9.973738120842465e-06, "loss": 1.2538, "step": 3850 }, { "epoch": 0.39, "grad_norm": 41.529956070053096, "learning_rate": 9.973437049278833e-06, "loss": 1.2561, "step": 3855 }, { "epoch": 0.39, "grad_norm": 9.71277426315383, "learning_rate": 9.973134266360265e-06, "loss": 1.2695, "step": 3860 }, { "epoch": 0.39, "grad_norm": 8.312591268093923, "learning_rate": 9.972829772190944e-06, "loss": 1.236, "step": 3865 }, { "epoch": 0.39, "grad_norm": 7.681689247726292, "learning_rate": 9.972523566875649e-06, "loss": 1.2536, "step": 3870 }, { "epoch": 0.39, "grad_norm": 26.235793994940313, "learning_rate": 9.972215650519743e-06, "loss": 1.3028, "step": 3875 }, { "epoch": 0.39, "grad_norm": 15.132056462810064, "learning_rate": 9.97190602322918e-06, "loss": 1.1961, "step": 3880 }, { "epoch": 0.39, "grad_norm": 32.44650553471024, "learning_rate": 9.971594685110507e-06, "loss": 1.3053, "step": 3885 }, { "epoch": 0.39, "grad_norm": 16.110785173854836, "learning_rate": 9.971281636270847e-06, "loss": 1.2593, "step": 3890 }, { "epoch": 0.39, "grad_norm": 10.444171464337078, "learning_rate": 9.970966876817928e-06, "loss": 1.2561, "step": 3895 }, { "epoch": 0.39, "grad_norm": 25.17838705462199, "learning_rate": 9.970650406860051e-06, "loss": 1.2029, "step": 3900 }, { "epoch": 0.39, "grad_norm": 7.115984906641157, "learning_rate": 9.970332226506118e-06, "loss": 1.2493, "step": 3905 }, { "epoch": 0.39, "grad_norm": 19.70047927862016, "learning_rate": 9.970012335865612e-06, "loss": 1.1869, "step": 3910 }, { "epoch": 0.39, "grad_norm": 18.038800651626424, "learning_rate": 9.96969073504861e-06, "loss": 1.2424, "step": 3915 }, { "epoch": 0.4, "grad_norm": 8.15861746144962, "learning_rate": 9.96936742416577e-06, "loss": 1.1838, "step": 3920 }, { "epoch": 0.4, "grad_norm": 12.484113761097328, "learning_rate": 9.969042403328348e-06, "loss": 1.2171, "step": 3925 }, { "epoch": 0.4, "grad_norm": 8.110356553423042, "learning_rate": 9.96871567264818e-06, "loss": 1.2518, "step": 3930 }, { "epoch": 0.4, "grad_norm": 17.73363097364326, "learning_rate": 9.968387232237695e-06, "loss": 1.2789, "step": 3935 }, { "epoch": 0.4, "grad_norm": 27.686523594105953, "learning_rate": 9.968057082209909e-06, "loss": 1.244, "step": 3940 }, { "epoch": 0.4, "grad_norm": 11.820108672992458, "learning_rate": 9.967725222678426e-06, "loss": 1.2913, "step": 3945 }, { "epoch": 0.4, "grad_norm": 19.67650247073177, "learning_rate": 9.967391653757438e-06, "loss": 1.2102, "step": 3950 }, { "epoch": 0.4, "grad_norm": 18.73171602233947, "learning_rate": 9.967056375561726e-06, "loss": 1.1982, "step": 3955 }, { "epoch": 0.4, "grad_norm": 24.675528714132533, "learning_rate": 9.966719388206661e-06, "loss": 1.2475, "step": 3960 }, { "epoch": 0.4, "grad_norm": 45.25930029609521, "learning_rate": 9.966380691808197e-06, "loss": 1.2594, "step": 3965 }, { "epoch": 0.4, "grad_norm": 87.67463131497611, "learning_rate": 9.96604028648288e-06, "loss": 1.2446, "step": 3970 }, { "epoch": 0.4, "grad_norm": 7.474598522395306, "learning_rate": 9.965698172347843e-06, "loss": 1.2846, "step": 3975 }, { "epoch": 0.4, "grad_norm": 56.77411416314017, "learning_rate": 9.96535434952081e-06, "loss": 1.252, "step": 3980 }, { "epoch": 0.4, "grad_norm": 46.8997659190866, "learning_rate": 9.965008818120088e-06, "loss": 1.2369, "step": 3985 }, { "epoch": 0.4, "grad_norm": 27.77528218311658, "learning_rate": 9.964661578264571e-06, "loss": 1.2456, "step": 3990 }, { "epoch": 0.4, "grad_norm": 18.546895946518553, "learning_rate": 9.964312630073749e-06, "loss": 1.2298, "step": 3995 }, { "epoch": 0.4, "grad_norm": 9.279947297055188, "learning_rate": 9.963961973667691e-06, "loss": 1.1957, "step": 4000 }, { "epoch": 0.4, "grad_norm": 51.66062157374361, "learning_rate": 9.96360960916706e-06, "loss": 1.2408, "step": 4005 }, { "epoch": 0.4, "grad_norm": 38.003334883771586, "learning_rate": 9.963255536693103e-06, "loss": 1.2566, "step": 4010 }, { "epoch": 0.4, "grad_norm": 36.49216818644186, "learning_rate": 9.962899756367657e-06, "loss": 1.2388, "step": 4015 }, { "epoch": 0.41, "grad_norm": 37.691359368487895, "learning_rate": 9.962542268313144e-06, "loss": 1.2522, "step": 4020 }, { "epoch": 0.41, "grad_norm": 13.338664665110976, "learning_rate": 9.962183072652577e-06, "loss": 1.2367, "step": 4025 }, { "epoch": 0.41, "grad_norm": 9.087845748300886, "learning_rate": 9.961822169509552e-06, "loss": 1.2158, "step": 4030 }, { "epoch": 0.41, "grad_norm": 38.570589682996335, "learning_rate": 9.96145955900826e-06, "loss": 1.2555, "step": 4035 }, { "epoch": 0.41, "grad_norm": 27.693672064187652, "learning_rate": 9.96109524127347e-06, "loss": 1.206, "step": 4040 }, { "epoch": 0.41, "grad_norm": 21.133340119549086, "learning_rate": 9.960729216430549e-06, "loss": 1.269, "step": 4045 }, { "epoch": 0.41, "grad_norm": 47.127690639583996, "learning_rate": 9.96036148460544e-06, "loss": 1.255, "step": 4050 }, { "epoch": 0.41, "grad_norm": 13.301073562064612, "learning_rate": 9.959992045924682e-06, "loss": 1.2739, "step": 4055 }, { "epoch": 0.41, "grad_norm": 11.609836658516967, "learning_rate": 9.9596209005154e-06, "loss": 1.3207, "step": 4060 }, { "epoch": 0.41, "grad_norm": 33.995550626776534, "learning_rate": 9.959248048505304e-06, "loss": 1.2085, "step": 4065 }, { "epoch": 0.41, "grad_norm": 59.88588971982497, "learning_rate": 9.958873490022688e-06, "loss": 1.2102, "step": 4070 }, { "epoch": 0.41, "grad_norm": 30.261967435183816, "learning_rate": 9.958497225196441e-06, "loss": 1.2009, "step": 4075 }, { "epoch": 0.41, "grad_norm": 20.236600833942322, "learning_rate": 9.958119254156036e-06, "loss": 1.2641, "step": 4080 }, { "epoch": 0.41, "grad_norm": 9.097576830169164, "learning_rate": 9.95773957703153e-06, "loss": 1.2568, "step": 4085 }, { "epoch": 0.41, "grad_norm": 28.322236699356623, "learning_rate": 9.957358193953573e-06, "loss": 1.2436, "step": 4090 }, { "epoch": 0.41, "grad_norm": 12.21633921097602, "learning_rate": 9.956975105053395e-06, "loss": 1.2931, "step": 4095 }, { "epoch": 0.41, "grad_norm": 10.400387277003981, "learning_rate": 9.956590310462817e-06, "loss": 1.2489, "step": 4100 }, { "epoch": 0.41, "grad_norm": 8.919282255738858, "learning_rate": 9.956203810314248e-06, "loss": 1.2455, "step": 4105 }, { "epoch": 0.41, "grad_norm": 11.852815217381016, "learning_rate": 9.955815604740682e-06, "loss": 1.2237, "step": 4110 }, { "epoch": 0.41, "grad_norm": 9.280350390136551, "learning_rate": 9.955425693875699e-06, "loss": 1.2531, "step": 4115 }, { "epoch": 0.42, "grad_norm": 7.206007829007667, "learning_rate": 9.955034077853466e-06, "loss": 1.1968, "step": 4120 }, { "epoch": 0.42, "grad_norm": 8.565179622370264, "learning_rate": 9.954640756808743e-06, "loss": 1.2529, "step": 4125 }, { "epoch": 0.42, "grad_norm": 17.6317617587089, "learning_rate": 9.954245730876866e-06, "loss": 1.2129, "step": 4130 }, { "epoch": 0.42, "grad_norm": 18.47417695536017, "learning_rate": 9.953849000193764e-06, "loss": 1.233, "step": 4135 }, { "epoch": 0.42, "grad_norm": 23.358928325643685, "learning_rate": 9.953450564895955e-06, "loss": 1.2425, "step": 4140 }, { "epoch": 0.42, "grad_norm": 38.85271406815992, "learning_rate": 9.953050425120537e-06, "loss": 1.2653, "step": 4145 }, { "epoch": 0.42, "grad_norm": 26.5549459705775, "learning_rate": 9.952648581005196e-06, "loss": 1.2167, "step": 4150 }, { "epoch": 0.42, "grad_norm": 7.9806651207861155, "learning_rate": 9.95224503268821e-06, "loss": 1.2159, "step": 4155 }, { "epoch": 0.42, "grad_norm": 9.204088379198158, "learning_rate": 9.951839780308439e-06, "loss": 1.1693, "step": 4160 }, { "epoch": 0.42, "grad_norm": 34.843347127473734, "learning_rate": 9.951432824005328e-06, "loss": 1.2514, "step": 4165 }, { "epoch": 0.42, "grad_norm": 11.431225825049113, "learning_rate": 9.951024163918913e-06, "loss": 1.2578, "step": 4170 }, { "epoch": 0.42, "grad_norm": 11.273926092048018, "learning_rate": 9.95061380018981e-06, "loss": 1.1902, "step": 4175 }, { "epoch": 0.42, "grad_norm": 10.601147365831144, "learning_rate": 9.950201732959228e-06, "loss": 1.2412, "step": 4180 }, { "epoch": 0.42, "grad_norm": 39.646460393382455, "learning_rate": 9.949787962368957e-06, "loss": 1.2466, "step": 4185 }, { "epoch": 0.42, "grad_norm": 23.64410488897537, "learning_rate": 9.949372488561377e-06, "loss": 1.1957, "step": 4190 }, { "epoch": 0.42, "grad_norm": 10.449811055571177, "learning_rate": 9.94895531167945e-06, "loss": 1.228, "step": 4195 }, { "epoch": 0.42, "grad_norm": 11.967857762143987, "learning_rate": 9.948536431866726e-06, "loss": 1.2249, "step": 4200 }, { "epoch": 0.42, "grad_norm": 16.977410140447304, "learning_rate": 9.948115849267344e-06, "loss": 1.2819, "step": 4205 }, { "epoch": 0.42, "grad_norm": 13.49390147390754, "learning_rate": 9.947693564026025e-06, "loss": 1.252, "step": 4210 }, { "epoch": 0.42, "grad_norm": 14.579440419526714, "learning_rate": 9.947269576288074e-06, "loss": 1.2551, "step": 4215 }, { "epoch": 0.43, "grad_norm": 23.073349590631008, "learning_rate": 9.946843886199387e-06, "loss": 1.2799, "step": 4220 }, { "epoch": 0.43, "grad_norm": 33.080214220661595, "learning_rate": 9.946416493906445e-06, "loss": 1.219, "step": 4225 }, { "epoch": 0.43, "grad_norm": 17.340182315915886, "learning_rate": 9.94598739955631e-06, "loss": 1.2418, "step": 4230 }, { "epoch": 0.43, "grad_norm": 7.732259768036213, "learning_rate": 9.945556603296636e-06, "loss": 1.2616, "step": 4235 }, { "epoch": 0.43, "grad_norm": 12.220849733614314, "learning_rate": 9.945124105275658e-06, "loss": 1.2264, "step": 4240 }, { "epoch": 0.43, "grad_norm": 7.415288492518843, "learning_rate": 9.9446899056422e-06, "loss": 1.2292, "step": 4245 }, { "epoch": 0.43, "grad_norm": 11.51497427087494, "learning_rate": 9.944254004545666e-06, "loss": 1.2158, "step": 4250 }, { "epoch": 0.43, "grad_norm": 45.84960290897499, "learning_rate": 9.943816402136053e-06, "loss": 1.185, "step": 4255 }, { "epoch": 0.43, "grad_norm": 51.5481831854867, "learning_rate": 9.943377098563936e-06, "loss": 1.2144, "step": 4260 }, { "epoch": 0.43, "grad_norm": 26.0885083725824, "learning_rate": 9.942936093980482e-06, "loss": 1.2406, "step": 4265 }, { "epoch": 0.43, "grad_norm": 52.86487745850491, "learning_rate": 9.94249338853744e-06, "loss": 1.2343, "step": 4270 }, { "epoch": 0.43, "grad_norm": 32.085728312012584, "learning_rate": 9.942048982387142e-06, "loss": 1.2485, "step": 4275 }, { "epoch": 0.43, "grad_norm": 10.147507968619474, "learning_rate": 9.94160287568251e-06, "loss": 1.2447, "step": 4280 }, { "epoch": 0.43, "grad_norm": 11.746530392136881, "learning_rate": 9.941155068577049e-06, "loss": 1.2367, "step": 4285 }, { "epoch": 0.43, "grad_norm": 38.852565860595654, "learning_rate": 9.940705561224847e-06, "loss": 1.2278, "step": 4290 }, { "epoch": 0.43, "grad_norm": 51.92516409644444, "learning_rate": 9.940254353780581e-06, "loss": 1.2651, "step": 4295 }, { "epoch": 0.43, "grad_norm": 43.46149206212037, "learning_rate": 9.939801446399511e-06, "loss": 1.2728, "step": 4300 }, { "epoch": 0.43, "grad_norm": 26.44387192644677, "learning_rate": 9.93934683923748e-06, "loss": 1.1896, "step": 4305 }, { "epoch": 0.43, "grad_norm": 37.746036203691254, "learning_rate": 9.93889053245092e-06, "loss": 1.2588, "step": 4310 }, { "epoch": 0.44, "grad_norm": 12.520173237581492, "learning_rate": 9.938432526196844e-06, "loss": 1.1869, "step": 4315 }, { "epoch": 0.44, "grad_norm": 10.467561168011299, "learning_rate": 9.937972820632854e-06, "loss": 1.2904, "step": 4320 }, { "epoch": 0.44, "grad_norm": 18.609641849366643, "learning_rate": 9.937511415917132e-06, "loss": 1.2455, "step": 4325 }, { "epoch": 0.44, "grad_norm": 14.00022249781794, "learning_rate": 9.937048312208448e-06, "loss": 1.2635, "step": 4330 }, { "epoch": 0.44, "grad_norm": 19.548844065392103, "learning_rate": 9.936583509666154e-06, "loss": 1.2521, "step": 4335 }, { "epoch": 0.44, "grad_norm": 10.740443268777065, "learning_rate": 9.93611700845019e-06, "loss": 1.214, "step": 4340 }, { "epoch": 0.44, "grad_norm": 15.354451049628311, "learning_rate": 9.93564880872108e-06, "loss": 1.2474, "step": 4345 }, { "epoch": 0.44, "grad_norm": 13.354353870738024, "learning_rate": 9.935178910639927e-06, "loss": 1.2202, "step": 4350 }, { "epoch": 0.44, "grad_norm": 11.461666361813926, "learning_rate": 9.934707314368424e-06, "loss": 1.1891, "step": 4355 }, { "epoch": 0.44, "grad_norm": 26.26375051175625, "learning_rate": 9.934234020068847e-06, "loss": 1.2721, "step": 4360 }, { "epoch": 0.44, "grad_norm": 19.66123236538432, "learning_rate": 9.933759027904058e-06, "loss": 1.1961, "step": 4365 }, { "epoch": 0.44, "grad_norm": 9.849872593058937, "learning_rate": 9.9332823380375e-06, "loss": 1.191, "step": 4370 }, { "epoch": 0.44, "grad_norm": 10.52585365597777, "learning_rate": 9.932803950633199e-06, "loss": 1.2106, "step": 4375 }, { "epoch": 0.44, "grad_norm": 23.505146587479103, "learning_rate": 9.93232386585577e-06, "loss": 1.2883, "step": 4380 }, { "epoch": 0.44, "grad_norm": 20.704178651242195, "learning_rate": 9.931842083870413e-06, "loss": 1.2569, "step": 4385 }, { "epoch": 0.44, "grad_norm": 21.662070835592704, "learning_rate": 9.931358604842902e-06, "loss": 1.2289, "step": 4390 }, { "epoch": 0.44, "grad_norm": 13.059661247937026, "learning_rate": 9.930873428939607e-06, "loss": 1.2457, "step": 4395 }, { "epoch": 0.44, "grad_norm": 12.51351005431564, "learning_rate": 9.930386556327475e-06, "loss": 1.2639, "step": 4400 }, { "epoch": 0.44, "grad_norm": 25.232301350217114, "learning_rate": 9.929897987174038e-06, "loss": 1.1957, "step": 4405 }, { "epoch": 0.44, "grad_norm": 15.350420590665204, "learning_rate": 9.92940772164741e-06, "loss": 1.2223, "step": 4410 }, { "epoch": 0.45, "grad_norm": 11.311765119921033, "learning_rate": 9.928915759916295e-06, "loss": 1.2363, "step": 4415 }, { "epoch": 0.45, "grad_norm": 8.127480756243877, "learning_rate": 9.928422102149974e-06, "loss": 1.2429, "step": 4420 }, { "epoch": 0.45, "grad_norm": 7.699872791778428, "learning_rate": 9.927926748518316e-06, "loss": 1.2488, "step": 4425 }, { "epoch": 0.45, "grad_norm": 11.510205983564731, "learning_rate": 9.927429699191768e-06, "loss": 1.2004, "step": 4430 }, { "epoch": 0.45, "grad_norm": 31.498092704192338, "learning_rate": 9.926930954341368e-06, "loss": 1.2013, "step": 4435 }, { "epoch": 0.45, "grad_norm": 23.41388595460819, "learning_rate": 9.926430514138734e-06, "loss": 1.2054, "step": 4440 }, { "epoch": 0.45, "grad_norm": 33.179614627446334, "learning_rate": 9.925928378756064e-06, "loss": 1.2418, "step": 4445 }, { "epoch": 0.45, "grad_norm": 15.854169811315563, "learning_rate": 9.925424548366142e-06, "loss": 1.2694, "step": 4450 }, { "epoch": 0.45, "grad_norm": 22.130365073256336, "learning_rate": 9.92491902314234e-06, "loss": 1.2199, "step": 4455 }, { "epoch": 0.45, "grad_norm": 26.12787060881701, "learning_rate": 9.924411803258604e-06, "loss": 1.2743, "step": 4460 }, { "epoch": 0.45, "grad_norm": 44.41777719215755, "learning_rate": 9.923902888889472e-06, "loss": 1.2235, "step": 4465 }, { "epoch": 0.45, "grad_norm": 68.4105816881735, "learning_rate": 9.923392280210056e-06, "loss": 1.2475, "step": 4470 }, { "epoch": 0.45, "grad_norm": 65.17654692737699, "learning_rate": 9.922879977396062e-06, "loss": 1.2378, "step": 4475 }, { "epoch": 0.45, "grad_norm": 15.007706083704576, "learning_rate": 9.922365980623768e-06, "loss": 1.3025, "step": 4480 }, { "epoch": 0.45, "grad_norm": 33.37928601370577, "learning_rate": 9.92185029007004e-06, "loss": 1.2351, "step": 4485 }, { "epoch": 0.45, "grad_norm": 49.204195129373645, "learning_rate": 9.92133290591233e-06, "loss": 1.2301, "step": 4490 }, { "epoch": 0.45, "grad_norm": 20.865406533566258, "learning_rate": 9.920813828328668e-06, "loss": 1.2425, "step": 4495 }, { "epoch": 0.45, "grad_norm": 9.484545945850263, "learning_rate": 9.920293057497665e-06, "loss": 1.2036, "step": 4500 }, { "epoch": 0.45, "grad_norm": 10.112272089623518, "learning_rate": 9.919770593598523e-06, "loss": 1.1751, "step": 4505 }, { "epoch": 0.45, "grad_norm": 16.091124986461764, "learning_rate": 9.919246436811017e-06, "loss": 1.1866, "step": 4510 }, { "epoch": 0.46, "grad_norm": 10.14013552151064, "learning_rate": 9.918720587315512e-06, "loss": 1.2246, "step": 4515 }, { "epoch": 0.46, "grad_norm": 15.0306005576408, "learning_rate": 9.918193045292949e-06, "loss": 1.2514, "step": 4520 }, { "epoch": 0.46, "grad_norm": 8.245379854317187, "learning_rate": 9.917663810924858e-06, "loss": 1.1635, "step": 4525 }, { "epoch": 0.46, "grad_norm": 20.92733969341995, "learning_rate": 9.917132884393346e-06, "loss": 1.2316, "step": 4530 }, { "epoch": 0.46, "grad_norm": 8.617987539724332, "learning_rate": 9.916600265881104e-06, "loss": 1.2483, "step": 4535 }, { "epoch": 0.46, "grad_norm": 7.6733268929963545, "learning_rate": 9.916065955571408e-06, "loss": 1.2179, "step": 4540 }, { "epoch": 0.46, "grad_norm": 15.242155444345359, "learning_rate": 9.915529953648111e-06, "loss": 1.2364, "step": 4545 }, { "epoch": 0.46, "grad_norm": 7.546232213228472, "learning_rate": 9.914992260295653e-06, "loss": 1.2006, "step": 4550 }, { "epoch": 0.46, "grad_norm": 8.412672944813984, "learning_rate": 9.914452875699053e-06, "loss": 1.2184, "step": 4555 }, { "epoch": 0.46, "grad_norm": 9.453619543099387, "learning_rate": 9.91391180004391e-06, "loss": 1.2225, "step": 4560 }, { "epoch": 0.46, "grad_norm": 15.707388871143154, "learning_rate": 9.913369033516412e-06, "loss": 1.2034, "step": 4565 }, { "epoch": 0.46, "grad_norm": 16.46315709092553, "learning_rate": 9.912824576303321e-06, "loss": 1.2387, "step": 4570 }, { "epoch": 0.46, "grad_norm": 10.270494960714121, "learning_rate": 9.912278428591986e-06, "loss": 1.2646, "step": 4575 }, { "epoch": 0.46, "grad_norm": 11.417488967738182, "learning_rate": 9.911730590570335e-06, "loss": 1.2333, "step": 4580 }, { "epoch": 0.46, "grad_norm": 28.781749944510707, "learning_rate": 9.91118106242688e-06, "loss": 1.2128, "step": 4585 }, { "epoch": 0.46, "grad_norm": 33.17270676630123, "learning_rate": 9.910629844350711e-06, "loss": 1.2591, "step": 4590 }, { "epoch": 0.46, "grad_norm": 27.21735457760835, "learning_rate": 9.910076936531503e-06, "loss": 1.1947, "step": 4595 }, { "epoch": 0.46, "grad_norm": 44.91419841319632, "learning_rate": 9.909522339159511e-06, "loss": 1.2802, "step": 4600 }, { "epoch": 0.46, "grad_norm": 21.381704300692164, "learning_rate": 9.908966052425573e-06, "loss": 1.2412, "step": 4605 }, { "epoch": 0.46, "grad_norm": 133.4976979325375, "learning_rate": 9.908408076521104e-06, "loss": 1.2974, "step": 4610 }, { "epoch": 0.47, "grad_norm": 104.40804477999271, "learning_rate": 9.907848411638102e-06, "loss": 1.2562, "step": 4615 }, { "epoch": 0.47, "grad_norm": 43.1718486242428, "learning_rate": 9.90728705796915e-06, "loss": 1.2858, "step": 4620 }, { "epoch": 0.47, "grad_norm": 10.430405281503546, "learning_rate": 9.90672401570741e-06, "loss": 1.2349, "step": 4625 }, { "epoch": 0.47, "grad_norm": 28.408859457680723, "learning_rate": 9.906159285046622e-06, "loss": 1.2663, "step": 4630 }, { "epoch": 0.47, "grad_norm": 35.80208086924776, "learning_rate": 9.905592866181108e-06, "loss": 1.2238, "step": 4635 }, { "epoch": 0.47, "grad_norm": 55.64747717171744, "learning_rate": 9.905024759305777e-06, "loss": 1.2932, "step": 4640 }, { "epoch": 0.47, "grad_norm": 25.653632126635838, "learning_rate": 9.904454964616108e-06, "loss": 1.2248, "step": 4645 }, { "epoch": 0.47, "grad_norm": 89.7506396373891, "learning_rate": 9.903883482308172e-06, "loss": 1.2288, "step": 4650 }, { "epoch": 0.47, "grad_norm": 84.92599413538284, "learning_rate": 9.903310312578613e-06, "loss": 1.2751, "step": 4655 }, { "epoch": 0.47, "grad_norm": 40.33396768812129, "learning_rate": 9.902735455624658e-06, "loss": 1.2513, "step": 4660 }, { "epoch": 0.47, "grad_norm": 44.693246410015014, "learning_rate": 9.902158911644117e-06, "loss": 1.222, "step": 4665 }, { "epoch": 0.47, "grad_norm": 36.40728587256452, "learning_rate": 9.901580680835377e-06, "loss": 1.247, "step": 4670 }, { "epoch": 0.47, "grad_norm": 11.767226773863197, "learning_rate": 9.901000763397404e-06, "loss": 1.2207, "step": 4675 }, { "epoch": 0.47, "grad_norm": 33.84826992392133, "learning_rate": 9.900419159529751e-06, "loss": 1.2387, "step": 4680 }, { "epoch": 0.47, "grad_norm": 21.084937111619066, "learning_rate": 9.899835869432544e-06, "loss": 1.2599, "step": 4685 }, { "epoch": 0.47, "grad_norm": 13.903378794691731, "learning_rate": 9.899250893306496e-06, "loss": 1.2094, "step": 4690 }, { "epoch": 0.47, "grad_norm": 12.651747205568551, "learning_rate": 9.898664231352894e-06, "loss": 1.278, "step": 4695 }, { "epoch": 0.47, "grad_norm": 18.91091257408942, "learning_rate": 9.898075883773609e-06, "loss": 1.2486, "step": 4700 }, { "epoch": 0.47, "grad_norm": 15.666073931133331, "learning_rate": 9.897485850771092e-06, "loss": 1.2789, "step": 4705 }, { "epoch": 0.47, "grad_norm": 20.233336917533183, "learning_rate": 9.89689413254837e-06, "loss": 1.2588, "step": 4710 }, { "epoch": 0.48, "grad_norm": 48.842048881189655, "learning_rate": 9.896300729309051e-06, "loss": 1.1803, "step": 4715 }, { "epoch": 0.48, "grad_norm": 48.32400165414345, "learning_rate": 9.895705641257332e-06, "loss": 1.2072, "step": 4720 }, { "epoch": 0.48, "grad_norm": 15.576826950538285, "learning_rate": 9.895108868597976e-06, "loss": 1.2672, "step": 4725 }, { "epoch": 0.48, "grad_norm": 57.056727168245324, "learning_rate": 9.894510411536335e-06, "loss": 1.2345, "step": 4730 }, { "epoch": 0.48, "grad_norm": 45.82864349568885, "learning_rate": 9.893910270278335e-06, "loss": 1.2594, "step": 4735 }, { "epoch": 0.48, "grad_norm": 53.472664284359006, "learning_rate": 9.893308445030485e-06, "loss": 1.2658, "step": 4740 }, { "epoch": 0.48, "grad_norm": 51.09707449624854, "learning_rate": 9.892704935999872e-06, "loss": 1.2381, "step": 4745 }, { "epoch": 0.48, "grad_norm": 52.06497551634407, "learning_rate": 9.892099743394165e-06, "loss": 1.2305, "step": 4750 }, { "epoch": 0.48, "grad_norm": 19.917155144998816, "learning_rate": 9.89149286742161e-06, "loss": 1.248, "step": 4755 }, { "epoch": 0.48, "grad_norm": 33.93858162026787, "learning_rate": 9.89088430829103e-06, "loss": 1.1996, "step": 4760 }, { "epoch": 0.48, "grad_norm": 24.056306625196044, "learning_rate": 9.890274066211828e-06, "loss": 1.2323, "step": 4765 }, { "epoch": 0.48, "grad_norm": 36.916877403006154, "learning_rate": 9.889662141393994e-06, "loss": 1.2677, "step": 4770 }, { "epoch": 0.48, "grad_norm": 49.26865152084359, "learning_rate": 9.889048534048088e-06, "loss": 1.2934, "step": 4775 }, { "epoch": 0.48, "grad_norm": 36.128655809311724, "learning_rate": 9.888433244385248e-06, "loss": 1.2324, "step": 4780 }, { "epoch": 0.48, "grad_norm": 33.15784894308101, "learning_rate": 9.8878162726172e-06, "loss": 1.2033, "step": 4785 }, { "epoch": 0.48, "grad_norm": 9.0821031794577, "learning_rate": 9.887197618956239e-06, "loss": 1.2938, "step": 4790 }, { "epoch": 0.48, "grad_norm": 18.63229833800865, "learning_rate": 9.886577283615247e-06, "loss": 1.2084, "step": 4795 }, { "epoch": 0.48, "grad_norm": 34.363919504880776, "learning_rate": 9.885955266807677e-06, "loss": 1.2019, "step": 4800 }, { "epoch": 0.48, "grad_norm": 23.32077401063675, "learning_rate": 9.885331568747569e-06, "loss": 1.2544, "step": 4805 }, { "epoch": 0.48, "grad_norm": 27.802670446738542, "learning_rate": 9.884706189649532e-06, "loss": 1.1967, "step": 4810 }, { "epoch": 0.49, "grad_norm": 8.71809098143742, "learning_rate": 9.88407912972876e-06, "loss": 1.2215, "step": 4815 }, { "epoch": 0.49, "grad_norm": 14.621038663696202, "learning_rate": 9.883450389201025e-06, "loss": 1.2621, "step": 4820 }, { "epoch": 0.49, "grad_norm": 34.66887135697982, "learning_rate": 9.882819968282678e-06, "loss": 1.2593, "step": 4825 }, { "epoch": 0.49, "grad_norm": 18.02489044655485, "learning_rate": 9.88218786719064e-06, "loss": 1.2477, "step": 4830 }, { "epoch": 0.49, "grad_norm": 11.21532524157301, "learning_rate": 9.881554086142422e-06, "loss": 1.278, "step": 4835 }, { "epoch": 0.49, "grad_norm": 39.71229395992522, "learning_rate": 9.880918625356103e-06, "loss": 1.2432, "step": 4840 }, { "epoch": 0.49, "grad_norm": 38.8748764564693, "learning_rate": 9.88028148505035e-06, "loss": 1.24, "step": 4845 }, { "epoch": 0.49, "grad_norm": 19.839325112997184, "learning_rate": 9.879642665444397e-06, "loss": 1.2117, "step": 4850 }, { "epoch": 0.49, "grad_norm": 79.07900220844354, "learning_rate": 9.879002166758065e-06, "loss": 1.2215, "step": 4855 }, { "epoch": 0.49, "grad_norm": 23.67166694685957, "learning_rate": 9.878359989211747e-06, "loss": 1.2823, "step": 4860 }, { "epoch": 0.49, "grad_norm": 11.273654423274305, "learning_rate": 9.877716133026415e-06, "loss": 1.3155, "step": 4865 }, { "epoch": 0.49, "grad_norm": 8.417875140663172, "learning_rate": 9.87707059842362e-06, "loss": 1.187, "step": 4870 }, { "epoch": 0.49, "grad_norm": 9.901533696869453, "learning_rate": 9.876423385625492e-06, "loss": 1.2881, "step": 4875 }, { "epoch": 0.49, "grad_norm": 8.667402310523359, "learning_rate": 9.875774494854734e-06, "loss": 1.263, "step": 4880 }, { "epoch": 0.49, "grad_norm": 8.67718694052718, "learning_rate": 9.875123926334629e-06, "loss": 1.2246, "step": 4885 }, { "epoch": 0.49, "grad_norm": 18.615445503902517, "learning_rate": 9.874471680289037e-06, "loss": 1.2496, "step": 4890 }, { "epoch": 0.49, "grad_norm": 10.202333722795672, "learning_rate": 9.873817756942396e-06, "loss": 1.2405, "step": 4895 }, { "epoch": 0.49, "grad_norm": 9.941122551177786, "learning_rate": 9.873162156519718e-06, "loss": 1.2547, "step": 4900 }, { "epoch": 0.49, "grad_norm": 8.333362690140278, "learning_rate": 9.872504879246598e-06, "loss": 1.257, "step": 4905 }, { "epoch": 0.5, "grad_norm": 22.836006708807865, "learning_rate": 9.871845925349204e-06, "loss": 1.2134, "step": 4910 }, { "epoch": 0.5, "grad_norm": 29.536939423968438, "learning_rate": 9.871185295054278e-06, "loss": 1.217, "step": 4915 }, { "epoch": 0.5, "grad_norm": 25.05820903087772, "learning_rate": 9.870522988589146e-06, "loss": 1.2439, "step": 4920 }, { "epoch": 0.5, "grad_norm": 23.98113010207636, "learning_rate": 9.869859006181705e-06, "loss": 1.1887, "step": 4925 }, { "epoch": 0.5, "grad_norm": 23.53745644035191, "learning_rate": 9.86919334806043e-06, "loss": 1.233, "step": 4930 }, { "epoch": 0.5, "grad_norm": 15.311372888712697, "learning_rate": 9.868526014454377e-06, "loss": 1.2674, "step": 4935 }, { "epoch": 0.5, "grad_norm": 12.363781317584587, "learning_rate": 9.867857005593172e-06, "loss": 1.2111, "step": 4940 }, { "epoch": 0.5, "grad_norm": 12.179319863792337, "learning_rate": 9.86718632170702e-06, "loss": 1.2123, "step": 4945 }, { "epoch": 0.5, "grad_norm": 6.505244786041763, "learning_rate": 9.866513963026703e-06, "loss": 1.2404, "step": 4950 }, { "epoch": 0.5, "grad_norm": 6.854134017392649, "learning_rate": 9.865839929783578e-06, "loss": 1.2601, "step": 4955 }, { "epoch": 0.5, "grad_norm": 9.191892520579293, "learning_rate": 9.865164222209583e-06, "loss": 1.2635, "step": 4960 }, { "epoch": 0.5, "grad_norm": 6.751273585672897, "learning_rate": 9.864486840537225e-06, "loss": 1.1951, "step": 4965 }, { "epoch": 0.5, "grad_norm": 44.06145173052812, "learning_rate": 9.863807784999591e-06, "loss": 1.2202, "step": 4970 }, { "epoch": 0.5, "grad_norm": 11.736756270544875, "learning_rate": 9.863127055830343e-06, "loss": 1.2326, "step": 4975 }, { "epoch": 0.5, "grad_norm": 11.145462317727578, "learning_rate": 9.862444653263723e-06, "loss": 1.2715, "step": 4980 }, { "epoch": 0.5, "grad_norm": 14.699736433964649, "learning_rate": 9.861760577534538e-06, "loss": 1.2768, "step": 4985 }, { "epoch": 0.5, "grad_norm": 8.020798950138495, "learning_rate": 9.861074828878184e-06, "loss": 1.254, "step": 4990 }, { "epoch": 0.5, "grad_norm": 20.66731748752754, "learning_rate": 9.860387407530625e-06, "loss": 1.2259, "step": 4995 }, { "epoch": 0.5, "grad_norm": 9.659673761001347, "learning_rate": 9.859698313728399e-06, "loss": 1.2653, "step": 5000 }, { "epoch": 0.5, "grad_norm": 10.695606595819575, "learning_rate": 9.859007547708626e-06, "loss": 1.2337, "step": 5005 }, { "epoch": 0.51, "grad_norm": 11.328135911399114, "learning_rate": 9.858315109708998e-06, "loss": 1.2404, "step": 5010 }, { "epoch": 0.51, "grad_norm": 15.11731390695412, "learning_rate": 9.857620999967778e-06, "loss": 1.2627, "step": 5015 }, { "epoch": 0.51, "grad_norm": 20.37594952930069, "learning_rate": 9.856925218723814e-06, "loss": 1.2325, "step": 5020 }, { "epoch": 0.51, "grad_norm": 23.392796017990797, "learning_rate": 9.85622776621652e-06, "loss": 1.2049, "step": 5025 }, { "epoch": 0.51, "grad_norm": 46.26122007063707, "learning_rate": 9.85552864268589e-06, "loss": 1.2259, "step": 5030 }, { "epoch": 0.51, "grad_norm": 14.075205946208943, "learning_rate": 9.854827848372492e-06, "loss": 1.2915, "step": 5035 }, { "epoch": 0.51, "grad_norm": 8.041877762390996, "learning_rate": 9.854125383517468e-06, "loss": 1.2339, "step": 5040 }, { "epoch": 0.51, "grad_norm": 18.90087269271391, "learning_rate": 9.853421248362536e-06, "loss": 1.2436, "step": 5045 }, { "epoch": 0.51, "grad_norm": 10.086319639257932, "learning_rate": 9.852715443149988e-06, "loss": 1.2494, "step": 5050 }, { "epoch": 0.51, "grad_norm": 14.108264527082493, "learning_rate": 9.852007968122691e-06, "loss": 1.213, "step": 5055 }, { "epoch": 0.51, "grad_norm": 6.833632396667184, "learning_rate": 9.851298823524086e-06, "loss": 1.2191, "step": 5060 }, { "epoch": 0.51, "grad_norm": 16.616780902137627, "learning_rate": 9.85058800959819e-06, "loss": 1.2347, "step": 5065 }, { "epoch": 0.51, "grad_norm": 53.27961975081988, "learning_rate": 9.849875526589592e-06, "loss": 1.2327, "step": 5070 }, { "epoch": 0.51, "grad_norm": 9.000337199179235, "learning_rate": 9.849161374743456e-06, "loss": 1.2462, "step": 5075 }, { "epoch": 0.51, "grad_norm": 12.697995505646016, "learning_rate": 9.848445554305525e-06, "loss": 1.255, "step": 5080 }, { "epoch": 0.51, "grad_norm": 10.537173940270334, "learning_rate": 9.847728065522109e-06, "loss": 1.2691, "step": 5085 }, { "epoch": 0.51, "grad_norm": 8.881764103267859, "learning_rate": 9.847008908640096e-06, "loss": 1.2048, "step": 5090 }, { "epoch": 0.51, "grad_norm": 10.444448898642385, "learning_rate": 9.846288083906945e-06, "loss": 1.2537, "step": 5095 }, { "epoch": 0.51, "grad_norm": 23.459583080559593, "learning_rate": 9.845565591570696e-06, "loss": 1.2382, "step": 5100 }, { "epoch": 0.51, "grad_norm": 12.53518476827716, "learning_rate": 9.844841431879953e-06, "loss": 1.2616, "step": 5105 }, { "epoch": 0.52, "grad_norm": 7.742081524029113, "learning_rate": 9.8441156050839e-06, "loss": 1.2426, "step": 5110 }, { "epoch": 0.52, "grad_norm": 7.190738080721399, "learning_rate": 9.843388111432295e-06, "loss": 1.2427, "step": 5115 }, { "epoch": 0.52, "grad_norm": 16.59878231203815, "learning_rate": 9.842658951175468e-06, "loss": 1.2566, "step": 5120 }, { "epoch": 0.52, "grad_norm": 8.645578107701345, "learning_rate": 9.84192812456432e-06, "loss": 1.1824, "step": 5125 }, { "epoch": 0.52, "grad_norm": 8.559822039639393, "learning_rate": 9.841195631850329e-06, "loss": 1.2168, "step": 5130 }, { "epoch": 0.52, "grad_norm": 9.02789966316085, "learning_rate": 9.840461473285545e-06, "loss": 1.2199, "step": 5135 }, { "epoch": 0.52, "grad_norm": 7.593998708996688, "learning_rate": 9.83972564912259e-06, "loss": 1.2275, "step": 5140 }, { "epoch": 0.52, "grad_norm": 15.418060402702537, "learning_rate": 9.838988159614662e-06, "loss": 1.2532, "step": 5145 }, { "epoch": 0.52, "grad_norm": 7.902976645073543, "learning_rate": 9.838249005015532e-06, "loss": 1.2601, "step": 5150 }, { "epoch": 0.52, "grad_norm": 8.003923073859536, "learning_rate": 9.837508185579539e-06, "loss": 1.2247, "step": 5155 }, { "epoch": 0.52, "grad_norm": 27.50304902900661, "learning_rate": 9.8367657015616e-06, "loss": 1.318, "step": 5160 }, { "epoch": 0.52, "grad_norm": 10.691673740264005, "learning_rate": 9.836021553217204e-06, "loss": 1.2053, "step": 5165 }, { "epoch": 0.52, "grad_norm": 39.807525373760335, "learning_rate": 9.835275740802407e-06, "loss": 1.2445, "step": 5170 }, { "epoch": 0.52, "grad_norm": 54.93907310262677, "learning_rate": 9.834528264573848e-06, "loss": 1.2496, "step": 5175 }, { "epoch": 0.52, "grad_norm": 29.959308679991757, "learning_rate": 9.833779124788732e-06, "loss": 1.213, "step": 5180 }, { "epoch": 0.52, "grad_norm": 30.341779174554887, "learning_rate": 9.833028321704833e-06, "loss": 1.241, "step": 5185 }, { "epoch": 0.52, "grad_norm": 7.11403595375024, "learning_rate": 9.832275855580506e-06, "loss": 1.2206, "step": 5190 }, { "epoch": 0.52, "grad_norm": 8.211458334912457, "learning_rate": 9.831521726674673e-06, "loss": 1.2358, "step": 5195 }, { "epoch": 0.52, "grad_norm": 55.873465004227356, "learning_rate": 9.83076593524683e-06, "loss": 1.2454, "step": 5200 }, { "epoch": 0.52, "grad_norm": 14.95501751361242, "learning_rate": 9.830008481557039e-06, "loss": 1.265, "step": 5205 }, { "epoch": 0.53, "grad_norm": 26.831116982855633, "learning_rate": 9.829249365865944e-06, "loss": 1.1869, "step": 5210 }, { "epoch": 0.53, "grad_norm": 13.272876439024357, "learning_rate": 9.828488588434755e-06, "loss": 1.2621, "step": 5215 }, { "epoch": 0.53, "grad_norm": 14.744067112409168, "learning_rate": 9.827726149525254e-06, "loss": 1.2149, "step": 5220 }, { "epoch": 0.53, "grad_norm": 18.587763110752242, "learning_rate": 9.826962049399797e-06, "loss": 1.1707, "step": 5225 }, { "epoch": 0.53, "grad_norm": 46.80257048909383, "learning_rate": 9.826196288321308e-06, "loss": 1.2159, "step": 5230 }, { "epoch": 0.53, "grad_norm": 30.462258854027745, "learning_rate": 9.825428866553286e-06, "loss": 1.2172, "step": 5235 }, { "epoch": 0.53, "grad_norm": 12.872961826576143, "learning_rate": 9.824659784359801e-06, "loss": 1.2454, "step": 5240 }, { "epoch": 0.53, "grad_norm": 18.107407756464607, "learning_rate": 9.823889042005491e-06, "loss": 1.2975, "step": 5245 }, { "epoch": 0.53, "grad_norm": 32.772802733577855, "learning_rate": 9.82311663975557e-06, "loss": 1.2728, "step": 5250 }, { "epoch": 0.53, "grad_norm": 16.778002093988857, "learning_rate": 9.822342577875818e-06, "loss": 1.2828, "step": 5255 }, { "epoch": 0.53, "grad_norm": 20.179978174318997, "learning_rate": 9.821566856632591e-06, "loss": 1.2224, "step": 5260 }, { "epoch": 0.53, "grad_norm": 14.651830319926578, "learning_rate": 9.820789476292815e-06, "loss": 1.2582, "step": 5265 }, { "epoch": 0.53, "grad_norm": 23.417263418666796, "learning_rate": 9.820010437123985e-06, "loss": 1.2399, "step": 5270 }, { "epoch": 0.53, "grad_norm": 16.117470831131186, "learning_rate": 9.819229739394167e-06, "loss": 1.2411, "step": 5275 }, { "epoch": 0.53, "grad_norm": 28.815770897493955, "learning_rate": 9.818447383371999e-06, "loss": 1.2659, "step": 5280 }, { "epoch": 0.53, "grad_norm": 12.109159903975375, "learning_rate": 9.817663369326687e-06, "loss": 1.2177, "step": 5285 }, { "epoch": 0.53, "grad_norm": 16.63369205165027, "learning_rate": 9.816877697528011e-06, "loss": 1.2485, "step": 5290 }, { "epoch": 0.53, "grad_norm": 11.89167586242028, "learning_rate": 9.81609036824632e-06, "loss": 1.2576, "step": 5295 }, { "epoch": 0.53, "grad_norm": 36.62989419884296, "learning_rate": 9.815301381752535e-06, "loss": 1.2026, "step": 5300 }, { "epoch": 0.53, "grad_norm": 13.802536810868858, "learning_rate": 9.814510738318142e-06, "loss": 1.2953, "step": 5305 }, { "epoch": 0.54, "grad_norm": 8.092988919751438, "learning_rate": 9.813718438215202e-06, "loss": 1.2312, "step": 5310 }, { "epoch": 0.54, "grad_norm": 14.4301077730165, "learning_rate": 9.812924481716347e-06, "loss": 1.2257, "step": 5315 }, { "epoch": 0.54, "grad_norm": 7.0347139548726805, "learning_rate": 9.812128869094773e-06, "loss": 1.2851, "step": 5320 }, { "epoch": 0.54, "grad_norm": 15.166163833725394, "learning_rate": 9.811331600624251e-06, "loss": 1.1624, "step": 5325 }, { "epoch": 0.54, "grad_norm": 9.087763297173032, "learning_rate": 9.810532676579123e-06, "loss": 1.2585, "step": 5330 }, { "epoch": 0.54, "grad_norm": 17.94304547276539, "learning_rate": 9.809732097234295e-06, "loss": 1.2302, "step": 5335 }, { "epoch": 0.54, "grad_norm": 7.76779948855747, "learning_rate": 9.808929862865244e-06, "loss": 1.2467, "step": 5340 }, { "epoch": 0.54, "grad_norm": 27.146992171033162, "learning_rate": 9.808125973748021e-06, "loss": 1.239, "step": 5345 }, { "epoch": 0.54, "grad_norm": 69.52604053781388, "learning_rate": 9.807320430159245e-06, "loss": 1.2823, "step": 5350 }, { "epoch": 0.54, "grad_norm": 22.185999195630405, "learning_rate": 9.806513232376097e-06, "loss": 1.2242, "step": 5355 }, { "epoch": 0.54, "grad_norm": 39.44914138709512, "learning_rate": 9.805704380676338e-06, "loss": 1.2085, "step": 5360 }, { "epoch": 0.54, "grad_norm": 12.255961127537361, "learning_rate": 9.804893875338292e-06, "loss": 1.1902, "step": 5365 }, { "epoch": 0.54, "grad_norm": 31.670780533498856, "learning_rate": 9.804081716640852e-06, "loss": 1.2313, "step": 5370 }, { "epoch": 0.54, "grad_norm": 13.131983864366022, "learning_rate": 9.803267904863483e-06, "loss": 1.215, "step": 5375 }, { "epoch": 0.54, "grad_norm": 10.312170515341023, "learning_rate": 9.802452440286215e-06, "loss": 1.2263, "step": 5380 }, { "epoch": 0.54, "grad_norm": 15.20883072214561, "learning_rate": 9.801635323189648e-06, "loss": 1.3343, "step": 5385 }, { "epoch": 0.54, "grad_norm": 21.616483837568865, "learning_rate": 9.800816553854952e-06, "loss": 1.2645, "step": 5390 }, { "epoch": 0.54, "grad_norm": 8.544994897236405, "learning_rate": 9.799996132563867e-06, "loss": 1.2552, "step": 5395 }, { "epoch": 0.54, "grad_norm": 22.379944292734105, "learning_rate": 9.799174059598697e-06, "loss": 1.2214, "step": 5400 }, { "epoch": 0.54, "grad_norm": 28.820684171774843, "learning_rate": 9.798350335242318e-06, "loss": 1.2612, "step": 5405 }, { "epoch": 0.55, "grad_norm": 17.531294494626554, "learning_rate": 9.797524959778169e-06, "loss": 1.2082, "step": 5410 }, { "epoch": 0.55, "grad_norm": 7.5919105729720595, "learning_rate": 9.796697933490265e-06, "loss": 1.2545, "step": 5415 }, { "epoch": 0.55, "grad_norm": 7.756273626218609, "learning_rate": 9.795869256663183e-06, "loss": 1.2734, "step": 5420 }, { "epoch": 0.55, "grad_norm": 29.31089089420677, "learning_rate": 9.79503892958207e-06, "loss": 1.2371, "step": 5425 }, { "epoch": 0.55, "grad_norm": 19.237086210719905, "learning_rate": 9.79420695253264e-06, "loss": 1.1942, "step": 5430 }, { "epoch": 0.55, "grad_norm": 43.10425467271122, "learning_rate": 9.79337332580118e-06, "loss": 1.2834, "step": 5435 }, { "epoch": 0.55, "grad_norm": 14.207577647989709, "learning_rate": 9.792538049674536e-06, "loss": 1.2432, "step": 5440 }, { "epoch": 0.55, "grad_norm": 15.049823339098804, "learning_rate": 9.791701124440123e-06, "loss": 1.3175, "step": 5445 }, { "epoch": 0.55, "grad_norm": 44.32935270103052, "learning_rate": 9.790862550385933e-06, "loss": 1.2502, "step": 5450 }, { "epoch": 0.55, "grad_norm": 36.82462495372997, "learning_rate": 9.790022327800515e-06, "loss": 1.2129, "step": 5455 }, { "epoch": 0.55, "grad_norm": 26.76749530527562, "learning_rate": 9.789180456972989e-06, "loss": 1.2581, "step": 5460 }, { "epoch": 0.55, "grad_norm": 71.981834353475, "learning_rate": 9.788336938193041e-06, "loss": 1.2926, "step": 5465 }, { "epoch": 0.55, "grad_norm": 69.81416038234114, "learning_rate": 9.787491771750925e-06, "loss": 1.3102, "step": 5470 }, { "epoch": 0.55, "grad_norm": 53.959547812668724, "learning_rate": 9.786644957937466e-06, "loss": 1.3059, "step": 5475 }, { "epoch": 0.55, "grad_norm": 124.99102839739543, "learning_rate": 9.785796497044047e-06, "loss": 1.2209, "step": 5480 }, { "epoch": 0.55, "grad_norm": 35.57188832728497, "learning_rate": 9.784946389362624e-06, "loss": 1.248, "step": 5485 }, { "epoch": 0.55, "grad_norm": 98.88287778453991, "learning_rate": 9.784094635185718e-06, "loss": 1.3008, "step": 5490 }, { "epoch": 0.55, "grad_norm": 120.06635252889072, "learning_rate": 9.783241234806417e-06, "loss": 1.2917, "step": 5495 }, { "epoch": 0.55, "grad_norm": 47.268439703258316, "learning_rate": 9.782386188518378e-06, "loss": 1.3227, "step": 5500 }, { "epoch": 0.56, "grad_norm": 16.03822977666455, "learning_rate": 9.781529496615819e-06, "loss": 1.2413, "step": 5505 }, { "epoch": 0.56, "grad_norm": 17.75968374946111, "learning_rate": 9.780671159393525e-06, "loss": 1.2753, "step": 5510 }, { "epoch": 0.56, "grad_norm": 12.653797529576986, "learning_rate": 9.779811177146854e-06, "loss": 1.1932, "step": 5515 }, { "epoch": 0.56, "grad_norm": 21.42135697910021, "learning_rate": 9.778949550171719e-06, "loss": 1.2838, "step": 5520 }, { "epoch": 0.56, "grad_norm": 19.442380055841458, "learning_rate": 9.77808627876461e-06, "loss": 1.2718, "step": 5525 }, { "epoch": 0.56, "grad_norm": 25.571013974374363, "learning_rate": 9.777221363222576e-06, "loss": 1.2893, "step": 5530 }, { "epoch": 0.56, "grad_norm": 11.107924256021452, "learning_rate": 9.776354803843233e-06, "loss": 1.223, "step": 5535 }, { "epoch": 0.56, "grad_norm": 22.823538370422042, "learning_rate": 9.775486600924765e-06, "loss": 1.205, "step": 5540 }, { "epoch": 0.56, "grad_norm": 10.079573929956554, "learning_rate": 9.774616754765918e-06, "loss": 1.2629, "step": 5545 }, { "epoch": 0.56, "grad_norm": 19.370464473154634, "learning_rate": 9.773745265666006e-06, "loss": 1.2299, "step": 5550 }, { "epoch": 0.56, "grad_norm": 10.160721101979744, "learning_rate": 9.772872133924907e-06, "loss": 1.2204, "step": 5555 }, { "epoch": 0.56, "grad_norm": 24.96434652482283, "learning_rate": 9.771997359843066e-06, "loss": 1.2244, "step": 5560 }, { "epoch": 0.56, "grad_norm": 27.007797934357004, "learning_rate": 9.771120943721492e-06, "loss": 1.2095, "step": 5565 }, { "epoch": 0.56, "grad_norm": 7.687110756133366, "learning_rate": 9.770242885861757e-06, "loss": 1.1947, "step": 5570 }, { "epoch": 0.56, "grad_norm": 7.129469968585911, "learning_rate": 9.769363186566e-06, "loss": 1.2495, "step": 5575 }, { "epoch": 0.56, "grad_norm": 15.770422506749258, "learning_rate": 9.76848184613693e-06, "loss": 1.2225, "step": 5580 }, { "epoch": 0.56, "grad_norm": 22.39907626509249, "learning_rate": 9.767598864877808e-06, "loss": 1.1946, "step": 5585 }, { "epoch": 0.56, "grad_norm": 23.564102543690783, "learning_rate": 9.76671424309247e-06, "loss": 1.2236, "step": 5590 }, { "epoch": 0.56, "grad_norm": 13.498061833519296, "learning_rate": 9.765827981085314e-06, "loss": 1.2342, "step": 5595 }, { "epoch": 0.56, "grad_norm": 14.157141212968256, "learning_rate": 9.764940079161302e-06, "loss": 1.265, "step": 5600 }, { "epoch": 0.57, "grad_norm": 11.234968885987945, "learning_rate": 9.76405053762596e-06, "loss": 1.2341, "step": 5605 }, { "epoch": 0.57, "grad_norm": 10.741718694760706, "learning_rate": 9.76315935678538e-06, "loss": 1.1982, "step": 5610 }, { "epoch": 0.57, "grad_norm": 8.162597237447422, "learning_rate": 9.76226653694621e-06, "loss": 1.2361, "step": 5615 }, { "epoch": 0.57, "grad_norm": 21.02779574214582, "learning_rate": 9.761372078415675e-06, "loss": 1.2287, "step": 5620 }, { "epoch": 0.57, "grad_norm": 18.851378631057123, "learning_rate": 9.760475981501558e-06, "loss": 1.2333, "step": 5625 }, { "epoch": 0.57, "grad_norm": 9.78763285674776, "learning_rate": 9.7595782465122e-06, "loss": 1.2298, "step": 5630 }, { "epoch": 0.57, "grad_norm": 7.69984737716517, "learning_rate": 9.758678873756515e-06, "loss": 1.2666, "step": 5635 }, { "epoch": 0.57, "grad_norm": 11.042149705146223, "learning_rate": 9.757777863543973e-06, "loss": 1.2436, "step": 5640 }, { "epoch": 0.57, "grad_norm": 8.11977096613918, "learning_rate": 9.756875216184614e-06, "loss": 1.2466, "step": 5645 }, { "epoch": 0.57, "grad_norm": 9.664705978775967, "learning_rate": 9.755970931989035e-06, "loss": 1.2387, "step": 5650 }, { "epoch": 0.57, "grad_norm": 13.252407580225176, "learning_rate": 9.755065011268401e-06, "loss": 1.2514, "step": 5655 }, { "epoch": 0.57, "grad_norm": 11.59180227816643, "learning_rate": 9.754157454334439e-06, "loss": 1.2036, "step": 5660 }, { "epoch": 0.57, "grad_norm": 12.266010042715896, "learning_rate": 9.753248261499437e-06, "loss": 1.2018, "step": 5665 }, { "epoch": 0.57, "grad_norm": 19.68909605489335, "learning_rate": 9.752337433076248e-06, "loss": 1.2048, "step": 5670 }, { "epoch": 0.57, "grad_norm": 44.27561117531063, "learning_rate": 9.751424969378286e-06, "loss": 1.2434, "step": 5675 }, { "epoch": 0.57, "grad_norm": 15.319303141711027, "learning_rate": 9.750510870719532e-06, "loss": 1.1879, "step": 5680 }, { "epoch": 0.57, "grad_norm": 25.15578221338594, "learning_rate": 9.749595137414525e-06, "loss": 1.2388, "step": 5685 }, { "epoch": 0.57, "grad_norm": 14.321282031004424, "learning_rate": 9.748677769778368e-06, "loss": 1.2015, "step": 5690 }, { "epoch": 0.57, "grad_norm": 27.291663057795933, "learning_rate": 9.747758768126724e-06, "loss": 1.2821, "step": 5695 }, { "epoch": 0.57, "grad_norm": 8.251920040152498, "learning_rate": 9.746838132775823e-06, "loss": 1.1904, "step": 5700 }, { "epoch": 0.58, "grad_norm": 8.673801034324477, "learning_rate": 9.745915864042455e-06, "loss": 1.212, "step": 5705 }, { "epoch": 0.58, "grad_norm": 8.009051061546915, "learning_rate": 9.744991962243971e-06, "loss": 1.2011, "step": 5710 }, { "epoch": 0.58, "grad_norm": 16.415166435895298, "learning_rate": 9.744066427698285e-06, "loss": 1.2115, "step": 5715 }, { "epoch": 0.58, "grad_norm": 7.864972467275857, "learning_rate": 9.743139260723871e-06, "loss": 1.2226, "step": 5720 }, { "epoch": 0.58, "grad_norm": 9.83282651933836, "learning_rate": 9.74221046163977e-06, "loss": 1.3015, "step": 5725 }, { "epoch": 0.58, "grad_norm": 20.517980595445845, "learning_rate": 9.741280030765576e-06, "loss": 1.2031, "step": 5730 }, { "epoch": 0.58, "grad_norm": 10.334819861708224, "learning_rate": 9.740347968421453e-06, "loss": 1.2512, "step": 5735 }, { "epoch": 0.58, "grad_norm": 19.394363447308308, "learning_rate": 9.739414274928121e-06, "loss": 1.3025, "step": 5740 }, { "epoch": 0.58, "grad_norm": 9.250141257436102, "learning_rate": 9.738478950606864e-06, "loss": 1.2673, "step": 5745 }, { "epoch": 0.58, "grad_norm": 25.409589436587748, "learning_rate": 9.737541995779526e-06, "loss": 1.1938, "step": 5750 }, { "epoch": 0.58, "grad_norm": 8.857813684638622, "learning_rate": 9.736603410768513e-06, "loss": 1.1694, "step": 5755 }, { "epoch": 0.58, "grad_norm": 7.8243029109924285, "learning_rate": 9.735663195896789e-06, "loss": 1.1978, "step": 5760 }, { "epoch": 0.58, "grad_norm": 7.954461871946666, "learning_rate": 9.734721351487881e-06, "loss": 1.2346, "step": 5765 }, { "epoch": 0.58, "grad_norm": 6.316762828559021, "learning_rate": 9.73377787786588e-06, "loss": 1.2735, "step": 5770 }, { "epoch": 0.58, "grad_norm": 13.68701531419754, "learning_rate": 9.732832775355434e-06, "loss": 1.2395, "step": 5775 }, { "epoch": 0.58, "grad_norm": 21.106624359950015, "learning_rate": 9.731886044281748e-06, "loss": 1.2553, "step": 5780 }, { "epoch": 0.58, "grad_norm": 26.419039922771095, "learning_rate": 9.730937684970594e-06, "loss": 1.1864, "step": 5785 }, { "epoch": 0.58, "grad_norm": 14.47606213050339, "learning_rate": 9.729987697748303e-06, "loss": 1.2524, "step": 5790 }, { "epoch": 0.58, "grad_norm": 11.793629060293263, "learning_rate": 9.72903608294176e-06, "loss": 1.2426, "step": 5795 }, { "epoch": 0.58, "grad_norm": 7.5793849168018435, "learning_rate": 9.72808284087842e-06, "loss": 1.2307, "step": 5800 }, { "epoch": 0.59, "grad_norm": 32.128228531886656, "learning_rate": 9.727127971886289e-06, "loss": 1.2282, "step": 5805 }, { "epoch": 0.59, "grad_norm": 9.432836679461422, "learning_rate": 9.726171476293937e-06, "loss": 1.2244, "step": 5810 }, { "epoch": 0.59, "grad_norm": 19.641558757565917, "learning_rate": 9.725213354430496e-06, "loss": 1.2558, "step": 5815 }, { "epoch": 0.59, "grad_norm": 26.33319425625301, "learning_rate": 9.724253606625651e-06, "loss": 1.2333, "step": 5820 }, { "epoch": 0.59, "grad_norm": 51.359947539006086, "learning_rate": 9.723292233209653e-06, "loss": 1.2724, "step": 5825 }, { "epoch": 0.59, "grad_norm": 32.29157944242356, "learning_rate": 9.72232923451331e-06, "loss": 1.2224, "step": 5830 }, { "epoch": 0.59, "grad_norm": 8.222712003517096, "learning_rate": 9.721364610867988e-06, "loss": 1.2052, "step": 5835 }, { "epoch": 0.59, "grad_norm": 12.545359419012799, "learning_rate": 9.72039836260561e-06, "loss": 1.2601, "step": 5840 }, { "epoch": 0.59, "grad_norm": 10.516808421680913, "learning_rate": 9.719430490058666e-06, "loss": 1.2155, "step": 5845 }, { "epoch": 0.59, "grad_norm": 22.072674421278542, "learning_rate": 9.718460993560197e-06, "loss": 1.2381, "step": 5850 }, { "epoch": 0.59, "grad_norm": 26.640128221824423, "learning_rate": 9.717489873443807e-06, "loss": 1.2868, "step": 5855 }, { "epoch": 0.59, "grad_norm": 15.269708168787664, "learning_rate": 9.716517130043658e-06, "loss": 1.2279, "step": 5860 }, { "epoch": 0.59, "grad_norm": 16.005739462293075, "learning_rate": 9.715542763694469e-06, "loss": 1.2376, "step": 5865 }, { "epoch": 0.59, "grad_norm": 12.050689181115803, "learning_rate": 9.714566774731518e-06, "loss": 1.2021, "step": 5870 }, { "epoch": 0.59, "grad_norm": 12.528858851920072, "learning_rate": 9.713589163490645e-06, "loss": 1.2167, "step": 5875 }, { "epoch": 0.59, "grad_norm": 9.533475071584181, "learning_rate": 9.71260993030824e-06, "loss": 1.2132, "step": 5880 }, { "epoch": 0.59, "grad_norm": 11.651551555020754, "learning_rate": 9.71162907552126e-06, "loss": 1.2411, "step": 5885 }, { "epoch": 0.59, "grad_norm": 9.091995680595648, "learning_rate": 9.710646599467215e-06, "loss": 1.3136, "step": 5890 }, { "epoch": 0.59, "grad_norm": 7.703057172358534, "learning_rate": 9.709662502484177e-06, "loss": 1.2139, "step": 5895 }, { "epoch": 0.59, "grad_norm": 7.568655647470476, "learning_rate": 9.708676784910767e-06, "loss": 1.2019, "step": 5900 }, { "epoch": 0.6, "grad_norm": 9.499941306187813, "learning_rate": 9.707689447086174e-06, "loss": 1.2124, "step": 5905 }, { "epoch": 0.6, "grad_norm": 10.596113261143561, "learning_rate": 9.706700489350137e-06, "loss": 1.2569, "step": 5910 }, { "epoch": 0.6, "grad_norm": 8.712435257088414, "learning_rate": 9.705709912042959e-06, "loss": 1.2312, "step": 5915 }, { "epoch": 0.6, "grad_norm": 15.448047445432763, "learning_rate": 9.704717715505494e-06, "loss": 1.2094, "step": 5920 }, { "epoch": 0.6, "grad_norm": 10.309107814440093, "learning_rate": 9.703723900079156e-06, "loss": 1.2249, "step": 5925 }, { "epoch": 0.6, "grad_norm": 7.665381140645497, "learning_rate": 9.702728466105918e-06, "loss": 1.2193, "step": 5930 }, { "epoch": 0.6, "grad_norm": 7.949250005352762, "learning_rate": 9.701731413928305e-06, "loss": 1.2466, "step": 5935 }, { "epoch": 0.6, "grad_norm": 44.720747917861885, "learning_rate": 9.700732743889402e-06, "loss": 1.2538, "step": 5940 }, { "epoch": 0.6, "grad_norm": 37.26154099664765, "learning_rate": 9.699732456332855e-06, "loss": 1.2931, "step": 5945 }, { "epoch": 0.6, "grad_norm": 60.81777030005192, "learning_rate": 9.698730551602857e-06, "loss": 1.229, "step": 5950 }, { "epoch": 0.6, "grad_norm": 32.24160414710352, "learning_rate": 9.697727030044165e-06, "loss": 1.2347, "step": 5955 }, { "epoch": 0.6, "grad_norm": 7.549838623118922, "learning_rate": 9.696721892002086e-06, "loss": 1.2633, "step": 5960 }, { "epoch": 0.6, "grad_norm": 8.840685661279561, "learning_rate": 9.695715137822491e-06, "loss": 1.2155, "step": 5965 }, { "epoch": 0.6, "grad_norm": 45.466885900243135, "learning_rate": 9.694706767851803e-06, "loss": 1.2183, "step": 5970 }, { "epoch": 0.6, "grad_norm": 47.74491389796742, "learning_rate": 9.693696782436999e-06, "loss": 1.2244, "step": 5975 }, { "epoch": 0.6, "grad_norm": 38.78516801788493, "learning_rate": 9.692685181925616e-06, "loss": 1.2745, "step": 5980 }, { "epoch": 0.6, "grad_norm": 8.448991128639108, "learning_rate": 9.691671966665743e-06, "loss": 1.2304, "step": 5985 }, { "epoch": 0.6, "grad_norm": 8.417128118229789, "learning_rate": 9.690657137006028e-06, "loss": 1.2243, "step": 5990 }, { "epoch": 0.6, "grad_norm": 11.355053182742228, "learning_rate": 9.689640693295673e-06, "loss": 1.2213, "step": 5995 }, { "epoch": 0.6, "grad_norm": 10.028209869242007, "learning_rate": 9.688622635884434e-06, "loss": 1.2339, "step": 6000 }, { "epoch": 0.61, "grad_norm": 10.668657050541347, "learning_rate": 9.687602965122624e-06, "loss": 1.2313, "step": 6005 }, { "epoch": 0.61, "grad_norm": 11.902714837352619, "learning_rate": 9.686581681361112e-06, "loss": 1.2749, "step": 6010 }, { "epoch": 0.61, "grad_norm": 14.917042572282254, "learning_rate": 9.685558784951318e-06, "loss": 1.1797, "step": 6015 }, { "epoch": 0.61, "grad_norm": 15.440952998521315, "learning_rate": 9.684534276245222e-06, "loss": 1.2392, "step": 6020 }, { "epoch": 0.61, "grad_norm": 28.600345521150423, "learning_rate": 9.683508155595355e-06, "loss": 1.2236, "step": 6025 }, { "epoch": 0.61, "grad_norm": 55.11319802817678, "learning_rate": 9.682480423354805e-06, "loss": 1.206, "step": 6030 }, { "epoch": 0.61, "grad_norm": 60.53880107700804, "learning_rate": 9.681451079877214e-06, "loss": 1.2893, "step": 6035 }, { "epoch": 0.61, "grad_norm": 21.035566080355625, "learning_rate": 9.680420125516779e-06, "loss": 1.2402, "step": 6040 }, { "epoch": 0.61, "grad_norm": 15.617150535642468, "learning_rate": 9.679387560628247e-06, "loss": 1.206, "step": 6045 }, { "epoch": 0.61, "grad_norm": 18.093646024119007, "learning_rate": 9.678353385566926e-06, "loss": 1.2368, "step": 6050 }, { "epoch": 0.61, "grad_norm": 18.733954738163668, "learning_rate": 9.677317600688674e-06, "loss": 1.2561, "step": 6055 }, { "epoch": 0.61, "grad_norm": 11.341747092645615, "learning_rate": 9.676280206349902e-06, "loss": 1.2127, "step": 6060 }, { "epoch": 0.61, "grad_norm": 8.062867324416539, "learning_rate": 9.675241202907577e-06, "loss": 1.2512, "step": 6065 }, { "epoch": 0.61, "grad_norm": 7.630871536778502, "learning_rate": 9.674200590719219e-06, "loss": 1.1599, "step": 6070 }, { "epoch": 0.61, "grad_norm": 10.54550227669717, "learning_rate": 9.673158370142902e-06, "loss": 1.158, "step": 6075 }, { "epoch": 0.61, "grad_norm": 17.44815630750628, "learning_rate": 9.672114541537255e-06, "loss": 1.2572, "step": 6080 }, { "epoch": 0.61, "grad_norm": 8.579969741515313, "learning_rate": 9.671069105261457e-06, "loss": 1.1934, "step": 6085 }, { "epoch": 0.61, "grad_norm": 6.899566769739347, "learning_rate": 9.67002206167524e-06, "loss": 1.26, "step": 6090 }, { "epoch": 0.61, "grad_norm": 8.65903029035593, "learning_rate": 9.66897341113889e-06, "loss": 1.2176, "step": 6095 }, { "epoch": 0.62, "grad_norm": 11.406093802458411, "learning_rate": 9.667923154013252e-06, "loss": 1.2592, "step": 6100 }, { "epoch": 0.62, "grad_norm": 16.11693527850338, "learning_rate": 9.666871290659715e-06, "loss": 1.219, "step": 6105 }, { "epoch": 0.62, "grad_norm": 15.848587868111656, "learning_rate": 9.665817821440223e-06, "loss": 1.2077, "step": 6110 }, { "epoch": 0.62, "grad_norm": 11.238908843851462, "learning_rate": 9.664762746717274e-06, "loss": 1.2573, "step": 6115 }, { "epoch": 0.62, "grad_norm": 12.335079682624038, "learning_rate": 9.66370606685392e-06, "loss": 1.2615, "step": 6120 }, { "epoch": 0.62, "grad_norm": 12.274431760147126, "learning_rate": 9.662647782213763e-06, "loss": 1.2101, "step": 6125 }, { "epoch": 0.62, "grad_norm": 9.259435803547051, "learning_rate": 9.661587893160957e-06, "loss": 1.2227, "step": 6130 }, { "epoch": 0.62, "grad_norm": 18.325180356389684, "learning_rate": 9.66052640006021e-06, "loss": 1.2377, "step": 6135 }, { "epoch": 0.62, "grad_norm": 10.132396290240596, "learning_rate": 9.659463303276779e-06, "loss": 1.2625, "step": 6140 }, { "epoch": 0.62, "grad_norm": 7.713191427068931, "learning_rate": 9.658398603176478e-06, "loss": 1.2974, "step": 6145 }, { "epoch": 0.62, "grad_norm": 17.338937889405493, "learning_rate": 9.657332300125665e-06, "loss": 1.2185, "step": 6150 }, { "epoch": 0.62, "grad_norm": 21.81946959153514, "learning_rate": 9.656264394491256e-06, "loss": 1.1804, "step": 6155 }, { "epoch": 0.62, "grad_norm": 17.97123339745943, "learning_rate": 9.655194886640715e-06, "loss": 1.2721, "step": 6160 }, { "epoch": 0.62, "grad_norm": 20.83391204567327, "learning_rate": 9.654123776942061e-06, "loss": 1.2028, "step": 6165 }, { "epoch": 0.62, "grad_norm": 18.73028944994978, "learning_rate": 9.653051065763862e-06, "loss": 1.3421, "step": 6170 }, { "epoch": 0.62, "grad_norm": 53.7545975218364, "learning_rate": 9.651976753475234e-06, "loss": 1.2165, "step": 6175 }, { "epoch": 0.62, "grad_norm": 77.10926771492795, "learning_rate": 9.650900840445848e-06, "loss": 1.2407, "step": 6180 }, { "epoch": 0.62, "grad_norm": 14.79236867444254, "learning_rate": 9.649823327045924e-06, "loss": 1.2756, "step": 6185 }, { "epoch": 0.62, "grad_norm": 17.08348453792601, "learning_rate": 9.648744213646236e-06, "loss": 1.2452, "step": 6190 }, { "epoch": 0.62, "grad_norm": 37.571486457997665, "learning_rate": 9.647663500618105e-06, "loss": 1.2712, "step": 6195 }, { "epoch": 0.63, "grad_norm": 62.85049842589145, "learning_rate": 9.6465811883334e-06, "loss": 1.2964, "step": 6200 }, { "epoch": 0.63, "grad_norm": 38.57779119365313, "learning_rate": 9.645497277164547e-06, "loss": 1.2494, "step": 6205 }, { "epoch": 0.63, "grad_norm": 20.42096778530192, "learning_rate": 9.644411767484518e-06, "loss": 1.2838, "step": 6210 }, { "epoch": 0.63, "grad_norm": 22.478502839401152, "learning_rate": 9.643324659666835e-06, "loss": 1.2418, "step": 6215 }, { "epoch": 0.63, "grad_norm": 22.76077350421602, "learning_rate": 9.642235954085572e-06, "loss": 1.2485, "step": 6220 }, { "epoch": 0.63, "grad_norm": 16.03143192056874, "learning_rate": 9.641145651115353e-06, "loss": 1.2521, "step": 6225 }, { "epoch": 0.63, "grad_norm": 11.175516763267444, "learning_rate": 9.640053751131346e-06, "loss": 1.2065, "step": 6230 }, { "epoch": 0.63, "grad_norm": 28.09344221326846, "learning_rate": 9.638960254509275e-06, "loss": 1.2107, "step": 6235 }, { "epoch": 0.63, "grad_norm": 13.04588546999759, "learning_rate": 9.637865161625413e-06, "loss": 1.2309, "step": 6240 }, { "epoch": 0.63, "grad_norm": 10.280353040372706, "learning_rate": 9.636768472856576e-06, "loss": 1.2752, "step": 6245 }, { "epoch": 0.63, "grad_norm": 11.312458044057468, "learning_rate": 9.635670188580137e-06, "loss": 1.2841, "step": 6250 }, { "epoch": 0.63, "grad_norm": 18.780242029323798, "learning_rate": 9.634570309174014e-06, "loss": 1.265, "step": 6255 }, { "epoch": 0.63, "grad_norm": 10.44977312785592, "learning_rate": 9.633468835016675e-06, "loss": 1.2838, "step": 6260 }, { "epoch": 0.63, "grad_norm": 17.267491130984855, "learning_rate": 9.632365766487135e-06, "loss": 1.2211, "step": 6265 }, { "epoch": 0.63, "grad_norm": 35.17393142764635, "learning_rate": 9.631261103964958e-06, "loss": 1.2804, "step": 6270 }, { "epoch": 0.63, "grad_norm": 37.53819962798613, "learning_rate": 9.63015484783026e-06, "loss": 1.2865, "step": 6275 }, { "epoch": 0.63, "grad_norm": 34.51336920403124, "learning_rate": 9.6290469984637e-06, "loss": 1.2361, "step": 6280 }, { "epoch": 0.63, "grad_norm": 26.10458429883579, "learning_rate": 9.62793755624649e-06, "loss": 1.2768, "step": 6285 }, { "epoch": 0.63, "grad_norm": 34.46040241880211, "learning_rate": 9.626826521560387e-06, "loss": 1.2453, "step": 6290 }, { "epoch": 0.63, "grad_norm": 88.14491517650183, "learning_rate": 9.625713894787696e-06, "loss": 1.2459, "step": 6295 }, { "epoch": 0.64, "grad_norm": 76.31252087250449, "learning_rate": 9.624599676311273e-06, "loss": 1.2018, "step": 6300 }, { "epoch": 0.64, "grad_norm": 28.64535600337029, "learning_rate": 9.623483866514517e-06, "loss": 1.2601, "step": 6305 }, { "epoch": 0.64, "grad_norm": 18.035308610589407, "learning_rate": 9.622366465781378e-06, "loss": 1.2424, "step": 6310 }, { "epoch": 0.64, "grad_norm": 12.908505676676313, "learning_rate": 9.621247474496357e-06, "loss": 1.2472, "step": 6315 }, { "epoch": 0.64, "grad_norm": 18.115144941515627, "learning_rate": 9.620126893044491e-06, "loss": 1.2438, "step": 6320 }, { "epoch": 0.64, "grad_norm": 37.6967841689743, "learning_rate": 9.619004721811372e-06, "loss": 1.2521, "step": 6325 }, { "epoch": 0.64, "grad_norm": 23.183543834786384, "learning_rate": 9.617880961183143e-06, "loss": 1.2517, "step": 6330 }, { "epoch": 0.64, "grad_norm": 21.772660530997452, "learning_rate": 9.616755611546484e-06, "loss": 1.2042, "step": 6335 }, { "epoch": 0.64, "grad_norm": 8.213710289509011, "learning_rate": 9.615628673288629e-06, "loss": 1.2467, "step": 6340 }, { "epoch": 0.64, "grad_norm": 17.00506589316768, "learning_rate": 9.614500146797356e-06, "loss": 1.2252, "step": 6345 }, { "epoch": 0.64, "grad_norm": 7.687034743993428, "learning_rate": 9.61337003246099e-06, "loss": 1.2222, "step": 6350 }, { "epoch": 0.64, "grad_norm": 10.589184643064405, "learning_rate": 9.612238330668401e-06, "loss": 1.1862, "step": 6355 }, { "epoch": 0.64, "grad_norm": 8.600735030357516, "learning_rate": 9.61110504180901e-06, "loss": 1.2781, "step": 6360 }, { "epoch": 0.64, "grad_norm": 7.895062163861428, "learning_rate": 9.609970166272777e-06, "loss": 1.2641, "step": 6365 }, { "epoch": 0.64, "grad_norm": 9.94101027761609, "learning_rate": 9.608833704450213e-06, "loss": 1.1981, "step": 6370 }, { "epoch": 0.64, "grad_norm": 8.047485735007022, "learning_rate": 9.607695656732375e-06, "loss": 1.2508, "step": 6375 }, { "epoch": 0.64, "grad_norm": 7.586597449322517, "learning_rate": 9.60655602351086e-06, "loss": 1.2631, "step": 6380 }, { "epoch": 0.64, "grad_norm": 8.42166296432965, "learning_rate": 9.605414805177817e-06, "loss": 1.2241, "step": 6385 }, { "epoch": 0.64, "grad_norm": 7.152314852090806, "learning_rate": 9.604272002125938e-06, "loss": 1.2114, "step": 6390 }, { "epoch": 0.64, "grad_norm": 9.068297972056287, "learning_rate": 9.603127614748461e-06, "loss": 1.2552, "step": 6395 }, { "epoch": 0.65, "grad_norm": 19.653506664350946, "learning_rate": 9.601981643439168e-06, "loss": 1.259, "step": 6400 }, { "epoch": 0.65, "grad_norm": 41.18724376017987, "learning_rate": 9.600834088592388e-06, "loss": 1.2159, "step": 6405 }, { "epoch": 0.65, "grad_norm": 24.876109952152202, "learning_rate": 9.599684950602991e-06, "loss": 1.2188, "step": 6410 }, { "epoch": 0.65, "grad_norm": 22.76694558163489, "learning_rate": 9.598534229866398e-06, "loss": 1.2662, "step": 6415 }, { "epoch": 0.65, "grad_norm": 17.60411778830573, "learning_rate": 9.597381926778567e-06, "loss": 1.2528, "step": 6420 }, { "epoch": 0.65, "grad_norm": 24.16780324595837, "learning_rate": 9.596228041736007e-06, "loss": 1.2151, "step": 6425 }, { "epoch": 0.65, "grad_norm": 16.10966313520157, "learning_rate": 9.595072575135767e-06, "loss": 1.2488, "step": 6430 }, { "epoch": 0.65, "grad_norm": 12.37016194782002, "learning_rate": 9.593915527375443e-06, "loss": 1.2451, "step": 6435 }, { "epoch": 0.65, "grad_norm": 30.760442676404576, "learning_rate": 9.592756898853173e-06, "loss": 1.1768, "step": 6440 }, { "epoch": 0.65, "grad_norm": 7.9327806692135825, "learning_rate": 9.591596689967642e-06, "loss": 1.239, "step": 6445 }, { "epoch": 0.65, "grad_norm": 7.935892823030629, "learning_rate": 9.590434901118073e-06, "loss": 1.2067, "step": 6450 }, { "epoch": 0.65, "grad_norm": 10.38240903059879, "learning_rate": 9.58927153270424e-06, "loss": 1.2602, "step": 6455 }, { "epoch": 0.65, "grad_norm": 15.125354639455605, "learning_rate": 9.588106585126457e-06, "loss": 1.2293, "step": 6460 }, { "epoch": 0.65, "grad_norm": 9.382238333633055, "learning_rate": 9.58694005878558e-06, "loss": 1.3058, "step": 6465 }, { "epoch": 0.65, "grad_norm": 26.545333628441504, "learning_rate": 9.58577195408301e-06, "loss": 1.2527, "step": 6470 }, { "epoch": 0.65, "grad_norm": 9.298505320684617, "learning_rate": 9.584602271420688e-06, "loss": 1.275, "step": 6475 }, { "epoch": 0.65, "grad_norm": 26.275263338563576, "learning_rate": 9.583431011201105e-06, "loss": 1.251, "step": 6480 }, { "epoch": 0.65, "grad_norm": 9.408209269007875, "learning_rate": 9.58225817382729e-06, "loss": 1.2217, "step": 6485 }, { "epoch": 0.65, "grad_norm": 30.065555129882664, "learning_rate": 9.581083759702813e-06, "loss": 1.2567, "step": 6490 }, { "epoch": 0.65, "grad_norm": 27.407127676629674, "learning_rate": 9.579907769231789e-06, "loss": 1.2801, "step": 6495 }, { "epoch": 0.66, "grad_norm": 28.56496645801653, "learning_rate": 9.578730202818875e-06, "loss": 1.2509, "step": 6500 }, { "epoch": 0.66, "grad_norm": 17.31986863958474, "learning_rate": 9.577551060869274e-06, "loss": 1.2163, "step": 6505 }, { "epoch": 0.66, "grad_norm": 9.653806228046228, "learning_rate": 9.576370343788723e-06, "loss": 1.2312, "step": 6510 }, { "epoch": 0.66, "grad_norm": 13.001146930549856, "learning_rate": 9.57518805198351e-06, "loss": 1.2487, "step": 6515 }, { "epoch": 0.66, "grad_norm": 11.961593528674674, "learning_rate": 9.574004185860456e-06, "loss": 1.2569, "step": 6520 }, { "epoch": 0.66, "grad_norm": 36.483473798179794, "learning_rate": 9.57281874582693e-06, "loss": 1.2644, "step": 6525 }, { "epoch": 0.66, "grad_norm": 39.88278761239571, "learning_rate": 9.571631732290842e-06, "loss": 1.2123, "step": 6530 }, { "epoch": 0.66, "grad_norm": 30.69636858528973, "learning_rate": 9.570443145660643e-06, "loss": 1.2888, "step": 6535 }, { "epoch": 0.66, "grad_norm": 16.863130870943063, "learning_rate": 9.56925298634532e-06, "loss": 1.2528, "step": 6540 }, { "epoch": 0.66, "grad_norm": 23.332506732937507, "learning_rate": 9.568061254754411e-06, "loss": 1.244, "step": 6545 }, { "epoch": 0.66, "grad_norm": 21.000864386202174, "learning_rate": 9.566867951297985e-06, "loss": 1.1825, "step": 6550 }, { "epoch": 0.66, "grad_norm": 24.943736587546084, "learning_rate": 9.56567307638666e-06, "loss": 1.2365, "step": 6555 }, { "epoch": 0.66, "grad_norm": 26.197989638859376, "learning_rate": 9.56447663043159e-06, "loss": 1.2024, "step": 6560 }, { "epoch": 0.66, "grad_norm": 27.33670350537627, "learning_rate": 9.56327861384447e-06, "loss": 1.2344, "step": 6565 }, { "epoch": 0.66, "grad_norm": 54.36381056789312, "learning_rate": 9.56207902703754e-06, "loss": 1.2252, "step": 6570 }, { "epoch": 0.66, "grad_norm": 24.11108123688655, "learning_rate": 9.560877870423571e-06, "loss": 1.255, "step": 6575 }, { "epoch": 0.66, "grad_norm": 26.91323668325551, "learning_rate": 9.559675144415884e-06, "loss": 1.2607, "step": 6580 }, { "epoch": 0.66, "grad_norm": 18.21113053277181, "learning_rate": 9.558470849428336e-06, "loss": 1.2771, "step": 6585 }, { "epoch": 0.66, "grad_norm": 21.156681839865353, "learning_rate": 9.557264985875322e-06, "loss": 1.2141, "step": 6590 }, { "epoch": 0.66, "grad_norm": 14.857850606300715, "learning_rate": 9.556057554171779e-06, "loss": 1.2938, "step": 6595 }, { "epoch": 0.67, "grad_norm": 36.77440472712633, "learning_rate": 9.554848554733183e-06, "loss": 1.2396, "step": 6600 }, { "epoch": 0.67, "grad_norm": 32.7688105252039, "learning_rate": 9.55363798797555e-06, "loss": 1.2142, "step": 6605 }, { "epoch": 0.67, "grad_norm": 18.13894292347681, "learning_rate": 9.552425854315434e-06, "loss": 1.2874, "step": 6610 }, { "epoch": 0.67, "grad_norm": 26.93915633805332, "learning_rate": 9.55121215416993e-06, "loss": 1.2832, "step": 6615 }, { "epoch": 0.67, "grad_norm": 18.665374233208155, "learning_rate": 9.549996887956669e-06, "loss": 1.2471, "step": 6620 }, { "epoch": 0.67, "grad_norm": 14.125730711800594, "learning_rate": 9.548780056093826e-06, "loss": 1.2374, "step": 6625 }, { "epoch": 0.67, "grad_norm": 18.90762837288638, "learning_rate": 9.547561659000112e-06, "loss": 1.2515, "step": 6630 }, { "epoch": 0.67, "grad_norm": 22.7067280652705, "learning_rate": 9.546341697094772e-06, "loss": 1.2299, "step": 6635 }, { "epoch": 0.67, "grad_norm": 24.480892629449738, "learning_rate": 9.545120170797596e-06, "loss": 1.2745, "step": 6640 }, { "epoch": 0.67, "grad_norm": 13.729366330188203, "learning_rate": 9.543897080528912e-06, "loss": 1.2389, "step": 6645 }, { "epoch": 0.67, "grad_norm": 9.27919718275337, "learning_rate": 9.542672426709582e-06, "loss": 1.2478, "step": 6650 }, { "epoch": 0.67, "grad_norm": 29.536054001370225, "learning_rate": 9.54144620976101e-06, "loss": 1.208, "step": 6655 }, { "epoch": 0.67, "grad_norm": 9.512285015383293, "learning_rate": 9.540218430105133e-06, "loss": 1.227, "step": 6660 }, { "epoch": 0.67, "grad_norm": 7.868686602501446, "learning_rate": 9.53898908816443e-06, "loss": 1.2358, "step": 6665 }, { "epoch": 0.67, "grad_norm": 6.812767303201554, "learning_rate": 9.537758184361919e-06, "loss": 1.2234, "step": 6670 }, { "epoch": 0.67, "grad_norm": 6.52552803155819, "learning_rate": 9.536525719121151e-06, "loss": 1.256, "step": 6675 }, { "epoch": 0.67, "grad_norm": 8.192657871379609, "learning_rate": 9.535291692866214e-06, "loss": 1.2279, "step": 6680 }, { "epoch": 0.67, "grad_norm": 7.97303139318268, "learning_rate": 9.534056106021739e-06, "loss": 1.2419, "step": 6685 }, { "epoch": 0.67, "grad_norm": 8.820777015333706, "learning_rate": 9.532818959012885e-06, "loss": 1.2114, "step": 6690 }, { "epoch": 0.68, "grad_norm": 18.200670156862532, "learning_rate": 9.53158025226536e-06, "loss": 1.2167, "step": 6695 }, { "epoch": 0.68, "grad_norm": 19.482322599930992, "learning_rate": 9.530339986205398e-06, "loss": 1.2416, "step": 6700 }, { "epoch": 0.68, "grad_norm": 23.067243107377486, "learning_rate": 9.529098161259774e-06, "loss": 1.2141, "step": 6705 }, { "epoch": 0.68, "grad_norm": 6.4198088592960145, "learning_rate": 9.527854777855797e-06, "loss": 1.2327, "step": 6710 }, { "epoch": 0.68, "grad_norm": 8.785443206784343, "learning_rate": 9.526609836421316e-06, "loss": 1.226, "step": 6715 }, { "epoch": 0.68, "grad_norm": 20.46431809622519, "learning_rate": 9.525363337384715e-06, "loss": 1.2453, "step": 6720 }, { "epoch": 0.68, "grad_norm": 36.130670249795706, "learning_rate": 9.52411528117491e-06, "loss": 1.2554, "step": 6725 }, { "epoch": 0.68, "grad_norm": 22.559710806107194, "learning_rate": 9.522865668221357e-06, "loss": 1.2525, "step": 6730 }, { "epoch": 0.68, "grad_norm": 6.352798022234042, "learning_rate": 9.52161449895405e-06, "loss": 1.2245, "step": 6735 }, { "epoch": 0.68, "grad_norm": 15.847839472599194, "learning_rate": 9.52036177380351e-06, "loss": 1.256, "step": 6740 }, { "epoch": 0.68, "grad_norm": 9.453488936634951, "learning_rate": 9.519107493200803e-06, "loss": 1.2318, "step": 6745 }, { "epoch": 0.68, "grad_norm": 11.633528345767568, "learning_rate": 9.517851657577523e-06, "loss": 1.2229, "step": 6750 }, { "epoch": 0.68, "grad_norm": 10.626581824133057, "learning_rate": 9.516594267365804e-06, "loss": 1.2158, "step": 6755 }, { "epoch": 0.68, "grad_norm": 11.393721825328813, "learning_rate": 9.51533532299831e-06, "loss": 1.2719, "step": 6760 }, { "epoch": 0.68, "grad_norm": 9.769313015181671, "learning_rate": 9.514074824908245e-06, "loss": 1.3028, "step": 6765 }, { "epoch": 0.68, "grad_norm": 20.33308617227934, "learning_rate": 9.512812773529343e-06, "loss": 1.2591, "step": 6770 }, { "epoch": 0.68, "grad_norm": 11.166131697654846, "learning_rate": 9.511549169295877e-06, "loss": 1.1871, "step": 6775 }, { "epoch": 0.68, "grad_norm": 14.136358746864397, "learning_rate": 9.51028401264265e-06, "loss": 1.2101, "step": 6780 }, { "epoch": 0.68, "grad_norm": 13.390290412429662, "learning_rate": 9.509017304005003e-06, "loss": 1.195, "step": 6785 }, { "epoch": 0.68, "grad_norm": 9.224827613504818, "learning_rate": 9.507749043818806e-06, "loss": 1.218, "step": 6790 }, { "epoch": 0.69, "grad_norm": 47.04593173534263, "learning_rate": 9.506479232520472e-06, "loss": 1.2103, "step": 6795 }, { "epoch": 0.69, "grad_norm": 14.859674831678463, "learning_rate": 9.505207870546935e-06, "loss": 1.1938, "step": 6800 }, { "epoch": 0.69, "grad_norm": 31.86713786138509, "learning_rate": 9.503934958335674e-06, "loss": 1.1739, "step": 6805 }, { "epoch": 0.69, "grad_norm": 45.09655897499265, "learning_rate": 9.502660496324695e-06, "loss": 1.2677, "step": 6810 }, { "epoch": 0.69, "grad_norm": 22.36183306586514, "learning_rate": 9.501384484952542e-06, "loss": 1.2735, "step": 6815 }, { "epoch": 0.69, "grad_norm": 9.55254057571248, "learning_rate": 9.500106924658286e-06, "loss": 1.2156, "step": 6820 }, { "epoch": 0.69, "grad_norm": 9.712501636130115, "learning_rate": 9.498827815881535e-06, "loss": 1.2817, "step": 6825 }, { "epoch": 0.69, "grad_norm": 18.22147105174755, "learning_rate": 9.497547159062429e-06, "loss": 1.1614, "step": 6830 }, { "epoch": 0.69, "grad_norm": 16.77428145176956, "learning_rate": 9.496264954641642e-06, "loss": 1.184, "step": 6835 }, { "epoch": 0.69, "grad_norm": 21.482036685862674, "learning_rate": 9.494981203060377e-06, "loss": 1.2329, "step": 6840 }, { "epoch": 0.69, "grad_norm": 30.82452357899338, "learning_rate": 9.493695904760374e-06, "loss": 1.2574, "step": 6845 }, { "epoch": 0.69, "grad_norm": 49.07622638487179, "learning_rate": 9.492409060183902e-06, "loss": 1.215, "step": 6850 }, { "epoch": 0.69, "grad_norm": 27.852350571005495, "learning_rate": 9.491120669773764e-06, "loss": 1.253, "step": 6855 }, { "epoch": 0.69, "grad_norm": 21.832099841552093, "learning_rate": 9.489830733973294e-06, "loss": 1.2532, "step": 6860 }, { "epoch": 0.69, "grad_norm": 9.263626058875476, "learning_rate": 9.488539253226355e-06, "loss": 1.2408, "step": 6865 }, { "epoch": 0.69, "grad_norm": 14.705639458146393, "learning_rate": 9.487246227977344e-06, "loss": 1.2532, "step": 6870 }, { "epoch": 0.69, "grad_norm": 20.862808964869487, "learning_rate": 9.485951658671195e-06, "loss": 1.2053, "step": 6875 }, { "epoch": 0.69, "grad_norm": 25.52529554846823, "learning_rate": 9.484655545753365e-06, "loss": 1.2055, "step": 6880 }, { "epoch": 0.69, "grad_norm": 27.135176135004095, "learning_rate": 9.483357889669844e-06, "loss": 1.2336, "step": 6885 }, { "epoch": 0.69, "grad_norm": 13.919200196550955, "learning_rate": 9.482058690867155e-06, "loss": 1.222, "step": 6890 }, { "epoch": 0.7, "grad_norm": 8.228121962617985, "learning_rate": 9.480757949792352e-06, "loss": 1.2375, "step": 6895 }, { "epoch": 0.7, "grad_norm": 38.56471924185, "learning_rate": 9.479455666893017e-06, "loss": 1.2782, "step": 6900 }, { "epoch": 0.7, "grad_norm": 16.839326193627322, "learning_rate": 9.478151842617266e-06, "loss": 1.2418, "step": 6905 }, { "epoch": 0.7, "grad_norm": 25.0169370675844, "learning_rate": 9.476846477413744e-06, "loss": 1.2698, "step": 6910 }, { "epoch": 0.7, "grad_norm": 20.44364527322342, "learning_rate": 9.475539571731623e-06, "loss": 1.2483, "step": 6915 }, { "epoch": 0.7, "grad_norm": 33.7266187137183, "learning_rate": 9.474231126020611e-06, "loss": 1.2221, "step": 6920 }, { "epoch": 0.7, "grad_norm": 20.670169197792678, "learning_rate": 9.472921140730942e-06, "loss": 1.2104, "step": 6925 }, { "epoch": 0.7, "grad_norm": 7.691279573789632, "learning_rate": 9.47160961631338e-06, "loss": 1.245, "step": 6930 }, { "epoch": 0.7, "grad_norm": 8.08452734229773, "learning_rate": 9.470296553219221e-06, "loss": 1.2224, "step": 6935 }, { "epoch": 0.7, "grad_norm": 9.076040025569132, "learning_rate": 9.468981951900288e-06, "loss": 1.2418, "step": 6940 }, { "epoch": 0.7, "grad_norm": 8.83344240330592, "learning_rate": 9.467665812808933e-06, "loss": 1.232, "step": 6945 }, { "epoch": 0.7, "grad_norm": 13.906164830629537, "learning_rate": 9.466348136398038e-06, "loss": 1.2528, "step": 6950 }, { "epoch": 0.7, "grad_norm": 7.360240095907476, "learning_rate": 9.465028923121016e-06, "loss": 1.2909, "step": 6955 }, { "epoch": 0.7, "grad_norm": 9.91868384199132, "learning_rate": 9.463708173431808e-06, "loss": 1.2611, "step": 6960 }, { "epoch": 0.7, "grad_norm": 42.551322031638165, "learning_rate": 9.462385887784878e-06, "loss": 1.2037, "step": 6965 }, { "epoch": 0.7, "grad_norm": 15.858468890492471, "learning_rate": 9.461062066635227e-06, "loss": 1.2246, "step": 6970 }, { "epoch": 0.7, "grad_norm": 46.32101587841062, "learning_rate": 9.45973671043838e-06, "loss": 1.2431, "step": 6975 }, { "epoch": 0.7, "grad_norm": 27.18892845859127, "learning_rate": 9.45840981965039e-06, "loss": 1.1757, "step": 6980 }, { "epoch": 0.7, "grad_norm": 41.3387922376486, "learning_rate": 9.457081394727839e-06, "loss": 1.2043, "step": 6985 }, { "epoch": 0.7, "grad_norm": 74.34153904649125, "learning_rate": 9.455751436127838e-06, "loss": 1.2271, "step": 6990 }, { "epoch": 0.71, "grad_norm": 47.880845285006515, "learning_rate": 9.454419944308023e-06, "loss": 1.289, "step": 6995 }, { "epoch": 0.71, "grad_norm": 8.31019170814755, "learning_rate": 9.45308691972656e-06, "loss": 1.2575, "step": 7000 }, { "epoch": 0.71, "grad_norm": 35.09321220970617, "learning_rate": 9.451752362842142e-06, "loss": 1.2396, "step": 7005 }, { "epoch": 0.71, "grad_norm": 45.95642775325224, "learning_rate": 9.450416274113984e-06, "loss": 1.2274, "step": 7010 }, { "epoch": 0.71, "grad_norm": 20.437604930753526, "learning_rate": 9.44907865400184e-06, "loss": 1.2589, "step": 7015 }, { "epoch": 0.71, "grad_norm": 110.20869446046459, "learning_rate": 9.447739502965981e-06, "loss": 1.26, "step": 7020 }, { "epoch": 0.71, "grad_norm": 55.010558102721724, "learning_rate": 9.446398821467207e-06, "loss": 1.27, "step": 7025 }, { "epoch": 0.71, "grad_norm": 37.421245925011334, "learning_rate": 9.445056609966843e-06, "loss": 1.2753, "step": 7030 }, { "epoch": 0.71, "grad_norm": 58.58172405386906, "learning_rate": 9.443712868926747e-06, "loss": 1.2242, "step": 7035 }, { "epoch": 0.71, "grad_norm": 108.11313414129204, "learning_rate": 9.442367598809296e-06, "loss": 1.3262, "step": 7040 }, { "epoch": 0.71, "grad_norm": 53.60255208092043, "learning_rate": 9.441020800077398e-06, "loss": 1.2836, "step": 7045 }, { "epoch": 0.71, "grad_norm": 39.63944842158645, "learning_rate": 9.439672473194484e-06, "loss": 1.2709, "step": 7050 }, { "epoch": 0.71, "grad_norm": 58.28398000888765, "learning_rate": 9.438322618624514e-06, "loss": 1.284, "step": 7055 }, { "epoch": 0.71, "grad_norm": 47.30731552146769, "learning_rate": 9.436971236831966e-06, "loss": 1.2984, "step": 7060 }, { "epoch": 0.71, "grad_norm": 35.50017341642059, "learning_rate": 9.435618328281856e-06, "loss": 1.2511, "step": 7065 }, { "epoch": 0.71, "grad_norm": 19.540606098701666, "learning_rate": 9.434263893439717e-06, "loss": 1.2829, "step": 7070 }, { "epoch": 0.71, "grad_norm": 15.46016735290377, "learning_rate": 9.432907932771604e-06, "loss": 1.2504, "step": 7075 }, { "epoch": 0.71, "grad_norm": 10.716698511129454, "learning_rate": 9.431550446744109e-06, "loss": 1.2351, "step": 7080 }, { "epoch": 0.71, "grad_norm": 22.396806981332013, "learning_rate": 9.430191435824335e-06, "loss": 1.2902, "step": 7085 }, { "epoch": 0.71, "grad_norm": 6.093918197190177, "learning_rate": 9.42883090047992e-06, "loss": 1.2596, "step": 7090 }, { "epoch": 0.72, "grad_norm": 13.438683461999606, "learning_rate": 9.427468841179025e-06, "loss": 1.2259, "step": 7095 }, { "epoch": 0.72, "grad_norm": 20.475889676398804, "learning_rate": 9.426105258390326e-06, "loss": 1.1868, "step": 7100 }, { "epoch": 0.72, "grad_norm": 9.25807562795457, "learning_rate": 9.424740152583037e-06, "loss": 1.2288, "step": 7105 }, { "epoch": 0.72, "grad_norm": 14.92935578434538, "learning_rate": 9.423373524226888e-06, "loss": 1.2415, "step": 7110 }, { "epoch": 0.72, "grad_norm": 17.035920194295393, "learning_rate": 9.422005373792134e-06, "loss": 1.214, "step": 7115 }, { "epoch": 0.72, "grad_norm": 8.952893664109181, "learning_rate": 9.420635701749553e-06, "loss": 1.2441, "step": 7120 }, { "epoch": 0.72, "grad_norm": 10.24312828879759, "learning_rate": 9.41926450857045e-06, "loss": 1.2545, "step": 7125 }, { "epoch": 0.72, "grad_norm": 17.895593284970484, "learning_rate": 9.41789179472665e-06, "loss": 1.2717, "step": 7130 }, { "epoch": 0.72, "grad_norm": 25.915344582786382, "learning_rate": 9.416517560690505e-06, "loss": 1.2114, "step": 7135 }, { "epoch": 0.72, "grad_norm": 28.063154100394737, "learning_rate": 9.415141806934885e-06, "loss": 1.1602, "step": 7140 }, { "epoch": 0.72, "grad_norm": 60.96186172132934, "learning_rate": 9.413764533933186e-06, "loss": 1.2729, "step": 7145 }, { "epoch": 0.72, "grad_norm": 34.2103281919088, "learning_rate": 9.412385742159325e-06, "loss": 1.1829, "step": 7150 }, { "epoch": 0.72, "grad_norm": 31.55595638712618, "learning_rate": 9.411005432087745e-06, "loss": 1.2225, "step": 7155 }, { "epoch": 0.72, "grad_norm": 16.44544536704509, "learning_rate": 9.409623604193409e-06, "loss": 1.1842, "step": 7160 }, { "epoch": 0.72, "grad_norm": 19.01930018142275, "learning_rate": 9.408240258951803e-06, "loss": 1.234, "step": 7165 }, { "epoch": 0.72, "grad_norm": 7.31729099988006, "learning_rate": 9.406855396838934e-06, "loss": 1.2533, "step": 7170 }, { "epoch": 0.72, "grad_norm": 12.758603793971947, "learning_rate": 9.405469018331333e-06, "loss": 1.2281, "step": 7175 }, { "epoch": 0.72, "grad_norm": 9.729019158963341, "learning_rate": 9.404081123906048e-06, "loss": 1.2155, "step": 7180 }, { "epoch": 0.72, "grad_norm": 7.591024723043679, "learning_rate": 9.402691714040658e-06, "loss": 1.2419, "step": 7185 }, { "epoch": 0.72, "grad_norm": 9.408729873579752, "learning_rate": 9.401300789213251e-06, "loss": 1.2615, "step": 7190 }, { "epoch": 0.73, "grad_norm": 5.981977669792066, "learning_rate": 9.399908349902448e-06, "loss": 1.2266, "step": 7195 }, { "epoch": 0.73, "grad_norm": 20.35521316100864, "learning_rate": 9.398514396587383e-06, "loss": 1.2822, "step": 7200 }, { "epoch": 0.73, "grad_norm": 21.621038660870372, "learning_rate": 9.397118929747716e-06, "loss": 1.2243, "step": 7205 }, { "epoch": 0.73, "grad_norm": 7.1169999232885415, "learning_rate": 9.395721949863626e-06, "loss": 1.2136, "step": 7210 }, { "epoch": 0.73, "grad_norm": 14.05921741295788, "learning_rate": 9.39432345741581e-06, "loss": 1.2538, "step": 7215 }, { "epoch": 0.73, "grad_norm": 15.0163186370097, "learning_rate": 9.39292345288549e-06, "loss": 1.1999, "step": 7220 }, { "epoch": 0.73, "grad_norm": 9.509276797106333, "learning_rate": 9.391521936754405e-06, "loss": 1.1838, "step": 7225 }, { "epoch": 0.73, "grad_norm": 9.682679767995246, "learning_rate": 9.390118909504816e-06, "loss": 1.2345, "step": 7230 }, { "epoch": 0.73, "grad_norm": 8.298106706947552, "learning_rate": 9.388714371619504e-06, "loss": 1.2146, "step": 7235 }, { "epoch": 0.73, "grad_norm": 32.68014420140746, "learning_rate": 9.387308323581767e-06, "loss": 1.2208, "step": 7240 }, { "epoch": 0.73, "grad_norm": 32.94240281632185, "learning_rate": 9.385900765875428e-06, "loss": 1.2308, "step": 7245 }, { "epoch": 0.73, "grad_norm": 11.20941662216896, "learning_rate": 9.384491698984824e-06, "loss": 1.2556, "step": 7250 }, { "epoch": 0.73, "grad_norm": 44.34305604752469, "learning_rate": 9.383081123394812e-06, "loss": 1.2736, "step": 7255 }, { "epoch": 0.73, "grad_norm": 25.712638034601298, "learning_rate": 9.381669039590774e-06, "loss": 1.2039, "step": 7260 }, { "epoch": 0.73, "grad_norm": 48.74925265761191, "learning_rate": 9.380255448058605e-06, "loss": 1.2437, "step": 7265 }, { "epoch": 0.73, "grad_norm": 57.63732263239281, "learning_rate": 9.378840349284719e-06, "loss": 1.2407, "step": 7270 }, { "epoch": 0.73, "grad_norm": 8.103386894214017, "learning_rate": 9.377423743756052e-06, "loss": 1.2584, "step": 7275 }, { "epoch": 0.73, "grad_norm": 23.587086106979573, "learning_rate": 9.376005631960054e-06, "loss": 1.1959, "step": 7280 }, { "epoch": 0.73, "grad_norm": 47.89273622586558, "learning_rate": 9.374586014384698e-06, "loss": 1.2304, "step": 7285 }, { "epoch": 0.73, "grad_norm": 14.207434380149833, "learning_rate": 9.373164891518474e-06, "loss": 1.2026, "step": 7290 }, { "epoch": 0.74, "grad_norm": 25.565965293698454, "learning_rate": 9.371742263850386e-06, "loss": 1.2605, "step": 7295 }, { "epoch": 0.74, "grad_norm": 12.987314783270103, "learning_rate": 9.370318131869962e-06, "loss": 1.2852, "step": 7300 }, { "epoch": 0.74, "grad_norm": 8.170501768501227, "learning_rate": 9.368892496067242e-06, "loss": 1.2447, "step": 7305 }, { "epoch": 0.74, "grad_norm": 24.983723833865042, "learning_rate": 9.367465356932786e-06, "loss": 1.2606, "step": 7310 }, { "epoch": 0.74, "grad_norm": 10.967512176058351, "learning_rate": 9.366036714957673e-06, "loss": 1.2554, "step": 7315 }, { "epoch": 0.74, "grad_norm": 7.222476940885444, "learning_rate": 9.364606570633496e-06, "loss": 1.2277, "step": 7320 }, { "epoch": 0.74, "grad_norm": 8.599376103937072, "learning_rate": 9.363174924452368e-06, "loss": 1.2546, "step": 7325 }, { "epoch": 0.74, "grad_norm": 8.74402561354435, "learning_rate": 9.361741776906914e-06, "loss": 1.2431, "step": 7330 }, { "epoch": 0.74, "grad_norm": 9.350739519335928, "learning_rate": 9.360307128490282e-06, "loss": 1.2834, "step": 7335 }, { "epoch": 0.74, "grad_norm": 58.73070927096001, "learning_rate": 9.358870979696132e-06, "loss": 1.2686, "step": 7340 }, { "epoch": 0.74, "grad_norm": 9.974912177329822, "learning_rate": 9.35743333101864e-06, "loss": 1.1898, "step": 7345 }, { "epoch": 0.74, "grad_norm": 17.78339694738643, "learning_rate": 9.355994182952501e-06, "loss": 1.2043, "step": 7350 }, { "epoch": 0.74, "grad_norm": 18.31437657156583, "learning_rate": 9.354553535992923e-06, "loss": 1.2304, "step": 7355 }, { "epoch": 0.74, "grad_norm": 20.665349554537844, "learning_rate": 9.353111390635634e-06, "loss": 1.189, "step": 7360 }, { "epoch": 0.74, "grad_norm": 19.05751323569656, "learning_rate": 9.351667747376874e-06, "loss": 1.2734, "step": 7365 }, { "epoch": 0.74, "grad_norm": 8.933435798473003, "learning_rate": 9.350222606713396e-06, "loss": 1.1759, "step": 7370 }, { "epoch": 0.74, "grad_norm": 8.62867231923937, "learning_rate": 9.348775969142475e-06, "loss": 1.2595, "step": 7375 }, { "epoch": 0.74, "grad_norm": 8.329043149847656, "learning_rate": 9.347327835161897e-06, "loss": 1.2506, "step": 7380 }, { "epoch": 0.74, "grad_norm": 8.351497862234899, "learning_rate": 9.345878205269962e-06, "loss": 1.253, "step": 7385 }, { "epoch": 0.75, "grad_norm": 11.719251752013907, "learning_rate": 9.344427079965487e-06, "loss": 1.2536, "step": 7390 }, { "epoch": 0.75, "grad_norm": 6.942666744527387, "learning_rate": 9.342974459747804e-06, "loss": 1.26, "step": 7395 }, { "epoch": 0.75, "grad_norm": 18.285466703612645, "learning_rate": 9.341520345116759e-06, "loss": 1.18, "step": 7400 }, { "epoch": 0.75, "grad_norm": 30.12228972307043, "learning_rate": 9.34006473657271e-06, "loss": 1.2179, "step": 7405 }, { "epoch": 0.75, "grad_norm": 12.21589668238483, "learning_rate": 9.338607634616528e-06, "loss": 1.2136, "step": 7410 }, { "epoch": 0.75, "grad_norm": 12.842802840102594, "learning_rate": 9.337149039749603e-06, "loss": 1.1953, "step": 7415 }, { "epoch": 0.75, "grad_norm": 10.58284469305697, "learning_rate": 9.335688952473836e-06, "loss": 1.2499, "step": 7420 }, { "epoch": 0.75, "grad_norm": 11.622043712284013, "learning_rate": 9.334227373291642e-06, "loss": 1.2617, "step": 7425 }, { "epoch": 0.75, "grad_norm": 11.883722967582399, "learning_rate": 9.33276430270595e-06, "loss": 1.2706, "step": 7430 }, { "epoch": 0.75, "grad_norm": 10.60015415599963, "learning_rate": 9.331299741220196e-06, "loss": 1.2676, "step": 7435 }, { "epoch": 0.75, "grad_norm": 13.111803702436317, "learning_rate": 9.329833689338342e-06, "loss": 1.2369, "step": 7440 }, { "epoch": 0.75, "grad_norm": 17.055944336562472, "learning_rate": 9.32836614756485e-06, "loss": 1.259, "step": 7445 }, { "epoch": 0.75, "grad_norm": 28.08428867001668, "learning_rate": 9.326897116404698e-06, "loss": 1.2645, "step": 7450 }, { "epoch": 0.75, "grad_norm": 7.352690250363281, "learning_rate": 9.325426596363382e-06, "loss": 1.2373, "step": 7455 }, { "epoch": 0.75, "grad_norm": 11.900462142361196, "learning_rate": 9.323954587946907e-06, "loss": 1.186, "step": 7460 }, { "epoch": 0.75, "grad_norm": 9.566500090186759, "learning_rate": 9.322481091661788e-06, "loss": 1.2952, "step": 7465 }, { "epoch": 0.75, "grad_norm": 9.033891289696, "learning_rate": 9.321006108015053e-06, "loss": 1.272, "step": 7470 }, { "epoch": 0.75, "grad_norm": 14.899753735922808, "learning_rate": 9.319529637514244e-06, "loss": 1.2528, "step": 7475 }, { "epoch": 0.75, "grad_norm": 9.255310223264777, "learning_rate": 9.318051680667412e-06, "loss": 1.2085, "step": 7480 }, { "epoch": 0.75, "grad_norm": 17.864126836858343, "learning_rate": 9.316572237983119e-06, "loss": 1.2642, "step": 7485 }, { "epoch": 0.76, "grad_norm": 6.551752495088672, "learning_rate": 9.315091309970444e-06, "loss": 1.2041, "step": 7490 }, { "epoch": 0.76, "grad_norm": 22.80093972121296, "learning_rate": 9.31360889713897e-06, "loss": 1.2334, "step": 7495 }, { "epoch": 0.76, "grad_norm": 28.90301544894272, "learning_rate": 9.312124999998796e-06, "loss": 1.2479, "step": 7500 }, { "epoch": 0.76, "grad_norm": 23.575320478557227, "learning_rate": 9.310639619060525e-06, "loss": 1.1864, "step": 7505 }, { "epoch": 0.76, "grad_norm": 27.62402399391801, "learning_rate": 9.30915275483528e-06, "loss": 1.2981, "step": 7510 }, { "epoch": 0.76, "grad_norm": 29.52538342623024, "learning_rate": 9.307664407834687e-06, "loss": 1.2367, "step": 7515 }, { "epoch": 0.76, "grad_norm": 27.250936315036952, "learning_rate": 9.306174578570886e-06, "loss": 1.2476, "step": 7520 }, { "epoch": 0.76, "grad_norm": 29.535063998267166, "learning_rate": 9.304683267556526e-06, "loss": 1.253, "step": 7525 }, { "epoch": 0.76, "grad_norm": 28.251195584812745, "learning_rate": 9.303190475304765e-06, "loss": 1.2189, "step": 7530 }, { "epoch": 0.76, "grad_norm": 14.902149710792362, "learning_rate": 9.301696202329271e-06, "loss": 1.2406, "step": 7535 }, { "epoch": 0.76, "grad_norm": 40.43104040021645, "learning_rate": 9.300200449144222e-06, "loss": 1.2063, "step": 7540 }, { "epoch": 0.76, "grad_norm": 10.089939602148963, "learning_rate": 9.298703216264306e-06, "loss": 1.2279, "step": 7545 }, { "epoch": 0.76, "grad_norm": 16.76947542440218, "learning_rate": 9.29720450420472e-06, "loss": 1.2208, "step": 7550 }, { "epoch": 0.76, "grad_norm": 8.613305081206319, "learning_rate": 9.295704313481167e-06, "loss": 1.2397, "step": 7555 }, { "epoch": 0.76, "grad_norm": 29.797823991883156, "learning_rate": 9.294202644609863e-06, "loss": 1.1968, "step": 7560 }, { "epoch": 0.76, "grad_norm": 9.489259290104659, "learning_rate": 9.292699498107529e-06, "loss": 1.2035, "step": 7565 }, { "epoch": 0.76, "grad_norm": 17.944090986437367, "learning_rate": 9.291194874491401e-06, "loss": 1.2632, "step": 7570 }, { "epoch": 0.76, "grad_norm": 21.959976388311894, "learning_rate": 9.289688774279213e-06, "loss": 1.2442, "step": 7575 }, { "epoch": 0.76, "grad_norm": 11.984896080723992, "learning_rate": 9.288181197989215e-06, "loss": 1.24, "step": 7580 }, { "epoch": 0.76, "grad_norm": 11.296258510485956, "learning_rate": 9.286672146140162e-06, "loss": 1.1646, "step": 7585 }, { "epoch": 0.77, "grad_norm": 8.409430913426574, "learning_rate": 9.28516161925132e-06, "loss": 1.2424, "step": 7590 }, { "epoch": 0.77, "grad_norm": 7.815724731539182, "learning_rate": 9.283649617842455e-06, "loss": 1.1497, "step": 7595 }, { "epoch": 0.77, "grad_norm": 9.245525477882799, "learning_rate": 9.282136142433849e-06, "loss": 1.2134, "step": 7600 }, { "epoch": 0.77, "grad_norm": 8.055682706845232, "learning_rate": 9.280621193546286e-06, "loss": 1.2681, "step": 7605 }, { "epoch": 0.77, "grad_norm": 9.08566928740703, "learning_rate": 9.279104771701059e-06, "loss": 1.2022, "step": 7610 }, { "epoch": 0.77, "grad_norm": 8.766664575908408, "learning_rate": 9.277586877419967e-06, "loss": 1.2152, "step": 7615 }, { "epoch": 0.77, "grad_norm": 13.087774417016558, "learning_rate": 9.276067511225318e-06, "loss": 1.2306, "step": 7620 }, { "epoch": 0.77, "grad_norm": 29.673522022975185, "learning_rate": 9.274546673639919e-06, "loss": 1.2166, "step": 7625 }, { "epoch": 0.77, "grad_norm": 51.539591817311056, "learning_rate": 9.273024365187093e-06, "loss": 1.2284, "step": 7630 }, { "epoch": 0.77, "grad_norm": 12.140019429296594, "learning_rate": 9.271500586390666e-06, "loss": 1.2434, "step": 7635 }, { "epoch": 0.77, "grad_norm": 26.036718584654587, "learning_rate": 9.269975337774967e-06, "loss": 1.1555, "step": 7640 }, { "epoch": 0.77, "grad_norm": 12.191433366044787, "learning_rate": 9.268448619864832e-06, "loss": 1.2142, "step": 7645 }, { "epoch": 0.77, "grad_norm": 16.33184538805689, "learning_rate": 9.266920433185603e-06, "loss": 1.2552, "step": 7650 }, { "epoch": 0.77, "grad_norm": 25.21325091607401, "learning_rate": 9.265390778263129e-06, "loss": 1.2298, "step": 7655 }, { "epoch": 0.77, "grad_norm": 7.156688433644524, "learning_rate": 9.263859655623761e-06, "loss": 1.2132, "step": 7660 }, { "epoch": 0.77, "grad_norm": 11.300367798956218, "learning_rate": 9.262327065794358e-06, "loss": 1.2519, "step": 7665 }, { "epoch": 0.77, "grad_norm": 9.169919324760624, "learning_rate": 9.260793009302284e-06, "loss": 1.2031, "step": 7670 }, { "epoch": 0.77, "grad_norm": 25.815209942981188, "learning_rate": 9.259257486675404e-06, "loss": 1.2059, "step": 7675 }, { "epoch": 0.77, "grad_norm": 23.533425674532772, "learning_rate": 9.25772049844209e-06, "loss": 1.2389, "step": 7680 }, { "epoch": 0.77, "grad_norm": 8.829696761260031, "learning_rate": 9.256182045131222e-06, "loss": 1.1899, "step": 7685 }, { "epoch": 0.78, "grad_norm": 11.084671135829076, "learning_rate": 9.254642127272175e-06, "loss": 1.2283, "step": 7690 }, { "epoch": 0.78, "grad_norm": 11.66814246775726, "learning_rate": 9.253100745394836e-06, "loss": 1.2656, "step": 7695 }, { "epoch": 0.78, "grad_norm": 8.733947935037307, "learning_rate": 9.251557900029593e-06, "loss": 1.2077, "step": 7700 }, { "epoch": 0.78, "grad_norm": 9.323735837842106, "learning_rate": 9.250013591707339e-06, "loss": 1.2519, "step": 7705 }, { "epoch": 0.78, "grad_norm": 10.768122637522222, "learning_rate": 9.248467820959467e-06, "loss": 1.2063, "step": 7710 }, { "epoch": 0.78, "grad_norm": 17.952374205143265, "learning_rate": 9.246920588317873e-06, "loss": 1.2455, "step": 7715 }, { "epoch": 0.78, "grad_norm": 22.129056751142848, "learning_rate": 9.245371894314962e-06, "loss": 1.2426, "step": 7720 }, { "epoch": 0.78, "grad_norm": 14.67588349421575, "learning_rate": 9.243821739483638e-06, "loss": 1.227, "step": 7725 }, { "epoch": 0.78, "grad_norm": 77.09409480945052, "learning_rate": 9.242270124357306e-06, "loss": 1.1896, "step": 7730 }, { "epoch": 0.78, "grad_norm": 17.986491603701044, "learning_rate": 9.240717049469874e-06, "loss": 1.2003, "step": 7735 }, { "epoch": 0.78, "grad_norm": 29.014583020401524, "learning_rate": 9.239162515355759e-06, "loss": 1.2147, "step": 7740 }, { "epoch": 0.78, "grad_norm": 9.372813185319659, "learning_rate": 9.23760652254987e-06, "loss": 1.2414, "step": 7745 }, { "epoch": 0.78, "grad_norm": 11.178809016800125, "learning_rate": 9.236049071587623e-06, "loss": 1.2296, "step": 7750 }, { "epoch": 0.78, "grad_norm": 20.288685383202317, "learning_rate": 9.234490163004938e-06, "loss": 1.2335, "step": 7755 }, { "epoch": 0.78, "grad_norm": 38.59795381962454, "learning_rate": 9.232929797338231e-06, "loss": 1.2161, "step": 7760 }, { "epoch": 0.78, "grad_norm": 11.725619510243467, "learning_rate": 9.231367975124425e-06, "loss": 1.2327, "step": 7765 }, { "epoch": 0.78, "grad_norm": 19.0956332935042, "learning_rate": 9.229804696900938e-06, "loss": 1.2172, "step": 7770 }, { "epoch": 0.78, "grad_norm": 7.363008188568402, "learning_rate": 9.228239963205697e-06, "loss": 1.2235, "step": 7775 }, { "epoch": 0.78, "grad_norm": 12.919405963264193, "learning_rate": 9.226673774577123e-06, "loss": 1.2012, "step": 7780 }, { "epoch": 0.78, "grad_norm": 9.370760427375297, "learning_rate": 9.225106131554138e-06, "loss": 1.2742, "step": 7785 }, { "epoch": 0.79, "grad_norm": 10.574753334317899, "learning_rate": 9.22353703467617e-06, "loss": 1.2251, "step": 7790 }, { "epoch": 0.79, "grad_norm": 9.919204055521005, "learning_rate": 9.221966484483143e-06, "loss": 1.2137, "step": 7795 }, { "epoch": 0.79, "grad_norm": 11.117727775224042, "learning_rate": 9.22039448151548e-06, "loss": 1.2701, "step": 7800 }, { "epoch": 0.79, "grad_norm": 8.821250378982779, "learning_rate": 9.218821026314106e-06, "loss": 1.2434, "step": 7805 }, { "epoch": 0.79, "grad_norm": 10.83726533956292, "learning_rate": 9.217246119420449e-06, "loss": 1.2364, "step": 7810 }, { "epoch": 0.79, "grad_norm": 25.25069526526832, "learning_rate": 9.215669761376428e-06, "loss": 1.2415, "step": 7815 }, { "epoch": 0.79, "grad_norm": 27.832030681337162, "learning_rate": 9.214091952724469e-06, "loss": 1.2075, "step": 7820 }, { "epoch": 0.79, "grad_norm": 31.352547290909595, "learning_rate": 9.212512694007494e-06, "loss": 1.2429, "step": 7825 }, { "epoch": 0.79, "grad_norm": 59.52508502860254, "learning_rate": 9.210931985768924e-06, "loss": 1.2324, "step": 7830 }, { "epoch": 0.79, "grad_norm": 42.29197901671513, "learning_rate": 9.209349828552681e-06, "loss": 1.2324, "step": 7835 }, { "epoch": 0.79, "grad_norm": 58.09267680215089, "learning_rate": 9.207766222903182e-06, "loss": 1.281, "step": 7840 }, { "epoch": 0.79, "grad_norm": 11.99424188929102, "learning_rate": 9.206181169365345e-06, "loss": 1.211, "step": 7845 }, { "epoch": 0.79, "grad_norm": 37.79255731226824, "learning_rate": 9.204594668484584e-06, "loss": 1.2649, "step": 7850 }, { "epoch": 0.79, "grad_norm": 32.152434144982294, "learning_rate": 9.203006720806813e-06, "loss": 1.288, "step": 7855 }, { "epoch": 0.79, "grad_norm": 11.160381783938107, "learning_rate": 9.201417326878444e-06, "loss": 1.2589, "step": 7860 }, { "epoch": 0.79, "grad_norm": 24.820525431369962, "learning_rate": 9.199826487246386e-06, "loss": 1.2355, "step": 7865 }, { "epoch": 0.79, "grad_norm": 10.610850830909609, "learning_rate": 9.198234202458045e-06, "loss": 1.2876, "step": 7870 }, { "epoch": 0.79, "grad_norm": 12.256979389282636, "learning_rate": 9.196640473061325e-06, "loss": 1.2339, "step": 7875 }, { "epoch": 0.79, "grad_norm": 7.769905681918171, "learning_rate": 9.195045299604626e-06, "loss": 1.237, "step": 7880 }, { "epoch": 0.79, "grad_norm": 6.99103804897849, "learning_rate": 9.193448682636846e-06, "loss": 1.2418, "step": 7885 }, { "epoch": 0.8, "grad_norm": 10.75457384662508, "learning_rate": 9.19185062270738e-06, "loss": 1.2511, "step": 7890 }, { "epoch": 0.8, "grad_norm": 29.757096051723074, "learning_rate": 9.190251120366118e-06, "loss": 1.2335, "step": 7895 }, { "epoch": 0.8, "grad_norm": 42.73464975981768, "learning_rate": 9.188650176163448e-06, "loss": 1.2636, "step": 7900 }, { "epoch": 0.8, "grad_norm": 15.765238752923903, "learning_rate": 9.187047790650252e-06, "loss": 1.2848, "step": 7905 }, { "epoch": 0.8, "grad_norm": 9.11348257275226, "learning_rate": 9.185443964377911e-06, "loss": 1.2391, "step": 7910 }, { "epoch": 0.8, "grad_norm": 9.159478032563705, "learning_rate": 9.1838386978983e-06, "loss": 1.2208, "step": 7915 }, { "epoch": 0.8, "grad_norm": 18.609278312997958, "learning_rate": 9.18223199176379e-06, "loss": 1.2904, "step": 7920 }, { "epoch": 0.8, "grad_norm": 25.709645643557472, "learning_rate": 9.180623846527244e-06, "loss": 1.2852, "step": 7925 }, { "epoch": 0.8, "grad_norm": 11.165928500885084, "learning_rate": 9.179014262742027e-06, "loss": 1.2282, "step": 7930 }, { "epoch": 0.8, "grad_norm": 12.092276441061669, "learning_rate": 9.177403240961993e-06, "loss": 1.2677, "step": 7935 }, { "epoch": 0.8, "grad_norm": 6.391862355681053, "learning_rate": 9.175790781741493e-06, "loss": 1.2481, "step": 7940 }, { "epoch": 0.8, "grad_norm": 6.279028816900641, "learning_rate": 9.174176885635373e-06, "loss": 1.244, "step": 7945 }, { "epoch": 0.8, "grad_norm": 8.923061253265221, "learning_rate": 9.172561553198974e-06, "loss": 1.184, "step": 7950 }, { "epoch": 0.8, "grad_norm": 15.690954537377602, "learning_rate": 9.17094478498813e-06, "loss": 1.2314, "step": 7955 }, { "epoch": 0.8, "grad_norm": 6.831051956873171, "learning_rate": 9.16932658155917e-06, "loss": 1.2684, "step": 7960 }, { "epoch": 0.8, "grad_norm": 11.512000309343122, "learning_rate": 9.167706943468916e-06, "loss": 1.217, "step": 7965 }, { "epoch": 0.8, "grad_norm": 9.104503401851176, "learning_rate": 9.16608587127468e-06, "loss": 1.1921, "step": 7970 }, { "epoch": 0.8, "grad_norm": 31.74318596256183, "learning_rate": 9.164463365534277e-06, "loss": 1.2401, "step": 7975 }, { "epoch": 0.8, "grad_norm": 10.413034406609874, "learning_rate": 9.162839426806007e-06, "loss": 1.2366, "step": 7980 }, { "epoch": 0.81, "grad_norm": 16.185415863724284, "learning_rate": 9.161214055648667e-06, "loss": 1.2459, "step": 7985 }, { "epoch": 0.81, "grad_norm": 16.188366129864246, "learning_rate": 9.159587252621545e-06, "loss": 1.2086, "step": 7990 }, { "epoch": 0.81, "grad_norm": 17.707374452769816, "learning_rate": 9.157959018284421e-06, "loss": 1.1813, "step": 7995 }, { "epoch": 0.81, "grad_norm": 16.354744724838543, "learning_rate": 9.15632935319757e-06, "loss": 1.2413, "step": 8000 }, { "epoch": 0.81, "grad_norm": 22.372889845701312, "learning_rate": 9.15469825792176e-06, "loss": 1.2657, "step": 8005 }, { "epoch": 0.81, "grad_norm": 9.73702140543741, "learning_rate": 9.153065733018247e-06, "loss": 1.2369, "step": 8010 }, { "epoch": 0.81, "grad_norm": 10.564815088861721, "learning_rate": 9.15143177904878e-06, "loss": 1.2148, "step": 8015 }, { "epoch": 0.81, "grad_norm": 16.606622739126877, "learning_rate": 9.149796396575606e-06, "loss": 1.2042, "step": 8020 }, { "epoch": 0.81, "grad_norm": 12.368792590493465, "learning_rate": 9.148159586161454e-06, "loss": 1.2278, "step": 8025 }, { "epoch": 0.81, "grad_norm": 13.079034496857842, "learning_rate": 9.146521348369549e-06, "loss": 1.236, "step": 8030 }, { "epoch": 0.81, "grad_norm": 14.571565172228567, "learning_rate": 9.14488168376361e-06, "loss": 1.2529, "step": 8035 }, { "epoch": 0.81, "grad_norm": 8.846307319390768, "learning_rate": 9.143240592907842e-06, "loss": 1.1753, "step": 8040 }, { "epoch": 0.81, "grad_norm": 12.865132020941735, "learning_rate": 9.141598076366942e-06, "loss": 1.1693, "step": 8045 }, { "epoch": 0.81, "grad_norm": 8.827255680234257, "learning_rate": 9.139954134706102e-06, "loss": 1.2104, "step": 8050 }, { "epoch": 0.81, "grad_norm": 30.57885664997976, "learning_rate": 9.138308768490998e-06, "loss": 1.3031, "step": 8055 }, { "epoch": 0.81, "grad_norm": 6.607095764384038, "learning_rate": 9.136661978287799e-06, "loss": 1.2254, "step": 8060 }, { "epoch": 0.81, "grad_norm": 10.89714796529194, "learning_rate": 9.135013764663163e-06, "loss": 1.2506, "step": 8065 }, { "epoch": 0.81, "grad_norm": 6.236539667441939, "learning_rate": 9.133364128184242e-06, "loss": 1.2887, "step": 8070 }, { "epoch": 0.81, "grad_norm": 19.598284133793374, "learning_rate": 9.131713069418671e-06, "loss": 1.218, "step": 8075 }, { "epoch": 0.81, "grad_norm": 11.89993729959541, "learning_rate": 9.130060588934578e-06, "loss": 1.2071, "step": 8080 }, { "epoch": 0.82, "grad_norm": 11.775313601119954, "learning_rate": 9.128406687300582e-06, "loss": 1.2395, "step": 8085 }, { "epoch": 0.82, "grad_norm": 27.60652757738097, "learning_rate": 9.12675136508579e-06, "loss": 1.1755, "step": 8090 }, { "epoch": 0.82, "grad_norm": 31.59229218037076, "learning_rate": 9.125094622859791e-06, "loss": 1.2039, "step": 8095 }, { "epoch": 0.82, "grad_norm": 38.65978472538587, "learning_rate": 9.123436461192674e-06, "loss": 1.2323, "step": 8100 }, { "epoch": 0.82, "grad_norm": 65.80980214245291, "learning_rate": 9.12177688065501e-06, "loss": 1.2492, "step": 8105 }, { "epoch": 0.82, "grad_norm": 59.28577568454184, "learning_rate": 9.120115881817857e-06, "loss": 1.2893, "step": 8110 }, { "epoch": 0.82, "grad_norm": 27.688683401321523, "learning_rate": 9.118453465252764e-06, "loss": 1.2641, "step": 8115 }, { "epoch": 0.82, "grad_norm": 13.70332351800936, "learning_rate": 9.116789631531769e-06, "loss": 1.181, "step": 8120 }, { "epoch": 0.82, "grad_norm": 60.407514738702325, "learning_rate": 9.115124381227392e-06, "loss": 1.2316, "step": 8125 }, { "epoch": 0.82, "grad_norm": 18.859070082123754, "learning_rate": 9.113457714912646e-06, "loss": 1.2146, "step": 8130 }, { "epoch": 0.82, "grad_norm": 42.63500580644595, "learning_rate": 9.111789633161029e-06, "loss": 1.2345, "step": 8135 }, { "epoch": 0.82, "grad_norm": 23.14253982357684, "learning_rate": 9.110120136546528e-06, "loss": 1.2374, "step": 8140 }, { "epoch": 0.82, "grad_norm": 6.973157486553424, "learning_rate": 9.108449225643612e-06, "loss": 1.2557, "step": 8145 }, { "epoch": 0.82, "grad_norm": 21.157793547602015, "learning_rate": 9.10677690102724e-06, "loss": 1.2004, "step": 8150 }, { "epoch": 0.82, "grad_norm": 18.801783703870495, "learning_rate": 9.105103163272862e-06, "loss": 1.2679, "step": 8155 }, { "epoch": 0.82, "grad_norm": 13.444503620625873, "learning_rate": 9.103428012956406e-06, "loss": 1.228, "step": 8160 }, { "epoch": 0.82, "grad_norm": 26.61347370151708, "learning_rate": 9.101751450654289e-06, "loss": 1.2171, "step": 8165 }, { "epoch": 0.82, "grad_norm": 27.953381232035486, "learning_rate": 9.100073476943415e-06, "loss": 1.2415, "step": 8170 }, { "epoch": 0.82, "grad_norm": 20.37772713019263, "learning_rate": 9.098394092401174e-06, "loss": 1.2158, "step": 8175 }, { "epoch": 0.82, "grad_norm": 16.193271631825727, "learning_rate": 9.096713297605439e-06, "loss": 1.2632, "step": 8180 }, { "epoch": 0.83, "grad_norm": 11.45725368852007, "learning_rate": 9.095031093134574e-06, "loss": 1.2415, "step": 8185 }, { "epoch": 0.83, "grad_norm": 25.74841599073145, "learning_rate": 9.093347479567419e-06, "loss": 1.2874, "step": 8190 }, { "epoch": 0.83, "grad_norm": 7.071323002953288, "learning_rate": 9.091662457483305e-06, "loss": 1.2536, "step": 8195 }, { "epoch": 0.83, "grad_norm": 23.679119240128742, "learning_rate": 9.08997602746205e-06, "loss": 1.2502, "step": 8200 }, { "epoch": 0.83, "grad_norm": 11.481301766638277, "learning_rate": 9.088288190083949e-06, "loss": 1.2335, "step": 8205 }, { "epoch": 0.83, "grad_norm": 11.789345499913125, "learning_rate": 9.086598945929787e-06, "loss": 1.2299, "step": 8210 }, { "epoch": 0.83, "grad_norm": 31.442603486590052, "learning_rate": 9.08490829558083e-06, "loss": 1.2039, "step": 8215 }, { "epoch": 0.83, "grad_norm": 31.453182996449176, "learning_rate": 9.083216239618831e-06, "loss": 1.2333, "step": 8220 }, { "epoch": 0.83, "grad_norm": 16.40557681087287, "learning_rate": 9.081522778626022e-06, "loss": 1.2445, "step": 8225 }, { "epoch": 0.83, "grad_norm": 41.965222999552864, "learning_rate": 9.079827913185126e-06, "loss": 1.2503, "step": 8230 }, { "epoch": 0.83, "grad_norm": 33.22209321149795, "learning_rate": 9.07813164387934e-06, "loss": 1.2313, "step": 8235 }, { "epoch": 0.83, "grad_norm": 81.53473188534082, "learning_rate": 9.07643397129235e-06, "loss": 1.2446, "step": 8240 }, { "epoch": 0.83, "grad_norm": 120.40449446440331, "learning_rate": 9.074734896008326e-06, "loss": 1.2962, "step": 8245 }, { "epoch": 0.83, "grad_norm": 65.36493591941586, "learning_rate": 9.073034418611915e-06, "loss": 1.2235, "step": 8250 }, { "epoch": 0.83, "grad_norm": 40.4010764969845, "learning_rate": 9.071332539688248e-06, "loss": 1.2348, "step": 8255 }, { "epoch": 0.83, "grad_norm": 9.64350332332248, "learning_rate": 9.069629259822947e-06, "loss": 1.2535, "step": 8260 }, { "epoch": 0.83, "grad_norm": 46.141321205756675, "learning_rate": 9.067924579602102e-06, "loss": 1.2254, "step": 8265 }, { "epoch": 0.83, "grad_norm": 22.800453773851473, "learning_rate": 9.066218499612296e-06, "loss": 1.2383, "step": 8270 }, { "epoch": 0.83, "grad_norm": 32.825239418145735, "learning_rate": 9.064511020440587e-06, "loss": 1.1774, "step": 8275 }, { "epoch": 0.83, "grad_norm": 13.849725814097734, "learning_rate": 9.062802142674519e-06, "loss": 1.2256, "step": 8280 }, { "epoch": 0.84, "grad_norm": 36.209761027309284, "learning_rate": 9.061091866902112e-06, "loss": 1.171, "step": 8285 }, { "epoch": 0.84, "grad_norm": 25.1646410223101, "learning_rate": 9.059380193711873e-06, "loss": 1.277, "step": 8290 }, { "epoch": 0.84, "grad_norm": 17.127930781671427, "learning_rate": 9.057667123692788e-06, "loss": 1.2592, "step": 8295 }, { "epoch": 0.84, "grad_norm": 17.664642683604065, "learning_rate": 9.05595265743432e-06, "loss": 1.2532, "step": 8300 }, { "epoch": 0.84, "grad_norm": 11.241837976622543, "learning_rate": 9.054236795526416e-06, "loss": 1.2686, "step": 8305 }, { "epoch": 0.84, "grad_norm": 8.854922348067305, "learning_rate": 9.052519538559505e-06, "loss": 1.247, "step": 8310 }, { "epoch": 0.84, "grad_norm": 8.39256460774842, "learning_rate": 9.050800887124492e-06, "loss": 1.2109, "step": 8315 }, { "epoch": 0.84, "grad_norm": 8.011338239652282, "learning_rate": 9.04908084181276e-06, "loss": 1.242, "step": 8320 }, { "epoch": 0.84, "grad_norm": 9.290562496694669, "learning_rate": 9.047359403216177e-06, "loss": 1.2043, "step": 8325 }, { "epoch": 0.84, "grad_norm": 20.502784890291515, "learning_rate": 9.04563657192709e-06, "loss": 1.2437, "step": 8330 }, { "epoch": 0.84, "grad_norm": 19.85465910133973, "learning_rate": 9.043912348538324e-06, "loss": 1.2447, "step": 8335 }, { "epoch": 0.84, "grad_norm": 21.084864859510795, "learning_rate": 9.04218673364318e-06, "loss": 1.2518, "step": 8340 }, { "epoch": 0.84, "grad_norm": 8.3528319487047, "learning_rate": 9.040459727835442e-06, "loss": 1.1677, "step": 8345 }, { "epoch": 0.84, "grad_norm": 9.499465506508274, "learning_rate": 9.038731331709371e-06, "loss": 1.2745, "step": 8350 }, { "epoch": 0.84, "grad_norm": 22.8566905996232, "learning_rate": 9.037001545859706e-06, "loss": 1.226, "step": 8355 }, { "epoch": 0.84, "grad_norm": 20.300693565803126, "learning_rate": 9.035270370881666e-06, "loss": 1.1771, "step": 8360 }, { "epoch": 0.84, "grad_norm": 22.637425395129974, "learning_rate": 9.033537807370943e-06, "loss": 1.1876, "step": 8365 }, { "epoch": 0.84, "grad_norm": 91.96974249962282, "learning_rate": 9.031803855923715e-06, "loss": 1.2614, "step": 8370 }, { "epoch": 0.84, "grad_norm": 69.88547886649809, "learning_rate": 9.03006851713663e-06, "loss": 1.2672, "step": 8375 }, { "epoch": 0.84, "grad_norm": 10.499419152260945, "learning_rate": 9.028331791606819e-06, "loss": 1.1971, "step": 8380 }, { "epoch": 0.85, "grad_norm": 18.395960923241695, "learning_rate": 9.026593679931885e-06, "loss": 1.2513, "step": 8385 }, { "epoch": 0.85, "grad_norm": 11.08015527528984, "learning_rate": 9.02485418270991e-06, "loss": 1.2314, "step": 8390 }, { "epoch": 0.85, "grad_norm": 23.025894946322406, "learning_rate": 9.023113300539457e-06, "loss": 1.2254, "step": 8395 }, { "epoch": 0.85, "grad_norm": 93.79636474761914, "learning_rate": 9.021371034019559e-06, "loss": 1.2282, "step": 8400 }, { "epoch": 0.85, "grad_norm": 28.079331258559648, "learning_rate": 9.019627383749728e-06, "loss": 1.2048, "step": 8405 }, { "epoch": 0.85, "grad_norm": 60.348869043870465, "learning_rate": 9.017882350329955e-06, "loss": 1.3066, "step": 8410 }, { "epoch": 0.85, "grad_norm": 33.47748076849988, "learning_rate": 9.016135934360703e-06, "loss": 1.2841, "step": 8415 }, { "epoch": 0.85, "grad_norm": 32.28164414314165, "learning_rate": 9.014388136442912e-06, "loss": 1.2336, "step": 8420 }, { "epoch": 0.85, "grad_norm": 12.344102265445358, "learning_rate": 9.012638957177994e-06, "loss": 1.2027, "step": 8425 }, { "epoch": 0.85, "grad_norm": 7.961658100350237, "learning_rate": 9.010888397167848e-06, "loss": 1.227, "step": 8430 }, { "epoch": 0.85, "grad_norm": 30.993547565692637, "learning_rate": 9.009136457014833e-06, "loss": 1.1944, "step": 8435 }, { "epoch": 0.85, "grad_norm": 10.533667423823333, "learning_rate": 9.007383137321793e-06, "loss": 1.1939, "step": 8440 }, { "epoch": 0.85, "grad_norm": 20.641505087389255, "learning_rate": 9.005628438692042e-06, "loss": 1.1807, "step": 8445 }, { "epoch": 0.85, "grad_norm": 27.084097419574157, "learning_rate": 9.003872361729371e-06, "loss": 1.2487, "step": 8450 }, { "epoch": 0.85, "grad_norm": 55.487939119016765, "learning_rate": 9.002114907038045e-06, "loss": 1.2511, "step": 8455 }, { "epoch": 0.85, "grad_norm": 38.647380685218614, "learning_rate": 9.000356075222803e-06, "loss": 1.2469, "step": 8460 }, { "epoch": 0.85, "grad_norm": 18.655068951279034, "learning_rate": 8.998595866888855e-06, "loss": 1.2072, "step": 8465 }, { "epoch": 0.85, "grad_norm": 14.370284045382212, "learning_rate": 8.996834282641889e-06, "loss": 1.2035, "step": 8470 }, { "epoch": 0.85, "grad_norm": 10.700022848670333, "learning_rate": 8.995071323088063e-06, "loss": 1.1962, "step": 8475 }, { "epoch": 0.85, "grad_norm": 19.48844904276957, "learning_rate": 8.99330698883401e-06, "loss": 1.2056, "step": 8480 }, { "epoch": 0.86, "grad_norm": 20.600346419580166, "learning_rate": 8.991541280486838e-06, "loss": 1.2016, "step": 8485 }, { "epoch": 0.86, "grad_norm": 8.50302243666553, "learning_rate": 8.989774198654123e-06, "loss": 1.2685, "step": 8490 }, { "epoch": 0.86, "grad_norm": 8.320021532955366, "learning_rate": 8.988005743943916e-06, "loss": 1.228, "step": 8495 }, { "epoch": 0.86, "grad_norm": 8.707022281835492, "learning_rate": 8.986235916964742e-06, "loss": 1.2113, "step": 8500 }, { "epoch": 0.86, "grad_norm": 11.868031314799666, "learning_rate": 8.984464718325596e-06, "loss": 1.2128, "step": 8505 }, { "epoch": 0.86, "grad_norm": 14.321884298163736, "learning_rate": 8.982692148635947e-06, "loss": 1.1707, "step": 8510 }, { "epoch": 0.86, "grad_norm": 11.031382634589063, "learning_rate": 8.980918208505734e-06, "loss": 1.1996, "step": 8515 }, { "epoch": 0.86, "grad_norm": 7.593011499567609, "learning_rate": 8.979142898545366e-06, "loss": 1.2297, "step": 8520 }, { "epoch": 0.86, "grad_norm": 17.85682476258015, "learning_rate": 8.977366219365728e-06, "loss": 1.2096, "step": 8525 }, { "epoch": 0.86, "grad_norm": 17.337151667033524, "learning_rate": 8.975588171578172e-06, "loss": 1.2184, "step": 8530 }, { "epoch": 0.86, "grad_norm": 19.574082092030107, "learning_rate": 8.973808755794525e-06, "loss": 1.2287, "step": 8535 }, { "epoch": 0.86, "grad_norm": 10.340481711809604, "learning_rate": 8.972027972627078e-06, "loss": 1.1706, "step": 8540 }, { "epoch": 0.86, "grad_norm": 16.560002968647215, "learning_rate": 8.970245822688601e-06, "loss": 1.2048, "step": 8545 }, { "epoch": 0.86, "grad_norm": 14.707635187199964, "learning_rate": 8.968462306592328e-06, "loss": 1.195, "step": 8550 }, { "epoch": 0.86, "grad_norm": 11.796462917778843, "learning_rate": 8.966677424951966e-06, "loss": 1.168, "step": 8555 }, { "epoch": 0.86, "grad_norm": 6.260137171097879, "learning_rate": 8.964891178381691e-06, "loss": 1.2253, "step": 8560 }, { "epoch": 0.86, "grad_norm": 25.65853338757763, "learning_rate": 8.963103567496148e-06, "loss": 1.344, "step": 8565 }, { "epoch": 0.86, "grad_norm": 7.153776235558424, "learning_rate": 8.961314592910452e-06, "loss": 1.1815, "step": 8570 }, { "epoch": 0.86, "grad_norm": 21.907940778880246, "learning_rate": 8.959524255240189e-06, "loss": 1.2651, "step": 8575 }, { "epoch": 0.87, "grad_norm": 22.521581700435902, "learning_rate": 8.95773255510141e-06, "loss": 1.1989, "step": 8580 }, { "epoch": 0.87, "grad_norm": 7.53693385733085, "learning_rate": 8.95593949311064e-06, "loss": 1.1989, "step": 8585 }, { "epoch": 0.87, "grad_norm": 34.65238445292161, "learning_rate": 8.95414506988487e-06, "loss": 1.2189, "step": 8590 }, { "epoch": 0.87, "grad_norm": 13.09106999665172, "learning_rate": 8.952349286041556e-06, "loss": 1.2075, "step": 8595 }, { "epoch": 0.87, "grad_norm": 46.468132754859965, "learning_rate": 8.950552142198628e-06, "loss": 1.1656, "step": 8600 }, { "epoch": 0.87, "grad_norm": 26.328771913319503, "learning_rate": 8.948753638974482e-06, "loss": 1.2187, "step": 8605 }, { "epoch": 0.87, "grad_norm": 9.830332243757532, "learning_rate": 8.94695377698798e-06, "loss": 1.2091, "step": 8610 }, { "epoch": 0.87, "grad_norm": 53.041771160912965, "learning_rate": 8.945152556858452e-06, "loss": 1.2352, "step": 8615 }, { "epoch": 0.87, "grad_norm": 49.80951360537134, "learning_rate": 8.9433499792057e-06, "loss": 1.262, "step": 8620 }, { "epoch": 0.87, "grad_norm": 95.03977633541976, "learning_rate": 8.941546044649985e-06, "loss": 1.2122, "step": 8625 }, { "epoch": 0.87, "grad_norm": 18.152155839898636, "learning_rate": 8.93974075381204e-06, "loss": 1.1913, "step": 8630 }, { "epoch": 0.87, "grad_norm": 16.884864637091642, "learning_rate": 8.937934107313065e-06, "loss": 1.1907, "step": 8635 }, { "epoch": 0.87, "grad_norm": 21.758968553236418, "learning_rate": 8.936126105774725e-06, "loss": 1.2776, "step": 8640 }, { "epoch": 0.87, "grad_norm": 65.77897117650656, "learning_rate": 8.93431674981915e-06, "loss": 1.2564, "step": 8645 }, { "epoch": 0.87, "grad_norm": 37.48469408393138, "learning_rate": 8.93250604006894e-06, "loss": 1.2701, "step": 8650 }, { "epoch": 0.87, "grad_norm": 20.5819566041243, "learning_rate": 8.930693977147157e-06, "loss": 1.2082, "step": 8655 }, { "epoch": 0.87, "grad_norm": 11.838012542117974, "learning_rate": 8.928880561677333e-06, "loss": 1.2151, "step": 8660 }, { "epoch": 0.87, "grad_norm": 28.934816659757903, "learning_rate": 8.927065794283458e-06, "loss": 1.2108, "step": 8665 }, { "epoch": 0.87, "grad_norm": 30.015851158231182, "learning_rate": 8.925249675589995e-06, "loss": 1.207, "step": 8670 }, { "epoch": 0.87, "grad_norm": 18.05708344697593, "learning_rate": 8.923432206221868e-06, "loss": 1.2508, "step": 8675 }, { "epoch": 0.88, "grad_norm": 13.071404346946036, "learning_rate": 8.921613386804465e-06, "loss": 1.25, "step": 8680 }, { "epoch": 0.88, "grad_norm": 10.1873966208491, "learning_rate": 8.919793217963642e-06, "loss": 1.1895, "step": 8685 }, { "epoch": 0.88, "grad_norm": 86.5409264169609, "learning_rate": 8.917971700325717e-06, "loss": 1.2177, "step": 8690 }, { "epoch": 0.88, "grad_norm": 48.8292154136072, "learning_rate": 8.91614883451747e-06, "loss": 1.2553, "step": 8695 }, { "epoch": 0.88, "grad_norm": 10.72765388759314, "learning_rate": 8.914324621166151e-06, "loss": 1.2569, "step": 8700 }, { "epoch": 0.88, "grad_norm": 14.99381671321517, "learning_rate": 8.912499060899469e-06, "loss": 1.2478, "step": 8705 }, { "epoch": 0.88, "grad_norm": 23.038501634929837, "learning_rate": 8.910672154345596e-06, "loss": 1.2592, "step": 8710 }, { "epoch": 0.88, "grad_norm": 86.91259519907479, "learning_rate": 8.90884390213317e-06, "loss": 1.2319, "step": 8715 }, { "epoch": 0.88, "grad_norm": 72.00559372046911, "learning_rate": 8.907014304891291e-06, "loss": 1.1971, "step": 8720 }, { "epoch": 0.88, "grad_norm": 52.695079478540215, "learning_rate": 8.905183363249521e-06, "loss": 1.2671, "step": 8725 }, { "epoch": 0.88, "grad_norm": 21.294026987914467, "learning_rate": 8.903351077837885e-06, "loss": 1.2787, "step": 8730 }, { "epoch": 0.88, "grad_norm": 26.77369152335356, "learning_rate": 8.901517449286871e-06, "loss": 1.241, "step": 8735 }, { "epoch": 0.88, "grad_norm": 50.896197869777964, "learning_rate": 8.89968247822743e-06, "loss": 1.2331, "step": 8740 }, { "epoch": 0.88, "grad_norm": 31.150932876356467, "learning_rate": 8.897846165290974e-06, "loss": 1.2359, "step": 8745 }, { "epoch": 0.88, "grad_norm": 17.45636798929398, "learning_rate": 8.896008511109373e-06, "loss": 1.2213, "step": 8750 }, { "epoch": 0.88, "grad_norm": 34.80863089297944, "learning_rate": 8.894169516314966e-06, "loss": 1.2084, "step": 8755 }, { "epoch": 0.88, "grad_norm": 5.824283701844934, "learning_rate": 8.892329181540547e-06, "loss": 1.2012, "step": 8760 }, { "epoch": 0.88, "grad_norm": 20.875788115200002, "learning_rate": 8.890487507419375e-06, "loss": 1.1967, "step": 8765 }, { "epoch": 0.88, "grad_norm": 10.419811862747704, "learning_rate": 8.888644494585167e-06, "loss": 1.2338, "step": 8770 }, { "epoch": 0.88, "grad_norm": 11.537355999648662, "learning_rate": 8.886800143672103e-06, "loss": 1.2357, "step": 8775 }, { "epoch": 0.89, "grad_norm": 16.75738643127998, "learning_rate": 8.884954455314822e-06, "loss": 1.2745, "step": 8780 }, { "epoch": 0.89, "grad_norm": 7.229991642182542, "learning_rate": 8.883107430148422e-06, "loss": 1.2192, "step": 8785 }, { "epoch": 0.89, "grad_norm": 9.311212308408923, "learning_rate": 8.881259068808466e-06, "loss": 1.239, "step": 8790 }, { "epoch": 0.89, "grad_norm": 9.739353701546275, "learning_rate": 8.879409371930969e-06, "loss": 1.1639, "step": 8795 }, { "epoch": 0.89, "grad_norm": 20.381728011219263, "learning_rate": 8.877558340152414e-06, "loss": 1.2314, "step": 8800 }, { "epoch": 0.89, "grad_norm": 10.386860710130472, "learning_rate": 8.875705974109738e-06, "loss": 1.2133, "step": 8805 }, { "epoch": 0.89, "grad_norm": 19.316580467726308, "learning_rate": 8.873852274440337e-06, "loss": 1.234, "step": 8810 }, { "epoch": 0.89, "grad_norm": 6.727198524509611, "learning_rate": 8.87199724178207e-06, "loss": 1.2077, "step": 8815 }, { "epoch": 0.89, "grad_norm": 9.958332741842913, "learning_rate": 8.87014087677325e-06, "loss": 1.2547, "step": 8820 }, { "epoch": 0.89, "grad_norm": 13.067840849747313, "learning_rate": 8.868283180052648e-06, "loss": 1.227, "step": 8825 }, { "epoch": 0.89, "grad_norm": 23.86742366801994, "learning_rate": 8.866424152259501e-06, "loss": 1.2002, "step": 8830 }, { "epoch": 0.89, "grad_norm": 10.579342887032047, "learning_rate": 8.864563794033496e-06, "loss": 1.226, "step": 8835 }, { "epoch": 0.89, "grad_norm": 32.68986457966961, "learning_rate": 8.86270210601478e-06, "loss": 1.2414, "step": 8840 }, { "epoch": 0.89, "grad_norm": 14.087093156037234, "learning_rate": 8.860839088843958e-06, "loss": 1.2746, "step": 8845 }, { "epoch": 0.89, "grad_norm": 22.212447048092486, "learning_rate": 8.858974743162095e-06, "loss": 1.2399, "step": 8850 }, { "epoch": 0.89, "grad_norm": 9.496678463979974, "learning_rate": 8.857109069610706e-06, "loss": 1.1913, "step": 8855 }, { "epoch": 0.89, "grad_norm": 16.26558139753938, "learning_rate": 8.85524206883177e-06, "loss": 1.2259, "step": 8860 }, { "epoch": 0.89, "grad_norm": 15.567739935203726, "learning_rate": 8.853373741467724e-06, "loss": 1.2014, "step": 8865 }, { "epoch": 0.89, "grad_norm": 7.243019396023581, "learning_rate": 8.85150408816145e-06, "loss": 1.1706, "step": 8870 }, { "epoch": 0.89, "grad_norm": 12.90760732651893, "learning_rate": 8.8496331095563e-06, "loss": 1.231, "step": 8875 }, { "epoch": 0.9, "grad_norm": 6.272240575737805, "learning_rate": 8.847760806296072e-06, "loss": 1.1923, "step": 8880 }, { "epoch": 0.9, "grad_norm": 9.876889907445083, "learning_rate": 8.845887179025029e-06, "loss": 1.2276, "step": 8885 }, { "epoch": 0.9, "grad_norm": 9.256098634241363, "learning_rate": 8.844012228387879e-06, "loss": 1.2183, "step": 8890 }, { "epoch": 0.9, "grad_norm": 8.472058437265845, "learning_rate": 8.84213595502979e-06, "loss": 1.2006, "step": 8895 }, { "epoch": 0.9, "grad_norm": 10.871714887277207, "learning_rate": 8.840258359596392e-06, "loss": 1.2144, "step": 8900 }, { "epoch": 0.9, "grad_norm": 9.80383694900924, "learning_rate": 8.83837944273376e-06, "loss": 1.2478, "step": 8905 }, { "epoch": 0.9, "grad_norm": 6.231795176525273, "learning_rate": 8.836499205088426e-06, "loss": 1.1868, "step": 8910 }, { "epoch": 0.9, "grad_norm": 10.804885651323739, "learning_rate": 8.834617647307383e-06, "loss": 1.24, "step": 8915 }, { "epoch": 0.9, "grad_norm": 6.386285962788818, "learning_rate": 8.832734770038068e-06, "loss": 1.2363, "step": 8920 }, { "epoch": 0.9, "grad_norm": 7.478128391048558, "learning_rate": 8.830850573928378e-06, "loss": 1.2555, "step": 8925 }, { "epoch": 0.9, "grad_norm": 8.276309193456074, "learning_rate": 8.828965059626666e-06, "loss": 1.2382, "step": 8930 }, { "epoch": 0.9, "grad_norm": 20.538374321553064, "learning_rate": 8.827078227781734e-06, "loss": 1.2285, "step": 8935 }, { "epoch": 0.9, "grad_norm": 12.22973284606407, "learning_rate": 8.825190079042839e-06, "loss": 1.1935, "step": 8940 }, { "epoch": 0.9, "grad_norm": 10.121712987282244, "learning_rate": 8.823300614059692e-06, "loss": 1.2082, "step": 8945 }, { "epoch": 0.9, "grad_norm": 18.410891172450526, "learning_rate": 8.821409833482454e-06, "loss": 1.1824, "step": 8950 }, { "epoch": 0.9, "grad_norm": 23.697063264981985, "learning_rate": 8.819517737961742e-06, "loss": 1.2121, "step": 8955 }, { "epoch": 0.9, "grad_norm": 22.61499589372635, "learning_rate": 8.817624328148627e-06, "loss": 1.2562, "step": 8960 }, { "epoch": 0.9, "grad_norm": 62.12866389191985, "learning_rate": 8.815729604694624e-06, "loss": 1.24, "step": 8965 }, { "epoch": 0.9, "grad_norm": 15.910338303371752, "learning_rate": 8.813833568251708e-06, "loss": 1.2244, "step": 8970 }, { "epoch": 0.9, "grad_norm": 40.97912228130415, "learning_rate": 8.811936219472306e-06, "loss": 1.2409, "step": 8975 }, { "epoch": 0.91, "grad_norm": 185.10887018584373, "learning_rate": 8.810037559009292e-06, "loss": 1.2355, "step": 8980 }, { "epoch": 0.91, "grad_norm": 124.19054332408055, "learning_rate": 8.808137587515992e-06, "loss": 1.2855, "step": 8985 }, { "epoch": 0.91, "grad_norm": 68.21812846024108, "learning_rate": 8.806236305646185e-06, "loss": 1.3029, "step": 8990 }, { "epoch": 0.91, "grad_norm": 26.761045112155376, "learning_rate": 8.8043337140541e-06, "loss": 1.3163, "step": 8995 }, { "epoch": 0.91, "grad_norm": 49.033990563804736, "learning_rate": 8.80242981339442e-06, "loss": 1.2956, "step": 9000 }, { "epoch": 0.91, "grad_norm": 21.560566087023556, "learning_rate": 8.800524604322272e-06, "loss": 1.2232, "step": 9005 }, { "epoch": 0.91, "grad_norm": 16.567735923502728, "learning_rate": 8.798618087493237e-06, "loss": 1.2081, "step": 9010 }, { "epoch": 0.91, "grad_norm": 14.993297853077632, "learning_rate": 8.796710263563345e-06, "loss": 1.2857, "step": 9015 }, { "epoch": 0.91, "grad_norm": 13.15594183914007, "learning_rate": 8.794801133189079e-06, "loss": 1.2217, "step": 9020 }, { "epoch": 0.91, "grad_norm": 7.100897474881501, "learning_rate": 8.792890697027367e-06, "loss": 1.1967, "step": 9025 }, { "epoch": 0.91, "grad_norm": 14.349787053381577, "learning_rate": 8.79097895573559e-06, "loss": 1.2492, "step": 9030 }, { "epoch": 0.91, "grad_norm": 54.46800846028436, "learning_rate": 8.789065909971574e-06, "loss": 1.2935, "step": 9035 }, { "epoch": 0.91, "grad_norm": 58.03382374458775, "learning_rate": 8.787151560393597e-06, "loss": 1.2109, "step": 9040 }, { "epoch": 0.91, "grad_norm": 15.560042500814026, "learning_rate": 8.785235907660385e-06, "loss": 1.2293, "step": 9045 }, { "epoch": 0.91, "grad_norm": 7.94549857333933, "learning_rate": 8.783318952431114e-06, "loss": 1.2127, "step": 9050 }, { "epoch": 0.91, "grad_norm": 19.386889362913447, "learning_rate": 8.781400695365405e-06, "loss": 1.235, "step": 9055 }, { "epoch": 0.91, "grad_norm": 10.863732384837983, "learning_rate": 8.779481137123327e-06, "loss": 1.2258, "step": 9060 }, { "epoch": 0.91, "grad_norm": 34.821485508785, "learning_rate": 8.7775602783654e-06, "loss": 1.2098, "step": 9065 }, { "epoch": 0.91, "grad_norm": 21.310302561173884, "learning_rate": 8.77563811975259e-06, "loss": 1.2393, "step": 9070 }, { "epoch": 0.91, "grad_norm": 7.004806784226558, "learning_rate": 8.77371466194631e-06, "loss": 1.2415, "step": 9075 }, { "epoch": 0.92, "grad_norm": 23.93245509241067, "learning_rate": 8.771789905608416e-06, "loss": 1.2573, "step": 9080 }, { "epoch": 0.92, "grad_norm": 25.861977789777615, "learning_rate": 8.769863851401219e-06, "loss": 1.2743, "step": 9085 }, { "epoch": 0.92, "grad_norm": 10.755736619167987, "learning_rate": 8.767936499987473e-06, "loss": 1.2376, "step": 9090 }, { "epoch": 0.92, "grad_norm": 17.211424345701793, "learning_rate": 8.766007852030373e-06, "loss": 1.2578, "step": 9095 }, { "epoch": 0.92, "grad_norm": 7.148920549885833, "learning_rate": 8.764077908193571e-06, "loss": 1.1645, "step": 9100 }, { "epoch": 0.92, "grad_norm": 7.930321153879788, "learning_rate": 8.762146669141156e-06, "loss": 1.1894, "step": 9105 }, { "epoch": 0.92, "grad_norm": 7.029191550269419, "learning_rate": 8.760214135537663e-06, "loss": 1.1963, "step": 9110 }, { "epoch": 0.92, "grad_norm": 9.684012600037251, "learning_rate": 8.758280308048079e-06, "loss": 1.2488, "step": 9115 }, { "epoch": 0.92, "grad_norm": 25.043411822630535, "learning_rate": 8.75634518733783e-06, "loss": 1.2259, "step": 9120 }, { "epoch": 0.92, "grad_norm": 20.978717883249782, "learning_rate": 8.754408774072791e-06, "loss": 1.2128, "step": 9125 }, { "epoch": 0.92, "grad_norm": 13.364554753385073, "learning_rate": 8.752471068919277e-06, "loss": 1.274, "step": 9130 }, { "epoch": 0.92, "grad_norm": 7.852807191673204, "learning_rate": 8.750532072544053e-06, "loss": 1.269, "step": 9135 }, { "epoch": 0.92, "grad_norm": 6.0768275552660755, "learning_rate": 8.748591785614327e-06, "loss": 1.2448, "step": 9140 }, { "epoch": 0.92, "grad_norm": 7.895062035573548, "learning_rate": 8.746650208797745e-06, "loss": 1.237, "step": 9145 }, { "epoch": 0.92, "grad_norm": 18.688327529636332, "learning_rate": 8.744707342762406e-06, "loss": 1.2442, "step": 9150 }, { "epoch": 0.92, "grad_norm": 11.773096278719505, "learning_rate": 8.742763188176845e-06, "loss": 1.2289, "step": 9155 }, { "epoch": 0.92, "grad_norm": 7.539377156107374, "learning_rate": 8.740817745710049e-06, "loss": 1.2787, "step": 9160 }, { "epoch": 0.92, "grad_norm": 30.669054910974296, "learning_rate": 8.738871016031438e-06, "loss": 1.1643, "step": 9165 }, { "epoch": 0.92, "grad_norm": 13.535314129564306, "learning_rate": 8.73692299981088e-06, "loss": 1.2147, "step": 9170 }, { "epoch": 0.93, "grad_norm": 8.407433647498465, "learning_rate": 8.734973697718689e-06, "loss": 1.241, "step": 9175 }, { "epoch": 0.93, "grad_norm": 11.41579698367922, "learning_rate": 8.733023110425616e-06, "loss": 1.2248, "step": 9180 }, { "epoch": 0.93, "grad_norm": 17.82990665062719, "learning_rate": 8.731071238602855e-06, "loss": 1.2382, "step": 9185 }, { "epoch": 0.93, "grad_norm": 6.384785763412171, "learning_rate": 8.729118082922044e-06, "loss": 1.2981, "step": 9190 }, { "epoch": 0.93, "grad_norm": 12.22399926884024, "learning_rate": 8.727163644055263e-06, "loss": 1.2077, "step": 9195 }, { "epoch": 0.93, "grad_norm": 10.248519938538731, "learning_rate": 8.725207922675032e-06, "loss": 1.1737, "step": 9200 }, { "epoch": 0.93, "grad_norm": 20.2802850223925, "learning_rate": 8.723250919454313e-06, "loss": 1.2105, "step": 9205 }, { "epoch": 0.93, "grad_norm": 9.164284527748045, "learning_rate": 8.721292635066509e-06, "loss": 1.1661, "step": 9210 }, { "epoch": 0.93, "grad_norm": 7.118906619701703, "learning_rate": 8.719333070185463e-06, "loss": 1.1894, "step": 9215 }, { "epoch": 0.93, "grad_norm": 8.555578503418722, "learning_rate": 8.71737222548546e-06, "loss": 1.2029, "step": 9220 }, { "epoch": 0.93, "grad_norm": 10.287993256735366, "learning_rate": 8.715410101641225e-06, "loss": 1.239, "step": 9225 }, { "epoch": 0.93, "grad_norm": 13.616509952185027, "learning_rate": 8.71344669932792e-06, "loss": 1.2277, "step": 9230 }, { "epoch": 0.93, "grad_norm": 10.266653583169361, "learning_rate": 8.711482019221157e-06, "loss": 1.2479, "step": 9235 }, { "epoch": 0.93, "grad_norm": 12.831512190806212, "learning_rate": 8.709516061996973e-06, "loss": 1.2383, "step": 9240 }, { "epoch": 0.93, "grad_norm": 7.612810498848723, "learning_rate": 8.707548828331856e-06, "loss": 1.1593, "step": 9245 }, { "epoch": 0.93, "grad_norm": 12.042277080028542, "learning_rate": 8.705580318902728e-06, "loss": 1.2517, "step": 9250 }, { "epoch": 0.93, "grad_norm": 7.223576413834543, "learning_rate": 8.703610534386952e-06, "loss": 1.2395, "step": 9255 }, { "epoch": 0.93, "grad_norm": 14.846067819244304, "learning_rate": 8.70163947546233e-06, "loss": 1.1662, "step": 9260 }, { "epoch": 0.93, "grad_norm": 19.750262940528504, "learning_rate": 8.699667142807096e-06, "loss": 1.2339, "step": 9265 }, { "epoch": 0.93, "grad_norm": 6.201453699126599, "learning_rate": 8.697693537099935e-06, "loss": 1.218, "step": 9270 }, { "epoch": 0.94, "grad_norm": 14.547911380471197, "learning_rate": 8.695718659019957e-06, "loss": 1.2112, "step": 9275 }, { "epoch": 0.94, "grad_norm": 10.642260834594786, "learning_rate": 8.69374250924672e-06, "loss": 1.2476, "step": 9280 }, { "epoch": 0.94, "grad_norm": 17.92848949067963, "learning_rate": 8.691765088460214e-06, "loss": 1.193, "step": 9285 }, { "epoch": 0.94, "grad_norm": 27.8880184994152, "learning_rate": 8.689786397340866e-06, "loss": 1.2467, "step": 9290 }, { "epoch": 0.94, "grad_norm": 33.71047112323762, "learning_rate": 8.687806436569544e-06, "loss": 1.2282, "step": 9295 }, { "epoch": 0.94, "grad_norm": 30.28245393486485, "learning_rate": 8.685825206827549e-06, "loss": 1.1944, "step": 9300 }, { "epoch": 0.94, "grad_norm": 11.01931733205294, "learning_rate": 8.68384270879662e-06, "loss": 1.1915, "step": 9305 }, { "epoch": 0.94, "grad_norm": 9.301290633165818, "learning_rate": 8.681858943158934e-06, "loss": 1.218, "step": 9310 }, { "epoch": 0.94, "grad_norm": 59.894527194127896, "learning_rate": 8.6798739105971e-06, "loss": 1.2276, "step": 9315 }, { "epoch": 0.94, "grad_norm": 60.59704754802048, "learning_rate": 8.677887611794171e-06, "loss": 1.2136, "step": 9320 }, { "epoch": 0.94, "grad_norm": 31.229948653849934, "learning_rate": 8.675900047433628e-06, "loss": 1.2148, "step": 9325 }, { "epoch": 0.94, "grad_norm": 9.751502635285522, "learning_rate": 8.673911218199387e-06, "loss": 1.2416, "step": 9330 }, { "epoch": 0.94, "grad_norm": 27.156635876069526, "learning_rate": 8.671921124775807e-06, "loss": 1.2311, "step": 9335 }, { "epoch": 0.94, "grad_norm": 28.13243527055898, "learning_rate": 8.669929767847673e-06, "loss": 1.2309, "step": 9340 }, { "epoch": 0.94, "grad_norm": 21.92555759390989, "learning_rate": 8.667937148100211e-06, "loss": 1.2145, "step": 9345 }, { "epoch": 0.94, "grad_norm": 16.51310034385813, "learning_rate": 8.665943266219081e-06, "loss": 1.273, "step": 9350 }, { "epoch": 0.94, "grad_norm": 60.47307916368799, "learning_rate": 8.663948122890376e-06, "loss": 1.2216, "step": 9355 }, { "epoch": 0.94, "grad_norm": 27.728669094488204, "learning_rate": 8.661951718800618e-06, "loss": 1.2355, "step": 9360 }, { "epoch": 0.94, "grad_norm": 12.484033499295984, "learning_rate": 8.659954054636774e-06, "loss": 1.2516, "step": 9365 }, { "epoch": 0.94, "grad_norm": 8.625017951128116, "learning_rate": 8.657955131086234e-06, "loss": 1.1943, "step": 9370 }, { "epoch": 0.95, "grad_norm": 8.072922518170913, "learning_rate": 8.655954948836826e-06, "loss": 1.2521, "step": 9375 }, { "epoch": 0.95, "grad_norm": 20.147834103743097, "learning_rate": 8.653953508576813e-06, "loss": 1.2457, "step": 9380 }, { "epoch": 0.95, "grad_norm": 8.187688426230986, "learning_rate": 8.65195081099489e-06, "loss": 1.2233, "step": 9385 }, { "epoch": 0.95, "grad_norm": 5.52875596393606, "learning_rate": 8.649946856780178e-06, "loss": 1.1896, "step": 9390 }, { "epoch": 0.95, "grad_norm": 5.634859897158842, "learning_rate": 8.64794164662224e-06, "loss": 1.2319, "step": 9395 }, { "epoch": 0.95, "grad_norm": 7.724500236711942, "learning_rate": 8.645935181211065e-06, "loss": 1.185, "step": 9400 }, { "epoch": 0.95, "grad_norm": 6.001379200664274, "learning_rate": 8.643927461237076e-06, "loss": 1.1959, "step": 9405 }, { "epoch": 0.95, "grad_norm": 21.132559590260563, "learning_rate": 8.641918487391129e-06, "loss": 1.2037, "step": 9410 }, { "epoch": 0.95, "grad_norm": 8.29694004153204, "learning_rate": 8.639908260364506e-06, "loss": 1.2135, "step": 9415 }, { "epoch": 0.95, "grad_norm": 16.94645524463928, "learning_rate": 8.637896780848932e-06, "loss": 1.1843, "step": 9420 }, { "epoch": 0.95, "grad_norm": 33.47190874015566, "learning_rate": 8.635884049536548e-06, "loss": 1.2311, "step": 9425 }, { "epoch": 0.95, "grad_norm": 23.42170464750877, "learning_rate": 8.633870067119934e-06, "loss": 1.2217, "step": 9430 }, { "epoch": 0.95, "grad_norm": 18.017418447170066, "learning_rate": 8.631854834292102e-06, "loss": 1.2402, "step": 9435 }, { "epoch": 0.95, "grad_norm": 13.972512670568745, "learning_rate": 8.629838351746488e-06, "loss": 1.1981, "step": 9440 }, { "epoch": 0.95, "grad_norm": 6.461769001581415, "learning_rate": 8.627820620176967e-06, "loss": 1.2776, "step": 9445 }, { "epoch": 0.95, "grad_norm": 10.786493970019889, "learning_rate": 8.625801640277835e-06, "loss": 1.1868, "step": 9450 }, { "epoch": 0.95, "grad_norm": 29.018649268371522, "learning_rate": 8.62378141274382e-06, "loss": 1.1648, "step": 9455 }, { "epoch": 0.95, "grad_norm": 45.83096642377824, "learning_rate": 8.621759938270085e-06, "loss": 1.204, "step": 9460 }, { "epoch": 0.95, "grad_norm": 37.56036797387887, "learning_rate": 8.619737217552213e-06, "loss": 1.2293, "step": 9465 }, { "epoch": 0.95, "grad_norm": 17.85963464956369, "learning_rate": 8.617713251286221e-06, "loss": 1.2041, "step": 9470 }, { "epoch": 0.96, "grad_norm": 28.08967115530905, "learning_rate": 8.615688040168554e-06, "loss": 1.2017, "step": 9475 }, { "epoch": 0.96, "grad_norm": 12.488225184800497, "learning_rate": 8.613661584896088e-06, "loss": 1.1837, "step": 9480 }, { "epoch": 0.96, "grad_norm": 12.232954988608356, "learning_rate": 8.61163388616612e-06, "loss": 1.2455, "step": 9485 }, { "epoch": 0.96, "grad_norm": 9.295162473894017, "learning_rate": 8.609604944676382e-06, "loss": 1.1843, "step": 9490 }, { "epoch": 0.96, "grad_norm": 6.202453090548019, "learning_rate": 8.607574761125029e-06, "loss": 1.1921, "step": 9495 }, { "epoch": 0.96, "grad_norm": 11.589082214759323, "learning_rate": 8.605543336210648e-06, "loss": 1.2447, "step": 9500 }, { "epoch": 0.96, "grad_norm": 21.868788541707016, "learning_rate": 8.603510670632248e-06, "loss": 1.1866, "step": 9505 }, { "epoch": 0.96, "grad_norm": 15.43176697786205, "learning_rate": 8.601476765089267e-06, "loss": 1.2228, "step": 9510 }, { "epoch": 0.96, "grad_norm": 25.64496682281605, "learning_rate": 8.599441620281573e-06, "loss": 1.188, "step": 9515 }, { "epoch": 0.96, "grad_norm": 29.764310160845312, "learning_rate": 8.597405236909454e-06, "loss": 1.2362, "step": 9520 }, { "epoch": 0.96, "grad_norm": 32.23508806917358, "learning_rate": 8.59536761567363e-06, "loss": 1.1883, "step": 9525 }, { "epoch": 0.96, "grad_norm": 17.162439336224594, "learning_rate": 8.593328757275244e-06, "loss": 1.2109, "step": 9530 }, { "epoch": 0.96, "grad_norm": 7.8497243633031735, "learning_rate": 8.591288662415863e-06, "loss": 1.1849, "step": 9535 }, { "epoch": 0.96, "grad_norm": 7.325698207035451, "learning_rate": 8.589247331797482e-06, "loss": 1.2294, "step": 9540 }, { "epoch": 0.96, "grad_norm": 8.684046114236223, "learning_rate": 8.587204766122524e-06, "loss": 1.2249, "step": 9545 }, { "epoch": 0.96, "grad_norm": 5.780997127520526, "learning_rate": 8.585160966093832e-06, "loss": 1.1899, "step": 9550 }, { "epoch": 0.96, "grad_norm": 10.076217961018143, "learning_rate": 8.583115932414677e-06, "loss": 1.1961, "step": 9555 }, { "epoch": 0.96, "grad_norm": 11.558273083924005, "learning_rate": 8.58106966578875e-06, "loss": 1.1607, "step": 9560 }, { "epoch": 0.96, "grad_norm": 10.86979617003217, "learning_rate": 8.579022166920172e-06, "loss": 1.2633, "step": 9565 }, { "epoch": 0.96, "grad_norm": 25.659561988607937, "learning_rate": 8.576973436513485e-06, "loss": 1.1857, "step": 9570 }, { "epoch": 0.97, "grad_norm": 46.048303619796755, "learning_rate": 8.574923475273656e-06, "loss": 1.2248, "step": 9575 }, { "epoch": 0.97, "grad_norm": 7.727155163521066, "learning_rate": 8.572872283906071e-06, "loss": 1.2353, "step": 9580 }, { "epoch": 0.97, "grad_norm": 10.94732184496867, "learning_rate": 8.570819863116546e-06, "loss": 1.1864, "step": 9585 }, { "epoch": 0.97, "grad_norm": 20.209059290959434, "learning_rate": 8.56876621361132e-06, "loss": 1.2085, "step": 9590 }, { "epoch": 0.97, "grad_norm": 15.880908707190672, "learning_rate": 8.566711336097045e-06, "loss": 1.2717, "step": 9595 }, { "epoch": 0.97, "grad_norm": 12.070944510809447, "learning_rate": 8.564655231280808e-06, "loss": 1.2236, "step": 9600 }, { "epoch": 0.97, "grad_norm": 34.890685751008064, "learning_rate": 8.562597899870107e-06, "loss": 1.1806, "step": 9605 }, { "epoch": 0.97, "grad_norm": 17.757711801324973, "learning_rate": 8.560539342572874e-06, "loss": 1.1937, "step": 9610 }, { "epoch": 0.97, "grad_norm": 26.934656558915368, "learning_rate": 8.558479560097455e-06, "loss": 1.2282, "step": 9615 }, { "epoch": 0.97, "grad_norm": 10.051031400680982, "learning_rate": 8.556418553152617e-06, "loss": 1.199, "step": 9620 }, { "epoch": 0.97, "grad_norm": 6.064718410605512, "learning_rate": 8.55435632244755e-06, "loss": 1.1695, "step": 9625 }, { "epoch": 0.97, "grad_norm": 7.585082014770853, "learning_rate": 8.552292868691869e-06, "loss": 1.184, "step": 9630 }, { "epoch": 0.97, "grad_norm": 10.378246934152191, "learning_rate": 8.550228192595604e-06, "loss": 1.2185, "step": 9635 }, { "epoch": 0.97, "grad_norm": 12.97285764971995, "learning_rate": 8.54816229486921e-06, "loss": 1.195, "step": 9640 }, { "epoch": 0.97, "grad_norm": 12.008911383877557, "learning_rate": 8.546095176223559e-06, "loss": 1.1942, "step": 9645 }, { "epoch": 0.97, "grad_norm": 6.92183947353392, "learning_rate": 8.544026837369944e-06, "loss": 1.2008, "step": 9650 }, { "epoch": 0.97, "grad_norm": 7.115687819170043, "learning_rate": 8.54195727902008e-06, "loss": 1.2601, "step": 9655 }, { "epoch": 0.97, "grad_norm": 12.63085545181519, "learning_rate": 8.539886501886096e-06, "loss": 1.245, "step": 9660 }, { "epoch": 0.97, "grad_norm": 6.0638585143837895, "learning_rate": 8.537814506680552e-06, "loss": 1.2002, "step": 9665 }, { "epoch": 0.97, "grad_norm": 16.479375704581546, "learning_rate": 8.535741294116413e-06, "loss": 1.2232, "step": 9670 }, { "epoch": 0.98, "grad_norm": 17.557551719070158, "learning_rate": 8.533666864907072e-06, "loss": 1.1973, "step": 9675 }, { "epoch": 0.98, "grad_norm": 10.673967352600394, "learning_rate": 8.531591219766337e-06, "loss": 1.2523, "step": 9680 }, { "epoch": 0.98, "grad_norm": 15.669799825253532, "learning_rate": 8.52951435940844e-06, "loss": 1.2175, "step": 9685 }, { "epoch": 0.98, "grad_norm": 20.04964319057395, "learning_rate": 8.527436284548019e-06, "loss": 1.1941, "step": 9690 }, { "epoch": 0.98, "grad_norm": 10.990033812811166, "learning_rate": 8.525356995900142e-06, "loss": 1.1702, "step": 9695 }, { "epoch": 0.98, "grad_norm": 16.20945799802136, "learning_rate": 8.523276494180293e-06, "loss": 1.1931, "step": 9700 }, { "epoch": 0.98, "grad_norm": 12.84362195949145, "learning_rate": 8.521194780104364e-06, "loss": 1.2362, "step": 9705 }, { "epoch": 0.98, "grad_norm": 12.616349187919386, "learning_rate": 8.519111854388676e-06, "loss": 1.1801, "step": 9710 }, { "epoch": 0.98, "grad_norm": 18.920360038100625, "learning_rate": 8.51702771774996e-06, "loss": 1.2, "step": 9715 }, { "epoch": 0.98, "grad_norm": 31.170767115548916, "learning_rate": 8.514942370905364e-06, "loss": 1.2017, "step": 9720 }, { "epoch": 0.98, "grad_norm": 24.598565989478793, "learning_rate": 8.512855814572456e-06, "loss": 1.1829, "step": 9725 }, { "epoch": 0.98, "grad_norm": 16.275465737808293, "learning_rate": 8.510768049469218e-06, "loss": 1.2021, "step": 9730 }, { "epoch": 0.98, "grad_norm": 72.26265395071671, "learning_rate": 8.508679076314048e-06, "loss": 1.2837, "step": 9735 }, { "epoch": 0.98, "grad_norm": 31.72169190093813, "learning_rate": 8.506588895825758e-06, "loss": 1.2686, "step": 9740 }, { "epoch": 0.98, "grad_norm": 176.50463584566126, "learning_rate": 8.504497508723578e-06, "loss": 1.2294, "step": 9745 }, { "epoch": 0.98, "grad_norm": 58.2525129534981, "learning_rate": 8.502404915727154e-06, "loss": 1.2327, "step": 9750 }, { "epoch": 0.98, "grad_norm": 44.345100118213004, "learning_rate": 8.50031111755654e-06, "loss": 1.2719, "step": 9755 }, { "epoch": 0.98, "grad_norm": 58.80090588929816, "learning_rate": 8.498216114932217e-06, "loss": 1.225, "step": 9760 }, { "epoch": 0.98, "grad_norm": 17.63017726386444, "learning_rate": 8.49611990857507e-06, "loss": 1.2465, "step": 9765 }, { "epoch": 0.99, "grad_norm": 20.442324359928453, "learning_rate": 8.4940224992064e-06, "loss": 1.264, "step": 9770 }, { "epoch": 0.99, "grad_norm": 11.434910677344071, "learning_rate": 8.491923887547926e-06, "loss": 1.1754, "step": 9775 }, { "epoch": 0.99, "grad_norm": 17.22452900332348, "learning_rate": 8.489824074321778e-06, "loss": 1.2591, "step": 9780 }, { "epoch": 0.99, "grad_norm": 15.660771484878797, "learning_rate": 8.487723060250498e-06, "loss": 1.1941, "step": 9785 }, { "epoch": 0.99, "grad_norm": 5.793455215195363, "learning_rate": 8.485620846057044e-06, "loss": 1.2331, "step": 9790 }, { "epoch": 0.99, "grad_norm": 20.189243088111986, "learning_rate": 8.483517432464788e-06, "loss": 1.1827, "step": 9795 }, { "epoch": 0.99, "grad_norm": 31.80617544301878, "learning_rate": 8.481412820197508e-06, "loss": 1.209, "step": 9800 }, { "epoch": 0.99, "grad_norm": 51.16240905871924, "learning_rate": 8.479307009979403e-06, "loss": 1.1993, "step": 9805 }, { "epoch": 0.99, "grad_norm": 37.645660326349336, "learning_rate": 8.47720000253508e-06, "loss": 1.2541, "step": 9810 }, { "epoch": 0.99, "grad_norm": 13.537646193886891, "learning_rate": 8.475091798589556e-06, "loss": 1.2196, "step": 9815 }, { "epoch": 0.99, "grad_norm": 74.70213748980201, "learning_rate": 8.472982398868263e-06, "loss": 1.2352, "step": 9820 }, { "epoch": 0.99, "grad_norm": 135.7574431462861, "learning_rate": 8.470871804097045e-06, "loss": 1.2056, "step": 9825 }, { "epoch": 0.99, "grad_norm": 57.281387936043224, "learning_rate": 8.468760015002153e-06, "loss": 1.1798, "step": 9830 }, { "epoch": 0.99, "grad_norm": 49.41630425177251, "learning_rate": 8.466647032310253e-06, "loss": 1.2587, "step": 9835 }, { "epoch": 0.99, "grad_norm": 24.117672854707138, "learning_rate": 8.46453285674842e-06, "loss": 1.2449, "step": 9840 }, { "epoch": 0.99, "grad_norm": 14.016753129993532, "learning_rate": 8.46241748904414e-06, "loss": 1.2152, "step": 9845 }, { "epoch": 0.99, "grad_norm": 9.079832042525334, "learning_rate": 8.46030092992531e-06, "loss": 1.2344, "step": 9850 }, { "epoch": 0.99, "grad_norm": 16.341759458322716, "learning_rate": 8.458183180120235e-06, "loss": 1.2856, "step": 9855 }, { "epoch": 0.99, "grad_norm": 10.787416825397322, "learning_rate": 8.456064240357628e-06, "loss": 1.2745, "step": 9860 }, { "epoch": 0.99, "grad_norm": 45.30087761027042, "learning_rate": 8.45394411136662e-06, "loss": 1.1844, "step": 9865 }, { "epoch": 1.0, "grad_norm": 35.027289288363285, "learning_rate": 8.451822793876742e-06, "loss": 1.2217, "step": 9870 }, { "epoch": 1.0, "grad_norm": 19.68667017662464, "learning_rate": 8.449700288617935e-06, "loss": 1.2125, "step": 9875 }, { "epoch": 1.0, "grad_norm": 18.34371606158347, "learning_rate": 8.447576596320558e-06, "loss": 1.2053, "step": 9880 }, { "epoch": 1.0, "grad_norm": 18.141594774196527, "learning_rate": 8.445451717715363e-06, "loss": 1.164, "step": 9885 }, { "epoch": 1.0, "grad_norm": 25.351385895760103, "learning_rate": 8.443325653533525e-06, "loss": 1.2194, "step": 9890 }, { "epoch": 1.0, "grad_norm": 16.307025203392016, "learning_rate": 8.441198404506616e-06, "loss": 1.1969, "step": 9895 }, { "epoch": 1.0, "grad_norm": 12.175196142826138, "learning_rate": 8.439069971366625e-06, "loss": 1.1987, "step": 9900 }, { "epoch": 1.0, "grad_norm": 14.931195059799597, "learning_rate": 8.436940354845939e-06, "loss": 1.2042, "step": 9905 }, { "epoch": 1.0, "grad_norm": 11.002472470006175, "learning_rate": 8.434809555677361e-06, "loss": 1.2303, "step": 9910 }, { "epoch": 1.0, "grad_norm": 12.290291630079329, "learning_rate": 8.432677574594095e-06, "loss": 1.1862, "step": 9915 }, { "epoch": 1.0, "eval_loss": 1.2109025716781616, "eval_runtime": 25.7471, "eval_samples_per_second": 31.305, "eval_steps_per_second": 3.923, "step": 9918 }, { "epoch": 1.0, "grad_norm": 10.829768346435534, "learning_rate": 8.430544412329756e-06, "loss": 1.1478, "step": 9920 }, { "epoch": 1.0, "grad_norm": 8.970145587847068, "learning_rate": 8.42841006961836e-06, "loss": 0.9985, "step": 9925 }, { "epoch": 1.0, "grad_norm": 12.882498532354235, "learning_rate": 8.426274547194332e-06, "loss": 1.0612, "step": 9930 }, { "epoch": 1.0, "grad_norm": 10.97482649042033, "learning_rate": 8.424137845792509e-06, "loss": 0.9763, "step": 9935 }, { "epoch": 1.0, "grad_norm": 11.76466163919948, "learning_rate": 8.42199996614812e-06, "loss": 1.0238, "step": 9940 }, { "epoch": 1.0, "grad_norm": 9.112399675573785, "learning_rate": 8.41986090899681e-06, "loss": 1.0398, "step": 9945 }, { "epoch": 1.0, "grad_norm": 6.213687218919592, "learning_rate": 8.417720675074633e-06, "loss": 1.051, "step": 9950 }, { "epoch": 1.0, "grad_norm": 9.663983135535817, "learning_rate": 8.415579265118032e-06, "loss": 1.0585, "step": 9955 }, { "epoch": 1.0, "grad_norm": 5.359268922806681, "learning_rate": 8.413436679863868e-06, "loss": 0.9919, "step": 9960 }, { "epoch": 1.0, "grad_norm": 8.245242498527361, "learning_rate": 8.411292920049403e-06, "loss": 1.0313, "step": 9965 }, { "epoch": 1.01, "grad_norm": 9.768501766101803, "learning_rate": 8.409147986412302e-06, "loss": 0.9857, "step": 9970 }, { "epoch": 1.01, "grad_norm": 20.84868605870357, "learning_rate": 8.407001879690637e-06, "loss": 1.0668, "step": 9975 }, { "epoch": 1.01, "grad_norm": 10.832758318711273, "learning_rate": 8.404854600622878e-06, "loss": 1.061, "step": 9980 }, { "epoch": 1.01, "grad_norm": 28.323129537818932, "learning_rate": 8.402706149947903e-06, "loss": 1.0443, "step": 9985 }, { "epoch": 1.01, "grad_norm": 32.08304774339511, "learning_rate": 8.40055652840499e-06, "loss": 1.102, "step": 9990 }, { "epoch": 1.01, "grad_norm": 13.982485513234868, "learning_rate": 8.398405736733827e-06, "loss": 1.0058, "step": 9995 }, { "epoch": 1.01, "grad_norm": 9.166678030612518, "learning_rate": 8.396253775674495e-06, "loss": 0.9781, "step": 10000 }, { "epoch": 1.01, "grad_norm": 24.744865722386095, "learning_rate": 8.39410064596748e-06, "loss": 1.054, "step": 10005 }, { "epoch": 1.01, "grad_norm": 33.39987986980466, "learning_rate": 8.391946348353676e-06, "loss": 1.0492, "step": 10010 }, { "epoch": 1.01, "grad_norm": 16.197408664885675, "learning_rate": 8.389790883574374e-06, "loss": 1.0285, "step": 10015 }, { "epoch": 1.01, "grad_norm": 6.478049205668278, "learning_rate": 8.387634252371267e-06, "loss": 1.0178, "step": 10020 }, { "epoch": 1.01, "grad_norm": 11.22929308446771, "learning_rate": 8.385476455486447e-06, "loss": 1.0622, "step": 10025 }, { "epoch": 1.01, "grad_norm": 6.37257185988015, "learning_rate": 8.383317493662412e-06, "loss": 1.0177, "step": 10030 }, { "epoch": 1.01, "grad_norm": 34.315595057653006, "learning_rate": 8.381157367642062e-06, "loss": 1.0417, "step": 10035 }, { "epoch": 1.01, "grad_norm": 18.402346346049598, "learning_rate": 8.37899607816869e-06, "loss": 1.052, "step": 10040 }, { "epoch": 1.01, "grad_norm": 11.337683054348759, "learning_rate": 8.376833625985994e-06, "loss": 1.062, "step": 10045 }, { "epoch": 1.01, "grad_norm": 20.548054154408476, "learning_rate": 8.374670011838072e-06, "loss": 1.031, "step": 10050 }, { "epoch": 1.01, "grad_norm": 17.210150969500315, "learning_rate": 8.372505236469424e-06, "loss": 1.0524, "step": 10055 }, { "epoch": 1.01, "grad_norm": 12.613329324259729, "learning_rate": 8.370339300624943e-06, "loss": 1.0022, "step": 10060 }, { "epoch": 1.01, "grad_norm": 7.485345502725573, "learning_rate": 8.36817220504993e-06, "loss": 0.9975, "step": 10065 }, { "epoch": 1.02, "grad_norm": 7.671073845035285, "learning_rate": 8.366003950490076e-06, "loss": 1.0465, "step": 10070 }, { "epoch": 1.02, "grad_norm": 8.982752291924099, "learning_rate": 8.36383453769148e-06, "loss": 1.0219, "step": 10075 }, { "epoch": 1.02, "grad_norm": 14.116848311765736, "learning_rate": 8.36166396740063e-06, "loss": 1.0371, "step": 10080 }, { "epoch": 1.02, "grad_norm": 9.768301556799226, "learning_rate": 8.35949224036442e-06, "loss": 1.0401, "step": 10085 }, { "epoch": 1.02, "grad_norm": 13.02668779107601, "learning_rate": 8.35731935733014e-06, "loss": 0.9951, "step": 10090 }, { "epoch": 1.02, "grad_norm": 11.426296707679093, "learning_rate": 8.355145319045475e-06, "loss": 1.0167, "step": 10095 }, { "epoch": 1.02, "grad_norm": 16.00541707616357, "learning_rate": 8.35297012625851e-06, "loss": 0.9962, "step": 10100 }, { "epoch": 1.02, "grad_norm": 6.588001957684907, "learning_rate": 8.350793779717727e-06, "loss": 1.0284, "step": 10105 }, { "epoch": 1.02, "grad_norm": 6.013644599649537, "learning_rate": 8.348616280172006e-06, "loss": 1.0332, "step": 10110 }, { "epoch": 1.02, "grad_norm": 19.298108799226043, "learning_rate": 8.346437628370621e-06, "loss": 1.0447, "step": 10115 }, { "epoch": 1.02, "grad_norm": 7.074142078355483, "learning_rate": 8.344257825063242e-06, "loss": 1.0473, "step": 10120 }, { "epoch": 1.02, "grad_norm": 6.3778894218672, "learning_rate": 8.342076870999943e-06, "loss": 0.9901, "step": 10125 }, { "epoch": 1.02, "grad_norm": 7.1894231376209685, "learning_rate": 8.339894766931183e-06, "loss": 1.0352, "step": 10130 }, { "epoch": 1.02, "grad_norm": 15.009292576306457, "learning_rate": 8.337711513607823e-06, "loss": 0.9798, "step": 10135 }, { "epoch": 1.02, "grad_norm": 12.916583311141933, "learning_rate": 8.335527111781121e-06, "loss": 1.0658, "step": 10140 }, { "epoch": 1.02, "grad_norm": 8.335359800171835, "learning_rate": 8.333341562202724e-06, "loss": 1.0424, "step": 10145 }, { "epoch": 1.02, "grad_norm": 16.405141871430068, "learning_rate": 8.33115486562468e-06, "loss": 1.0568, "step": 10150 }, { "epoch": 1.02, "grad_norm": 15.878882635298778, "learning_rate": 8.328967022799427e-06, "loss": 1.0359, "step": 10155 }, { "epoch": 1.02, "grad_norm": 7.011786564450279, "learning_rate": 8.326778034479802e-06, "loss": 1.0394, "step": 10160 }, { "epoch": 1.02, "grad_norm": 6.231043280092075, "learning_rate": 8.324587901419033e-06, "loss": 1.0051, "step": 10165 }, { "epoch": 1.03, "grad_norm": 6.168059044544621, "learning_rate": 8.322396624370741e-06, "loss": 1.0036, "step": 10170 }, { "epoch": 1.03, "grad_norm": 10.939236046593278, "learning_rate": 8.320204204088946e-06, "loss": 1.0453, "step": 10175 }, { "epoch": 1.03, "grad_norm": 5.345591223655666, "learning_rate": 8.318010641328053e-06, "loss": 1.0282, "step": 10180 }, { "epoch": 1.03, "grad_norm": 18.90314250251431, "learning_rate": 8.31581593684287e-06, "loss": 0.9976, "step": 10185 }, { "epoch": 1.03, "grad_norm": 18.084551690785258, "learning_rate": 8.313620091388588e-06, "loss": 1.0772, "step": 10190 }, { "epoch": 1.03, "grad_norm": 9.684884049853789, "learning_rate": 8.311423105720799e-06, "loss": 1.0162, "step": 10195 }, { "epoch": 1.03, "grad_norm": 7.549694639447544, "learning_rate": 8.30922498059548e-06, "loss": 0.987, "step": 10200 }, { "epoch": 1.03, "grad_norm": 7.143518433232426, "learning_rate": 8.307025716769008e-06, "loss": 1.0323, "step": 10205 }, { "epoch": 1.03, "grad_norm": 8.884891662639422, "learning_rate": 8.304825314998147e-06, "loss": 0.9754, "step": 10210 }, { "epoch": 1.03, "grad_norm": 12.655469405362567, "learning_rate": 8.302623776040047e-06, "loss": 1.0069, "step": 10215 }, { "epoch": 1.03, "grad_norm": 14.075675562314656, "learning_rate": 8.300421100652263e-06, "loss": 1.0739, "step": 10220 }, { "epoch": 1.03, "grad_norm": 18.065814927344455, "learning_rate": 8.298217289592729e-06, "loss": 1.0376, "step": 10225 }, { "epoch": 1.03, "grad_norm": 54.61351609242025, "learning_rate": 8.296012343619777e-06, "loss": 1.1097, "step": 10230 }, { "epoch": 1.03, "grad_norm": 9.53814558868182, "learning_rate": 8.293806263492126e-06, "loss": 1.0131, "step": 10235 }, { "epoch": 1.03, "grad_norm": 11.438272279146418, "learning_rate": 8.291599049968885e-06, "loss": 1.0001, "step": 10240 }, { "epoch": 1.03, "grad_norm": 9.00508276425213, "learning_rate": 8.289390703809554e-06, "loss": 1.0835, "step": 10245 }, { "epoch": 1.03, "grad_norm": 9.395690192570587, "learning_rate": 8.287181225774025e-06, "loss": 1.051, "step": 10250 }, { "epoch": 1.03, "grad_norm": 10.050017934085144, "learning_rate": 8.284970616622575e-06, "loss": 0.9877, "step": 10255 }, { "epoch": 1.03, "grad_norm": 6.029724756383363, "learning_rate": 8.282758877115873e-06, "loss": 1.0171, "step": 10260 }, { "epoch": 1.03, "grad_norm": 11.208701180126377, "learning_rate": 8.280546008014979e-06, "loss": 1.0231, "step": 10265 }, { "epoch": 1.04, "grad_norm": 12.966732406844317, "learning_rate": 8.278332010081333e-06, "loss": 1.0141, "step": 10270 }, { "epoch": 1.04, "grad_norm": 7.461619182024871, "learning_rate": 8.276116884076777e-06, "loss": 1.0603, "step": 10275 }, { "epoch": 1.04, "grad_norm": 13.178804487830194, "learning_rate": 8.273900630763529e-06, "loss": 1.0469, "step": 10280 }, { "epoch": 1.04, "grad_norm": 15.349058379616233, "learning_rate": 8.2716832509042e-06, "loss": 0.9975, "step": 10285 }, { "epoch": 1.04, "grad_norm": 19.535910730995127, "learning_rate": 8.26946474526179e-06, "loss": 1.0539, "step": 10290 }, { "epoch": 1.04, "grad_norm": 11.929726128486617, "learning_rate": 8.267245114599684e-06, "loss": 1.0546, "step": 10295 }, { "epoch": 1.04, "grad_norm": 12.196993975441478, "learning_rate": 8.265024359681655e-06, "loss": 0.9936, "step": 10300 }, { "epoch": 1.04, "grad_norm": 7.850173272592965, "learning_rate": 8.262802481271861e-06, "loss": 1.041, "step": 10305 }, { "epoch": 1.04, "grad_norm": 7.691774593198793, "learning_rate": 8.26057948013485e-06, "loss": 1.0955, "step": 10310 }, { "epoch": 1.04, "grad_norm": 7.1729861230834295, "learning_rate": 8.258355357035555e-06, "loss": 1.0714, "step": 10315 }, { "epoch": 1.04, "grad_norm": 8.238572105953923, "learning_rate": 8.256130112739293e-06, "loss": 1.0458, "step": 10320 }, { "epoch": 1.04, "grad_norm": 5.624595274100203, "learning_rate": 8.253903748011769e-06, "loss": 1.0115, "step": 10325 }, { "epoch": 1.04, "grad_norm": 17.442304927152296, "learning_rate": 8.251676263619074e-06, "loss": 1.0122, "step": 10330 }, { "epoch": 1.04, "grad_norm": 6.0748278749495785, "learning_rate": 8.249447660327681e-06, "loss": 1.0554, "step": 10335 }, { "epoch": 1.04, "grad_norm": 6.41122547786913, "learning_rate": 8.247217938904453e-06, "loss": 1.0389, "step": 10340 }, { "epoch": 1.04, "grad_norm": 14.149361527527443, "learning_rate": 8.244987100116632e-06, "loss": 1.066, "step": 10345 }, { "epoch": 1.04, "grad_norm": 20.382488116202655, "learning_rate": 8.242755144731851e-06, "loss": 1.0616, "step": 10350 }, { "epoch": 1.04, "grad_norm": 5.805046554209081, "learning_rate": 8.240522073518122e-06, "loss": 1.0178, "step": 10355 }, { "epoch": 1.04, "grad_norm": 11.771420372480122, "learning_rate": 8.23828788724384e-06, "loss": 1.0467, "step": 10360 }, { "epoch": 1.05, "grad_norm": 39.87695906960376, "learning_rate": 8.236052586677792e-06, "loss": 1.0675, "step": 10365 }, { "epoch": 1.05, "grad_norm": 26.86833571951981, "learning_rate": 8.233816172589139e-06, "loss": 1.0649, "step": 10370 }, { "epoch": 1.05, "grad_norm": 6.884515060507346, "learning_rate": 8.231578645747428e-06, "loss": 1.0472, "step": 10375 }, { "epoch": 1.05, "grad_norm": 23.93658008919047, "learning_rate": 8.229340006922592e-06, "loss": 1.0236, "step": 10380 }, { "epoch": 1.05, "grad_norm": 9.962471938855685, "learning_rate": 8.227100256884945e-06, "loss": 1.0748, "step": 10385 }, { "epoch": 1.05, "grad_norm": 9.589553264943177, "learning_rate": 8.224859396405183e-06, "loss": 1.0212, "step": 10390 }, { "epoch": 1.05, "grad_norm": 27.241871994524665, "learning_rate": 8.22261742625438e-06, "loss": 1.0397, "step": 10395 }, { "epoch": 1.05, "grad_norm": 35.438534496728494, "learning_rate": 8.220374347204001e-06, "loss": 1.0519, "step": 10400 }, { "epoch": 1.05, "grad_norm": 63.37721092054966, "learning_rate": 8.218130160025883e-06, "loss": 1.0447, "step": 10405 }, { "epoch": 1.05, "grad_norm": 46.362651376223255, "learning_rate": 8.215884865492252e-06, "loss": 1.0396, "step": 10410 }, { "epoch": 1.05, "grad_norm": 86.81240093881469, "learning_rate": 8.21363846437571e-06, "loss": 1.0687, "step": 10415 }, { "epoch": 1.05, "grad_norm": 38.108316598515685, "learning_rate": 8.211390957449242e-06, "loss": 1.0728, "step": 10420 }, { "epoch": 1.05, "grad_norm": 33.206193958643894, "learning_rate": 8.20914234548621e-06, "loss": 1.0477, "step": 10425 }, { "epoch": 1.05, "grad_norm": 6.020883745985015, "learning_rate": 8.206892629260366e-06, "loss": 1.0067, "step": 10430 }, { "epoch": 1.05, "grad_norm": 16.45834233869216, "learning_rate": 8.204641809545829e-06, "loss": 1.0321, "step": 10435 }, { "epoch": 1.05, "grad_norm": 22.878622903566985, "learning_rate": 8.202389887117106e-06, "loss": 1.0424, "step": 10440 }, { "epoch": 1.05, "grad_norm": 9.914121611087722, "learning_rate": 8.200136862749083e-06, "loss": 1.0094, "step": 10445 }, { "epoch": 1.05, "grad_norm": 6.991708402704003, "learning_rate": 8.19788273721702e-06, "loss": 1.08, "step": 10450 }, { "epoch": 1.05, "grad_norm": 43.71924056121477, "learning_rate": 8.195627511296562e-06, "loss": 1.0456, "step": 10455 }, { "epoch": 1.05, "grad_norm": 74.34005505033582, "learning_rate": 8.19337118576373e-06, "loss": 1.0431, "step": 10460 }, { "epoch": 1.06, "grad_norm": 26.077817865542375, "learning_rate": 8.191113761394924e-06, "loss": 1.0635, "step": 10465 }, { "epoch": 1.06, "grad_norm": 9.513061881158686, "learning_rate": 8.188855238966918e-06, "loss": 1.0094, "step": 10470 }, { "epoch": 1.06, "grad_norm": 27.962173814297504, "learning_rate": 8.186595619256872e-06, "loss": 1.0165, "step": 10475 }, { "epoch": 1.06, "grad_norm": 9.287680421146506, "learning_rate": 8.184334903042317e-06, "loss": 1.0481, "step": 10480 }, { "epoch": 1.06, "grad_norm": 18.770639134900765, "learning_rate": 8.182073091101162e-06, "loss": 1.0454, "step": 10485 }, { "epoch": 1.06, "grad_norm": 10.44461465762257, "learning_rate": 8.179810184211697e-06, "loss": 1.0993, "step": 10490 }, { "epoch": 1.06, "grad_norm": 14.95781697961938, "learning_rate": 8.177546183152584e-06, "loss": 1.0544, "step": 10495 }, { "epoch": 1.06, "grad_norm": 9.94690016611478, "learning_rate": 8.175281088702865e-06, "loss": 1.0863, "step": 10500 }, { "epoch": 1.06, "grad_norm": 6.348800475665943, "learning_rate": 8.173014901641955e-06, "loss": 1.079, "step": 10505 }, { "epoch": 1.06, "grad_norm": 15.869679015605106, "learning_rate": 8.170747622749648e-06, "loss": 1.029, "step": 10510 }, { "epoch": 1.06, "grad_norm": 6.568519455147601, "learning_rate": 8.168479252806111e-06, "loss": 1.069, "step": 10515 }, { "epoch": 1.06, "grad_norm": 6.191276539726678, "learning_rate": 8.166209792591893e-06, "loss": 1.0137, "step": 10520 }, { "epoch": 1.06, "grad_norm": 17.364086529976788, "learning_rate": 8.163939242887909e-06, "loss": 1.0729, "step": 10525 }, { "epoch": 1.06, "grad_norm": 11.752862921532925, "learning_rate": 8.161667604475452e-06, "loss": 1.0175, "step": 10530 }, { "epoch": 1.06, "grad_norm": 8.257352736574848, "learning_rate": 8.15939487813619e-06, "loss": 1.0339, "step": 10535 }, { "epoch": 1.06, "grad_norm": 21.954806474603796, "learning_rate": 8.157121064652171e-06, "loss": 1.0565, "step": 10540 }, { "epoch": 1.06, "grad_norm": 10.781500694131484, "learning_rate": 8.154846164805807e-06, "loss": 1.0432, "step": 10545 }, { "epoch": 1.06, "grad_norm": 22.378219693704544, "learning_rate": 8.152570179379893e-06, "loss": 1.0174, "step": 10550 }, { "epoch": 1.06, "grad_norm": 5.7171653504915385, "learning_rate": 8.15029310915759e-06, "loss": 1.0416, "step": 10555 }, { "epoch": 1.06, "grad_norm": 11.155624751237, "learning_rate": 8.148014954922438e-06, "loss": 1.0438, "step": 10560 }, { "epoch": 1.07, "grad_norm": 9.151260055253163, "learning_rate": 8.145735717458344e-06, "loss": 0.9877, "step": 10565 }, { "epoch": 1.07, "grad_norm": 7.068599316429127, "learning_rate": 8.143455397549597e-06, "loss": 1.0721, "step": 10570 }, { "epoch": 1.07, "grad_norm": 13.183688049917007, "learning_rate": 8.14117399598085e-06, "loss": 1.0617, "step": 10575 }, { "epoch": 1.07, "grad_norm": 16.8120516390695, "learning_rate": 8.13889151353713e-06, "loss": 1.0236, "step": 10580 }, { "epoch": 1.07, "grad_norm": 6.849510964885902, "learning_rate": 8.13660795100384e-06, "loss": 1.0305, "step": 10585 }, { "epoch": 1.07, "grad_norm": 6.130947304232826, "learning_rate": 8.134323309166747e-06, "loss": 1.0564, "step": 10590 }, { "epoch": 1.07, "grad_norm": 5.548870019213742, "learning_rate": 8.132037588811998e-06, "loss": 1.0183, "step": 10595 }, { "epoch": 1.07, "grad_norm": 35.41958649859262, "learning_rate": 8.129750790726108e-06, "loss": 1.0407, "step": 10600 }, { "epoch": 1.07, "grad_norm": 20.220901648050884, "learning_rate": 8.127462915695958e-06, "loss": 1.0309, "step": 10605 }, { "epoch": 1.07, "grad_norm": 14.431443131862906, "learning_rate": 8.125173964508806e-06, "loss": 0.9904, "step": 10610 }, { "epoch": 1.07, "grad_norm": 14.018049652826944, "learning_rate": 8.12288393795228e-06, "loss": 1.05, "step": 10615 }, { "epoch": 1.07, "grad_norm": 8.331762857681298, "learning_rate": 8.120592836814372e-06, "loss": 0.9944, "step": 10620 }, { "epoch": 1.07, "grad_norm": 10.694322359008826, "learning_rate": 8.118300661883447e-06, "loss": 1.0141, "step": 10625 }, { "epoch": 1.07, "grad_norm": 7.139637852801844, "learning_rate": 8.116007413948246e-06, "loss": 1.0605, "step": 10630 }, { "epoch": 1.07, "grad_norm": 27.75660179197848, "learning_rate": 8.113713093797868e-06, "loss": 1.0805, "step": 10635 }, { "epoch": 1.07, "grad_norm": 23.475321565301055, "learning_rate": 8.111417702221789e-06, "loss": 1.0264, "step": 10640 }, { "epoch": 1.07, "grad_norm": 21.672369017405423, "learning_rate": 8.109121240009851e-06, "loss": 1.0534, "step": 10645 }, { "epoch": 1.07, "grad_norm": 6.602144775695785, "learning_rate": 8.106823707952263e-06, "loss": 1.0419, "step": 10650 }, { "epoch": 1.07, "grad_norm": 9.930638541831872, "learning_rate": 8.104525106839606e-06, "loss": 1.0507, "step": 10655 }, { "epoch": 1.07, "grad_norm": 21.190413953730527, "learning_rate": 8.102225437462824e-06, "loss": 1.0411, "step": 10660 }, { "epoch": 1.08, "grad_norm": 20.32191528592284, "learning_rate": 8.09992470061323e-06, "loss": 1.0641, "step": 10665 }, { "epoch": 1.08, "grad_norm": 11.930762358782742, "learning_rate": 8.097622897082512e-06, "loss": 1.0278, "step": 10670 }, { "epoch": 1.08, "grad_norm": 34.26451847010071, "learning_rate": 8.09532002766271e-06, "loss": 1.0161, "step": 10675 }, { "epoch": 1.08, "grad_norm": 54.58971847600707, "learning_rate": 8.093016093146242e-06, "loss": 1.0074, "step": 10680 }, { "epoch": 1.08, "grad_norm": 21.47990163607273, "learning_rate": 8.090711094325891e-06, "loss": 1.0681, "step": 10685 }, { "epoch": 1.08, "grad_norm": 7.386616762935336, "learning_rate": 8.088405031994807e-06, "loss": 1.0295, "step": 10690 }, { "epoch": 1.08, "grad_norm": 27.232856368628745, "learning_rate": 8.0860979069465e-06, "loss": 1.0289, "step": 10695 }, { "epoch": 1.08, "grad_norm": 9.453651242681508, "learning_rate": 8.083789719974851e-06, "loss": 1.0618, "step": 10700 }, { "epoch": 1.08, "grad_norm": 6.668346991769979, "learning_rate": 8.081480471874104e-06, "loss": 1.0612, "step": 10705 }, { "epoch": 1.08, "grad_norm": 6.643278409339298, "learning_rate": 8.079170163438868e-06, "loss": 1.0549, "step": 10710 }, { "epoch": 1.08, "grad_norm": 13.331052850098343, "learning_rate": 8.076858795464122e-06, "loss": 1.0276, "step": 10715 }, { "epoch": 1.08, "grad_norm": 10.186490783994433, "learning_rate": 8.074546368745203e-06, "loss": 1.0888, "step": 10720 }, { "epoch": 1.08, "grad_norm": 18.851520851508756, "learning_rate": 8.072232884077816e-06, "loss": 1.0049, "step": 10725 }, { "epoch": 1.08, "grad_norm": 43.982567273199294, "learning_rate": 8.069918342258026e-06, "loss": 1.0366, "step": 10730 }, { "epoch": 1.08, "grad_norm": 15.929804138237563, "learning_rate": 8.067602744082268e-06, "loss": 1.0421, "step": 10735 }, { "epoch": 1.08, "grad_norm": 8.961094668401225, "learning_rate": 8.065286090347335e-06, "loss": 1.0774, "step": 10740 }, { "epoch": 1.08, "grad_norm": 24.28551240410909, "learning_rate": 8.062968381850386e-06, "loss": 1.0076, "step": 10745 }, { "epoch": 1.08, "grad_norm": 10.853929888653179, "learning_rate": 8.060649619388942e-06, "loss": 1.0128, "step": 10750 }, { "epoch": 1.08, "grad_norm": 8.068621266149146, "learning_rate": 8.058329803760887e-06, "loss": 1.0167, "step": 10755 }, { "epoch": 1.08, "grad_norm": 19.775858823799506, "learning_rate": 8.056008935764469e-06, "loss": 1.0708, "step": 10760 }, { "epoch": 1.09, "grad_norm": 28.512827810024064, "learning_rate": 8.053687016198292e-06, "loss": 1.0837, "step": 10765 }, { "epoch": 1.09, "grad_norm": 23.63028805052106, "learning_rate": 8.051364045861332e-06, "loss": 1.0027, "step": 10770 }, { "epoch": 1.09, "grad_norm": 8.396814053507557, "learning_rate": 8.049040025552919e-06, "loss": 1.0051, "step": 10775 }, { "epoch": 1.09, "grad_norm": 33.01296800751487, "learning_rate": 8.046714956072744e-06, "loss": 1.0547, "step": 10780 }, { "epoch": 1.09, "grad_norm": 38.4056182742984, "learning_rate": 8.044388838220863e-06, "loss": 1.0533, "step": 10785 }, { "epoch": 1.09, "grad_norm": 5.4678980940781985, "learning_rate": 8.04206167279769e-06, "loss": 1.0223, "step": 10790 }, { "epoch": 1.09, "grad_norm": 16.31958176301968, "learning_rate": 8.039733460604002e-06, "loss": 1.0467, "step": 10795 }, { "epoch": 1.09, "grad_norm": 29.78757054516638, "learning_rate": 8.037404202440932e-06, "loss": 1.1068, "step": 10800 }, { "epoch": 1.09, "grad_norm": 9.351514786042655, "learning_rate": 8.035073899109978e-06, "loss": 1.055, "step": 10805 }, { "epoch": 1.09, "grad_norm": 16.13405242140683, "learning_rate": 8.032742551412994e-06, "loss": 1.0259, "step": 10810 }, { "epoch": 1.09, "grad_norm": 23.518528190560342, "learning_rate": 8.030410160152193e-06, "loss": 1.0932, "step": 10815 }, { "epoch": 1.09, "grad_norm": 39.96884044404456, "learning_rate": 8.028076726130152e-06, "loss": 1.037, "step": 10820 }, { "epoch": 1.09, "grad_norm": 27.79746347759069, "learning_rate": 8.0257422501498e-06, "loss": 1.0445, "step": 10825 }, { "epoch": 1.09, "grad_norm": 41.14827167125988, "learning_rate": 8.023406733014432e-06, "loss": 1.12, "step": 10830 }, { "epoch": 1.09, "grad_norm": 41.22766937081543, "learning_rate": 8.021070175527693e-06, "loss": 1.0478, "step": 10835 }, { "epoch": 1.09, "grad_norm": 8.38343686690903, "learning_rate": 8.018732578493593e-06, "loss": 1.0177, "step": 10840 }, { "epoch": 1.09, "grad_norm": 7.7279491440924115, "learning_rate": 8.016393942716495e-06, "loss": 1.0698, "step": 10845 }, { "epoch": 1.09, "grad_norm": 10.603661113936377, "learning_rate": 8.014054269001124e-06, "loss": 1.0172, "step": 10850 }, { "epoch": 1.09, "grad_norm": 6.832556522153212, "learning_rate": 8.011713558152556e-06, "loss": 1.0496, "step": 10855 }, { "epoch": 1.09, "grad_norm": 8.44330576857254, "learning_rate": 8.00937181097623e-06, "loss": 1.0259, "step": 10860 }, { "epoch": 1.1, "grad_norm": 19.961190197259683, "learning_rate": 8.00702902827794e-06, "loss": 1.0294, "step": 10865 }, { "epoch": 1.1, "grad_norm": 7.772105224394243, "learning_rate": 8.004685210863831e-06, "loss": 1.0449, "step": 10870 }, { "epoch": 1.1, "grad_norm": 15.284396898913977, "learning_rate": 8.00234035954041e-06, "loss": 1.0185, "step": 10875 }, { "epoch": 1.1, "grad_norm": 29.785290427174154, "learning_rate": 7.999994475114541e-06, "loss": 0.9921, "step": 10880 }, { "epoch": 1.1, "grad_norm": 13.357731611223983, "learning_rate": 7.997647558393438e-06, "loss": 1.0381, "step": 10885 }, { "epoch": 1.1, "grad_norm": 5.406314279732902, "learning_rate": 7.995299610184673e-06, "loss": 1.0861, "step": 10890 }, { "epoch": 1.1, "grad_norm": 17.625404656509023, "learning_rate": 7.992950631296172e-06, "loss": 1.0584, "step": 10895 }, { "epoch": 1.1, "grad_norm": 34.69791299273681, "learning_rate": 7.990600622536217e-06, "loss": 1.0001, "step": 10900 }, { "epoch": 1.1, "grad_norm": 16.127613634453, "learning_rate": 7.988249584713447e-06, "loss": 1.1006, "step": 10905 }, { "epoch": 1.1, "grad_norm": 13.244903524023991, "learning_rate": 7.985897518636847e-06, "loss": 1.1066, "step": 10910 }, { "epoch": 1.1, "grad_norm": 18.772660505535132, "learning_rate": 7.983544425115763e-06, "loss": 1.0398, "step": 10915 }, { "epoch": 1.1, "grad_norm": 6.461138582086308, "learning_rate": 7.981190304959891e-06, "loss": 1.066, "step": 10920 }, { "epoch": 1.1, "grad_norm": 26.736545490527853, "learning_rate": 7.978835158979285e-06, "loss": 1.0661, "step": 10925 }, { "epoch": 1.1, "grad_norm": 18.0084969505494, "learning_rate": 7.976478987984345e-06, "loss": 1.02, "step": 10930 }, { "epoch": 1.1, "grad_norm": 25.307357719882972, "learning_rate": 7.974121792785826e-06, "loss": 1.0413, "step": 10935 }, { "epoch": 1.1, "grad_norm": 13.887929931927594, "learning_rate": 7.97176357419484e-06, "loss": 1.0631, "step": 10940 }, { "epoch": 1.1, "grad_norm": 8.280734504239222, "learning_rate": 7.969404333022846e-06, "loss": 1.0711, "step": 10945 }, { "epoch": 1.1, "grad_norm": 22.243380183936026, "learning_rate": 7.967044070081658e-06, "loss": 1.0771, "step": 10950 }, { "epoch": 1.1, "grad_norm": 32.605761545212125, "learning_rate": 7.96468278618344e-06, "loss": 1.0772, "step": 10955 }, { "epoch": 1.11, "grad_norm": 10.284422751807178, "learning_rate": 7.962320482140704e-06, "loss": 1.0444, "step": 10960 }, { "epoch": 1.11, "grad_norm": 5.770567621626649, "learning_rate": 7.959957158766323e-06, "loss": 1.0371, "step": 10965 }, { "epoch": 1.11, "grad_norm": 6.864176917653522, "learning_rate": 7.957592816873509e-06, "loss": 1.0122, "step": 10970 }, { "epoch": 1.11, "grad_norm": 9.70443716107041, "learning_rate": 7.95522745727583e-06, "loss": 1.0528, "step": 10975 }, { "epoch": 1.11, "grad_norm": 6.24545368926349, "learning_rate": 7.952861080787209e-06, "loss": 1.0377, "step": 10980 }, { "epoch": 1.11, "grad_norm": 7.757228862705906, "learning_rate": 7.950493688221907e-06, "loss": 1.0669, "step": 10985 }, { "epoch": 1.11, "grad_norm": 12.116188153519241, "learning_rate": 7.948125280394548e-06, "loss": 1.0545, "step": 10990 }, { "epoch": 1.11, "grad_norm": 5.717967247580755, "learning_rate": 7.945755858120095e-06, "loss": 1.0256, "step": 10995 }, { "epoch": 1.11, "grad_norm": 5.836371646522697, "learning_rate": 7.943385422213862e-06, "loss": 1.0441, "step": 11000 }, { "epoch": 1.11, "grad_norm": 30.217546600081967, "learning_rate": 7.941013973491518e-06, "loss": 1.0654, "step": 11005 }, { "epoch": 1.11, "grad_norm": 40.932544472644175, "learning_rate": 7.938641512769077e-06, "loss": 1.0572, "step": 11010 }, { "epoch": 1.11, "grad_norm": 22.72804791336364, "learning_rate": 7.936268040862895e-06, "loss": 1.0351, "step": 11015 }, { "epoch": 1.11, "grad_norm": 15.044052151674721, "learning_rate": 7.933893558589687e-06, "loss": 1.0369, "step": 11020 }, { "epoch": 1.11, "grad_norm": 8.747254479015107, "learning_rate": 7.931518066766506e-06, "loss": 1.0475, "step": 11025 }, { "epoch": 1.11, "grad_norm": 7.632051553686237, "learning_rate": 7.92914156621076e-06, "loss": 1.035, "step": 11030 }, { "epoch": 1.11, "grad_norm": 7.1829450330708955, "learning_rate": 7.926764057740198e-06, "loss": 1.0905, "step": 11035 }, { "epoch": 1.11, "grad_norm": 26.81836768913999, "learning_rate": 7.924385542172919e-06, "loss": 1.0201, "step": 11040 }, { "epoch": 1.11, "grad_norm": 17.98999389891583, "learning_rate": 7.922006020327365e-06, "loss": 1.0039, "step": 11045 }, { "epoch": 1.11, "grad_norm": 21.47333072000413, "learning_rate": 7.919625493022335e-06, "loss": 1.0052, "step": 11050 }, { "epoch": 1.11, "grad_norm": 9.565135655710572, "learning_rate": 7.917243961076959e-06, "loss": 1.0425, "step": 11055 }, { "epoch": 1.12, "grad_norm": 10.493952061391708, "learning_rate": 7.914861425310723e-06, "loss": 1.0527, "step": 11060 }, { "epoch": 1.12, "grad_norm": 15.28996338406018, "learning_rate": 7.912477886543454e-06, "loss": 1.1012, "step": 11065 }, { "epoch": 1.12, "grad_norm": 5.786521467272293, "learning_rate": 7.910093345595326e-06, "loss": 1.064, "step": 11070 }, { "epoch": 1.12, "grad_norm": 17.638360793188944, "learning_rate": 7.907707803286856e-06, "loss": 1.0817, "step": 11075 }, { "epoch": 1.12, "grad_norm": 11.579350487528567, "learning_rate": 7.90532126043891e-06, "loss": 1.0424, "step": 11080 }, { "epoch": 1.12, "grad_norm": 16.179693273682382, "learning_rate": 7.902933717872693e-06, "loss": 1.0705, "step": 11085 }, { "epoch": 1.12, "grad_norm": 18.410102516344413, "learning_rate": 7.900545176409756e-06, "loss": 1.0398, "step": 11090 }, { "epoch": 1.12, "grad_norm": 9.49004821436449, "learning_rate": 7.898155636871995e-06, "loss": 0.9996, "step": 11095 }, { "epoch": 1.12, "grad_norm": 12.33850196492309, "learning_rate": 7.895765100081646e-06, "loss": 1.0006, "step": 11100 }, { "epoch": 1.12, "grad_norm": 10.626847004913168, "learning_rate": 7.893373566861292e-06, "loss": 1.0345, "step": 11105 }, { "epoch": 1.12, "grad_norm": 7.446076591813217, "learning_rate": 7.890981038033859e-06, "loss": 1.0387, "step": 11110 }, { "epoch": 1.12, "grad_norm": 8.060957710750417, "learning_rate": 7.88858751442261e-06, "loss": 1.026, "step": 11115 }, { "epoch": 1.12, "grad_norm": 7.228268268058988, "learning_rate": 7.886192996851157e-06, "loss": 1.0508, "step": 11120 }, { "epoch": 1.12, "grad_norm": 7.8075659425069, "learning_rate": 7.88379748614345e-06, "loss": 1.077, "step": 11125 }, { "epoch": 1.12, "grad_norm": 6.819613988104902, "learning_rate": 7.881400983123781e-06, "loss": 1.0307, "step": 11130 }, { "epoch": 1.12, "grad_norm": 9.468776605044408, "learning_rate": 7.879003488616788e-06, "loss": 1.0522, "step": 11135 }, { "epoch": 1.12, "grad_norm": 6.1731817046492745, "learning_rate": 7.876605003447443e-06, "loss": 1.0664, "step": 11140 }, { "epoch": 1.12, "grad_norm": 5.531644225900602, "learning_rate": 7.874205528441066e-06, "loss": 1.0354, "step": 11145 }, { "epoch": 1.12, "grad_norm": 13.930059962381911, "learning_rate": 7.871805064423308e-06, "loss": 1.0302, "step": 11150 }, { "epoch": 1.12, "grad_norm": 23.65862826045937, "learning_rate": 7.869403612220174e-06, "loss": 1.0645, "step": 11155 }, { "epoch": 1.13, "grad_norm": 14.90337054778474, "learning_rate": 7.867001172657995e-06, "loss": 1.0685, "step": 11160 }, { "epoch": 1.13, "grad_norm": 17.389451509530925, "learning_rate": 7.864597746563453e-06, "loss": 1.0228, "step": 11165 }, { "epoch": 1.13, "grad_norm": 19.688390048504406, "learning_rate": 7.862193334763562e-06, "loss": 1.0236, "step": 11170 }, { "epoch": 1.13, "grad_norm": 15.867171483942066, "learning_rate": 7.859787938085677e-06, "loss": 1.098, "step": 11175 }, { "epoch": 1.13, "grad_norm": 9.273633495798792, "learning_rate": 7.857381557357495e-06, "loss": 1.0321, "step": 11180 }, { "epoch": 1.13, "grad_norm": 30.61533719738187, "learning_rate": 7.854974193407047e-06, "loss": 1.0454, "step": 11185 }, { "epoch": 1.13, "grad_norm": 18.968282250852365, "learning_rate": 7.852565847062706e-06, "loss": 1.0331, "step": 11190 }, { "epoch": 1.13, "grad_norm": 28.827140133606875, "learning_rate": 7.850156519153183e-06, "loss": 1.024, "step": 11195 }, { "epoch": 1.13, "grad_norm": 36.740794689535356, "learning_rate": 7.847746210507522e-06, "loss": 1.099, "step": 11200 }, { "epoch": 1.13, "grad_norm": 44.494900973755506, "learning_rate": 7.845334921955111e-06, "loss": 1.0322, "step": 11205 }, { "epoch": 1.13, "grad_norm": 42.45845432983005, "learning_rate": 7.842922654325672e-06, "loss": 1.05, "step": 11210 }, { "epoch": 1.13, "grad_norm": 7.523642147761985, "learning_rate": 7.84050940844926e-06, "loss": 1.0771, "step": 11215 }, { "epoch": 1.13, "grad_norm": 21.394059843462124, "learning_rate": 7.838095185156276e-06, "loss": 1.0405, "step": 11220 }, { "epoch": 1.13, "grad_norm": 9.283677153672595, "learning_rate": 7.83567998527745e-06, "loss": 1.0834, "step": 11225 }, { "epoch": 1.13, "grad_norm": 28.72774136482472, "learning_rate": 7.833263809643848e-06, "loss": 1.0269, "step": 11230 }, { "epoch": 1.13, "grad_norm": 19.66660259770272, "learning_rate": 7.830846659086876e-06, "loss": 1.0759, "step": 11235 }, { "epoch": 1.13, "grad_norm": 11.813658613230237, "learning_rate": 7.82842853443827e-06, "loss": 1.0506, "step": 11240 }, { "epoch": 1.13, "grad_norm": 7.133952082068999, "learning_rate": 7.826009436530109e-06, "loss": 1.0687, "step": 11245 }, { "epoch": 1.13, "grad_norm": 7.930521893875791, "learning_rate": 7.823589366194799e-06, "loss": 1.043, "step": 11250 }, { "epoch": 1.13, "grad_norm": 9.752854297463534, "learning_rate": 7.821168324265088e-06, "loss": 1.0623, "step": 11255 }, { "epoch": 1.14, "grad_norm": 17.145263152954865, "learning_rate": 7.818746311574047e-06, "loss": 1.0475, "step": 11260 }, { "epoch": 1.14, "grad_norm": 7.308207081197901, "learning_rate": 7.816323328955094e-06, "loss": 1.0341, "step": 11265 }, { "epoch": 1.14, "grad_norm": 5.357752176077241, "learning_rate": 7.813899377241973e-06, "loss": 1.0696, "step": 11270 }, { "epoch": 1.14, "grad_norm": 35.70239091708709, "learning_rate": 7.811474457268765e-06, "loss": 1.0979, "step": 11275 }, { "epoch": 1.14, "grad_norm": 9.081859081902147, "learning_rate": 7.809048569869881e-06, "loss": 1.018, "step": 11280 }, { "epoch": 1.14, "grad_norm": 12.144620324513742, "learning_rate": 7.806621715880066e-06, "loss": 1.0702, "step": 11285 }, { "epoch": 1.14, "grad_norm": 6.676098661137126, "learning_rate": 7.804193896134402e-06, "loss": 1.0519, "step": 11290 }, { "epoch": 1.14, "grad_norm": 13.957609131873946, "learning_rate": 7.801765111468295e-06, "loss": 1.065, "step": 11295 }, { "epoch": 1.14, "grad_norm": 30.804669147183866, "learning_rate": 7.799335362717488e-06, "loss": 1.0803, "step": 11300 }, { "epoch": 1.14, "grad_norm": 8.972976576259954, "learning_rate": 7.796904650718058e-06, "loss": 1.0595, "step": 11305 }, { "epoch": 1.14, "grad_norm": 14.42614691099274, "learning_rate": 7.794472976306409e-06, "loss": 1.1013, "step": 11310 }, { "epoch": 1.14, "grad_norm": 7.3877898822812, "learning_rate": 7.792040340319275e-06, "loss": 1.0484, "step": 11315 }, { "epoch": 1.14, "grad_norm": 6.5635085577917245, "learning_rate": 7.78960674359373e-06, "loss": 1.0075, "step": 11320 }, { "epoch": 1.14, "grad_norm": 6.7544240823692085, "learning_rate": 7.787172186967169e-06, "loss": 1.0101, "step": 11325 }, { "epoch": 1.14, "grad_norm": 12.521578068619176, "learning_rate": 7.784736671277318e-06, "loss": 1.0461, "step": 11330 }, { "epoch": 1.14, "grad_norm": 7.301669315226013, "learning_rate": 7.782300197362241e-06, "loss": 0.9991, "step": 11335 }, { "epoch": 1.14, "grad_norm": 10.220694186657157, "learning_rate": 7.77986276606032e-06, "loss": 1.0877, "step": 11340 }, { "epoch": 1.14, "grad_norm": 7.972744230181513, "learning_rate": 7.777424378210279e-06, "loss": 1.0245, "step": 11345 }, { "epoch": 1.14, "grad_norm": 15.665389124617688, "learning_rate": 7.77498503465116e-06, "loss": 1.0289, "step": 11350 }, { "epoch": 1.14, "grad_norm": 22.08979678059866, "learning_rate": 7.772544736222345e-06, "loss": 1.0317, "step": 11355 }, { "epoch": 1.15, "grad_norm": 26.602186669253843, "learning_rate": 7.77010348376353e-06, "loss": 1.024, "step": 11360 }, { "epoch": 1.15, "grad_norm": 27.038319449125172, "learning_rate": 7.767661278114754e-06, "loss": 1.015, "step": 11365 }, { "epoch": 1.15, "grad_norm": 32.94987066921108, "learning_rate": 7.765218120116376e-06, "loss": 1.0769, "step": 11370 }, { "epoch": 1.15, "grad_norm": 23.66418718772051, "learning_rate": 7.762774010609083e-06, "loss": 1.0451, "step": 11375 }, { "epoch": 1.15, "grad_norm": 32.45744837740459, "learning_rate": 7.760328950433892e-06, "loss": 1.0282, "step": 11380 }, { "epoch": 1.15, "grad_norm": 29.766784519041305, "learning_rate": 7.757882940432145e-06, "loss": 1.0397, "step": 11385 }, { "epoch": 1.15, "grad_norm": 6.7197884991075405, "learning_rate": 7.755435981445513e-06, "loss": 1.0311, "step": 11390 }, { "epoch": 1.15, "grad_norm": 23.359189631838863, "learning_rate": 7.752988074315991e-06, "loss": 1.0214, "step": 11395 }, { "epoch": 1.15, "grad_norm": 23.444606363008706, "learning_rate": 7.750539219885902e-06, "loss": 1.0268, "step": 11400 }, { "epoch": 1.15, "grad_norm": 14.199249449692479, "learning_rate": 7.748089418997895e-06, "loss": 1.018, "step": 11405 }, { "epoch": 1.15, "grad_norm": 10.347617826238135, "learning_rate": 7.745638672494944e-06, "loss": 1.0613, "step": 11410 }, { "epoch": 1.15, "grad_norm": 8.417151079608356, "learning_rate": 7.743186981220348e-06, "loss": 1.061, "step": 11415 }, { "epoch": 1.15, "grad_norm": 12.959055237876981, "learning_rate": 7.740734346017733e-06, "loss": 1.056, "step": 11420 }, { "epoch": 1.15, "grad_norm": 15.176059141528643, "learning_rate": 7.738280767731045e-06, "loss": 0.9817, "step": 11425 }, { "epoch": 1.15, "grad_norm": 6.2690785308256105, "learning_rate": 7.73582624720456e-06, "loss": 1.0171, "step": 11430 }, { "epoch": 1.15, "grad_norm": 8.486811724837144, "learning_rate": 7.733370785282879e-06, "loss": 1.0282, "step": 11435 }, { "epoch": 1.15, "grad_norm": 11.823763052543553, "learning_rate": 7.730914382810919e-06, "loss": 1.0827, "step": 11440 }, { "epoch": 1.15, "grad_norm": 10.257250829565304, "learning_rate": 7.72845704063393e-06, "loss": 1.0779, "step": 11445 }, { "epoch": 1.15, "grad_norm": 9.893179851041156, "learning_rate": 7.725998759597478e-06, "loss": 1.0631, "step": 11450 }, { "epoch": 1.15, "grad_norm": 5.05310239761301, "learning_rate": 7.723539540547455e-06, "loss": 1.087, "step": 11455 }, { "epoch": 1.16, "grad_norm": 7.568081314755192, "learning_rate": 7.72107938433008e-06, "loss": 1.0407, "step": 11460 }, { "epoch": 1.16, "grad_norm": 12.30451232867941, "learning_rate": 7.718618291791887e-06, "loss": 1.0147, "step": 11465 }, { "epoch": 1.16, "grad_norm": 5.581416562349123, "learning_rate": 7.716156263779736e-06, "loss": 1.0496, "step": 11470 }, { "epoch": 1.16, "grad_norm": 18.265076701434122, "learning_rate": 7.71369330114081e-06, "loss": 1.0413, "step": 11475 }, { "epoch": 1.16, "grad_norm": 6.449712305653418, "learning_rate": 7.71122940472261e-06, "loss": 1.0169, "step": 11480 }, { "epoch": 1.16, "grad_norm": 6.470154640544608, "learning_rate": 7.708764575372962e-06, "loss": 1.0046, "step": 11485 }, { "epoch": 1.16, "grad_norm": 11.209267270155115, "learning_rate": 7.70629881394001e-06, "loss": 1.0392, "step": 11490 }, { "epoch": 1.16, "grad_norm": 14.664536173796234, "learning_rate": 7.703832121272221e-06, "loss": 1.0161, "step": 11495 }, { "epoch": 1.16, "grad_norm": 6.939112700472921, "learning_rate": 7.701364498218381e-06, "loss": 1.0694, "step": 11500 }, { "epoch": 1.16, "grad_norm": 17.043229302231182, "learning_rate": 7.698895945627597e-06, "loss": 0.9974, "step": 11505 }, { "epoch": 1.16, "grad_norm": 23.990478258309913, "learning_rate": 7.696426464349299e-06, "loss": 1.0676, "step": 11510 }, { "epoch": 1.16, "grad_norm": 25.65013648668628, "learning_rate": 7.693956055233227e-06, "loss": 1.068, "step": 11515 }, { "epoch": 1.16, "grad_norm": 10.336450698290124, "learning_rate": 7.69148471912945e-06, "loss": 1.0261, "step": 11520 }, { "epoch": 1.16, "grad_norm": 7.209808655252218, "learning_rate": 7.689012456888352e-06, "loss": 1.0366, "step": 11525 }, { "epoch": 1.16, "grad_norm": 8.653020039541587, "learning_rate": 7.686539269360636e-06, "loss": 1.0248, "step": 11530 }, { "epoch": 1.16, "grad_norm": 21.89442345749155, "learning_rate": 7.684065157397323e-06, "loss": 1.0303, "step": 11535 }, { "epoch": 1.16, "grad_norm": 13.183307252692156, "learning_rate": 7.681590121849754e-06, "loss": 1.0576, "step": 11540 }, { "epoch": 1.16, "grad_norm": 10.55522118711657, "learning_rate": 7.679114163569584e-06, "loss": 0.9881, "step": 11545 }, { "epoch": 1.16, "grad_norm": 8.645009341875197, "learning_rate": 7.67663728340879e-06, "loss": 1.0538, "step": 11550 }, { "epoch": 1.16, "grad_norm": 10.288234650084943, "learning_rate": 7.674159482219663e-06, "loss": 1.0916, "step": 11555 }, { "epoch": 1.17, "grad_norm": 6.323944478111165, "learning_rate": 7.671680760854812e-06, "loss": 1.0389, "step": 11560 }, { "epoch": 1.17, "grad_norm": 10.28682974878779, "learning_rate": 7.669201120167164e-06, "loss": 1.0077, "step": 11565 }, { "epoch": 1.17, "grad_norm": 12.965117672513008, "learning_rate": 7.666720561009959e-06, "loss": 1.0556, "step": 11570 }, { "epoch": 1.17, "grad_norm": 12.879913896859524, "learning_rate": 7.664239084236756e-06, "loss": 1.0431, "step": 11575 }, { "epoch": 1.17, "grad_norm": 10.09897871614661, "learning_rate": 7.661756690701429e-06, "loss": 1.0235, "step": 11580 }, { "epoch": 1.17, "grad_norm": 6.193103071161919, "learning_rate": 7.659273381258165e-06, "loss": 1.0652, "step": 11585 }, { "epoch": 1.17, "grad_norm": 36.002717652272395, "learning_rate": 7.656789156761473e-06, "loss": 1.0214, "step": 11590 }, { "epoch": 1.17, "grad_norm": 27.389027634831674, "learning_rate": 7.654304018066169e-06, "loss": 1.0712, "step": 11595 }, { "epoch": 1.17, "grad_norm": 29.295790763413898, "learning_rate": 7.651817966027387e-06, "loss": 1.0563, "step": 11600 }, { "epoch": 1.17, "grad_norm": 78.5986973673433, "learning_rate": 7.649331001500576e-06, "loss": 1.0787, "step": 11605 }, { "epoch": 1.17, "grad_norm": 20.747085214926916, "learning_rate": 7.646843125341498e-06, "loss": 1.0394, "step": 11610 }, { "epoch": 1.17, "grad_norm": 42.82699540343177, "learning_rate": 7.644354338406229e-06, "loss": 1.0461, "step": 11615 }, { "epoch": 1.17, "grad_norm": 38.36576292944948, "learning_rate": 7.641864641551157e-06, "loss": 1.0601, "step": 11620 }, { "epoch": 1.17, "grad_norm": 42.49809837717228, "learning_rate": 7.639374035632984e-06, "loss": 1.0101, "step": 11625 }, { "epoch": 1.17, "grad_norm": 21.982321503938692, "learning_rate": 7.636882521508728e-06, "loss": 1.0013, "step": 11630 }, { "epoch": 1.17, "grad_norm": 31.605933081763478, "learning_rate": 7.634390100035716e-06, "loss": 1.0223, "step": 11635 }, { "epoch": 1.17, "grad_norm": 11.527420466489241, "learning_rate": 7.631896772071585e-06, "loss": 1.0067, "step": 11640 }, { "epoch": 1.17, "grad_norm": 57.497324855910215, "learning_rate": 7.629402538474288e-06, "loss": 1.0984, "step": 11645 }, { "epoch": 1.17, "grad_norm": 70.73824629974315, "learning_rate": 7.626907400102089e-06, "loss": 1.0807, "step": 11650 }, { "epoch": 1.18, "grad_norm": 38.39492524899637, "learning_rate": 7.624411357813564e-06, "loss": 1.021, "step": 11655 }, { "epoch": 1.18, "grad_norm": 23.23834490509645, "learning_rate": 7.621914412467597e-06, "loss": 1.0724, "step": 11660 }, { "epoch": 1.18, "grad_norm": 13.626825151703164, "learning_rate": 7.6194165649233834e-06, "loss": 1.0526, "step": 11665 }, { "epoch": 1.18, "grad_norm": 13.050704659349623, "learning_rate": 7.616917816040433e-06, "loss": 1.058, "step": 11670 }, { "epoch": 1.18, "grad_norm": 25.018079860661217, "learning_rate": 7.614418166678562e-06, "loss": 1.0797, "step": 11675 }, { "epoch": 1.18, "grad_norm": 19.988606047033, "learning_rate": 7.611917617697896e-06, "loss": 1.0386, "step": 11680 }, { "epoch": 1.18, "grad_norm": 8.389834101700366, "learning_rate": 7.609416169958872e-06, "loss": 1.0451, "step": 11685 }, { "epoch": 1.18, "grad_norm": 14.369819552441536, "learning_rate": 7.606913824322238e-06, "loss": 1.013, "step": 11690 }, { "epoch": 1.18, "grad_norm": 14.873428629131478, "learning_rate": 7.604410581649045e-06, "loss": 1.0511, "step": 11695 }, { "epoch": 1.18, "grad_norm": 6.226147562203659, "learning_rate": 7.601906442800658e-06, "loss": 1.0711, "step": 11700 }, { "epoch": 1.18, "grad_norm": 13.70917967690263, "learning_rate": 7.599401408638751e-06, "loss": 1.0148, "step": 11705 }, { "epoch": 1.18, "grad_norm": 6.286083740875113, "learning_rate": 7.5968954800253015e-06, "loss": 1.0611, "step": 11710 }, { "epoch": 1.18, "grad_norm": 15.715994147646205, "learning_rate": 7.594388657822596e-06, "loss": 1.0474, "step": 11715 }, { "epoch": 1.18, "grad_norm": 8.514305533684258, "learning_rate": 7.591880942893234e-06, "loss": 1.0829, "step": 11720 }, { "epoch": 1.18, "grad_norm": 13.13120395760116, "learning_rate": 7.5893723361001135e-06, "loss": 1.0444, "step": 11725 }, { "epoch": 1.18, "grad_norm": 11.178578396505047, "learning_rate": 7.586862838306445e-06, "loss": 1.0816, "step": 11730 }, { "epoch": 1.18, "grad_norm": 12.746854313765677, "learning_rate": 7.5843524503757445e-06, "loss": 0.9941, "step": 11735 }, { "epoch": 1.18, "grad_norm": 11.078364992056647, "learning_rate": 7.581841173171835e-06, "loss": 1.0006, "step": 11740 }, { "epoch": 1.18, "grad_norm": 12.740739798927343, "learning_rate": 7.579329007558843e-06, "loss": 1.0418, "step": 11745 }, { "epoch": 1.18, "grad_norm": 6.539722370722517, "learning_rate": 7.576815954401204e-06, "loss": 1.0112, "step": 11750 }, { "epoch": 1.19, "grad_norm": 5.299343963705468, "learning_rate": 7.574302014563656e-06, "loss": 1.0479, "step": 11755 }, { "epoch": 1.19, "grad_norm": 29.875561566756147, "learning_rate": 7.571787188911243e-06, "loss": 1.0701, "step": 11760 }, { "epoch": 1.19, "grad_norm": 25.05908382103154, "learning_rate": 7.569271478309315e-06, "loss": 1.0341, "step": 11765 }, { "epoch": 1.19, "grad_norm": 15.601761644761815, "learning_rate": 7.566754883623525e-06, "loss": 1.0087, "step": 11770 }, { "epoch": 1.19, "grad_norm": 17.813865904730815, "learning_rate": 7.564237405719831e-06, "loss": 1.004, "step": 11775 }, { "epoch": 1.19, "grad_norm": 17.25236748256092, "learning_rate": 7.561719045464495e-06, "loss": 1.081, "step": 11780 }, { "epoch": 1.19, "grad_norm": 27.333124687407285, "learning_rate": 7.5591998037240825e-06, "loss": 1.0157, "step": 11785 }, { "epoch": 1.19, "grad_norm": 22.089282057926013, "learning_rate": 7.556679681365462e-06, "loss": 1.0206, "step": 11790 }, { "epoch": 1.19, "grad_norm": 39.214568937531084, "learning_rate": 7.554158679255805e-06, "loss": 1.0419, "step": 11795 }, { "epoch": 1.19, "grad_norm": 26.746819599838243, "learning_rate": 7.551636798262585e-06, "loss": 1.0797, "step": 11800 }, { "epoch": 1.19, "grad_norm": 25.734360047448632, "learning_rate": 7.549114039253581e-06, "loss": 1.0184, "step": 11805 }, { "epoch": 1.19, "grad_norm": 45.72851511737219, "learning_rate": 7.546590403096871e-06, "loss": 1.0573, "step": 11810 }, { "epoch": 1.19, "grad_norm": 9.937427370671701, "learning_rate": 7.544065890660834e-06, "loss": 1.0428, "step": 11815 }, { "epoch": 1.19, "grad_norm": 56.33537705264059, "learning_rate": 7.541540502814154e-06, "loss": 1.0469, "step": 11820 }, { "epoch": 1.19, "grad_norm": 33.71330192354937, "learning_rate": 7.539014240425816e-06, "loss": 1.0631, "step": 11825 }, { "epoch": 1.19, "grad_norm": 8.407018181834788, "learning_rate": 7.536487104365102e-06, "loss": 1.0612, "step": 11830 }, { "epoch": 1.19, "grad_norm": 29.552370346206306, "learning_rate": 7.533959095501597e-06, "loss": 1.0555, "step": 11835 }, { "epoch": 1.19, "grad_norm": 9.965135513648303, "learning_rate": 7.531430214705189e-06, "loss": 1.0246, "step": 11840 }, { "epoch": 1.19, "grad_norm": 37.11431532290853, "learning_rate": 7.528900462846062e-06, "loss": 1.0686, "step": 11845 }, { "epoch": 1.19, "grad_norm": 11.840035222559349, "learning_rate": 7.526369840794699e-06, "loss": 1.0106, "step": 11850 }, { "epoch": 1.2, "grad_norm": 6.281527677159895, "learning_rate": 7.523838349421889e-06, "loss": 1.028, "step": 11855 }, { "epoch": 1.2, "grad_norm": 12.020245250646116, "learning_rate": 7.5213059895987124e-06, "loss": 1.0179, "step": 11860 }, { "epoch": 1.2, "grad_norm": 10.354434431670976, "learning_rate": 7.518772762196553e-06, "loss": 1.0332, "step": 11865 }, { "epoch": 1.2, "grad_norm": 25.30003234851166, "learning_rate": 7.516238668087092e-06, "loss": 1.0493, "step": 11870 }, { "epoch": 1.2, "grad_norm": 14.778100727598297, "learning_rate": 7.513703708142309e-06, "loss": 1.0585, "step": 11875 }, { "epoch": 1.2, "grad_norm": 5.594776106824048, "learning_rate": 7.5111678832344806e-06, "loss": 1.0452, "step": 11880 }, { "epoch": 1.2, "grad_norm": 19.96266800576846, "learning_rate": 7.508631194236182e-06, "loss": 1.0303, "step": 11885 }, { "epoch": 1.2, "grad_norm": 10.193457442652853, "learning_rate": 7.506093642020286e-06, "loss": 1.0045, "step": 11890 }, { "epoch": 1.2, "grad_norm": 7.817856979567341, "learning_rate": 7.50355522745996e-06, "loss": 1.02, "step": 11895 }, { "epoch": 1.2, "grad_norm": 14.320404713505287, "learning_rate": 7.501015951428673e-06, "loss": 1.0394, "step": 11900 }, { "epoch": 1.2, "grad_norm": 5.896707491370909, "learning_rate": 7.498475814800184e-06, "loss": 1.0197, "step": 11905 }, { "epoch": 1.2, "grad_norm": 23.15339531944063, "learning_rate": 7.495934818448555e-06, "loss": 1.0546, "step": 11910 }, { "epoch": 1.2, "grad_norm": 21.27469954026058, "learning_rate": 7.493392963248138e-06, "loss": 1.0375, "step": 11915 }, { "epoch": 1.2, "grad_norm": 18.41665952623711, "learning_rate": 7.490850250073585e-06, "loss": 1.0324, "step": 11920 }, { "epoch": 1.2, "grad_norm": 20.31198194365891, "learning_rate": 7.488306679799838e-06, "loss": 1.0249, "step": 11925 }, { "epoch": 1.2, "grad_norm": 7.6231025150440415, "learning_rate": 7.4857622533021415e-06, "loss": 1.014, "step": 11930 }, { "epoch": 1.2, "grad_norm": 16.329948465900326, "learning_rate": 7.483216971456027e-06, "loss": 1.0423, "step": 11935 }, { "epoch": 1.2, "grad_norm": 10.185718359298546, "learning_rate": 7.480670835137324e-06, "loss": 1.0605, "step": 11940 }, { "epoch": 1.2, "grad_norm": 11.293068193481355, "learning_rate": 7.478123845222155e-06, "loss": 0.9933, "step": 11945 }, { "epoch": 1.2, "grad_norm": 21.82002169605213, "learning_rate": 7.47557600258694e-06, "loss": 1.0965, "step": 11950 }, { "epoch": 1.21, "grad_norm": 21.896520825672138, "learning_rate": 7.4730273081083845e-06, "loss": 1.0227, "step": 11955 }, { "epoch": 1.21, "grad_norm": 14.134215363044147, "learning_rate": 7.470477762663495e-06, "loss": 0.9876, "step": 11960 }, { "epoch": 1.21, "grad_norm": 5.438427314214314, "learning_rate": 7.467927367129568e-06, "loss": 1.0533, "step": 11965 }, { "epoch": 1.21, "grad_norm": 6.797106966731714, "learning_rate": 7.4653761223841906e-06, "loss": 1.0289, "step": 11970 }, { "epoch": 1.21, "grad_norm": 6.665798452998185, "learning_rate": 7.462824029305243e-06, "loss": 1.0613, "step": 11975 }, { "epoch": 1.21, "grad_norm": 7.152409531921233, "learning_rate": 7.460271088770902e-06, "loss": 1.0811, "step": 11980 }, { "epoch": 1.21, "grad_norm": 12.889424672209163, "learning_rate": 7.457717301659626e-06, "loss": 1.0123, "step": 11985 }, { "epoch": 1.21, "grad_norm": 14.443733425576188, "learning_rate": 7.455162668850175e-06, "loss": 1.0179, "step": 11990 }, { "epoch": 1.21, "grad_norm": 9.578724563179945, "learning_rate": 7.452607191221596e-06, "loss": 1.043, "step": 11995 }, { "epoch": 1.21, "grad_norm": 20.593865569307333, "learning_rate": 7.450050869653224e-06, "loss": 0.9908, "step": 12000 }, { "epoch": 1.21, "grad_norm": 42.410456039081524, "learning_rate": 7.4474937050246865e-06, "loss": 1.0648, "step": 12005 }, { "epoch": 1.21, "grad_norm": 32.589676758330064, "learning_rate": 7.444935698215905e-06, "loss": 1.0305, "step": 12010 }, { "epoch": 1.21, "grad_norm": 16.916616819955006, "learning_rate": 7.442376850107083e-06, "loss": 1.0317, "step": 12015 }, { "epoch": 1.21, "grad_norm": 6.563970452745615, "learning_rate": 7.439817161578721e-06, "loss": 1.0074, "step": 12020 }, { "epoch": 1.21, "grad_norm": 14.369544319819505, "learning_rate": 7.437256633511603e-06, "loss": 1.0282, "step": 12025 }, { "epoch": 1.21, "grad_norm": 81.02214877177478, "learning_rate": 7.434695266786807e-06, "loss": 1.0539, "step": 12030 }, { "epoch": 1.21, "grad_norm": 63.362129758959256, "learning_rate": 7.432133062285693e-06, "loss": 1.0339, "step": 12035 }, { "epoch": 1.21, "grad_norm": 18.466818473382034, "learning_rate": 7.429570020889916e-06, "loss": 1.123, "step": 12040 }, { "epoch": 1.21, "grad_norm": 117.99242146591413, "learning_rate": 7.4270061434814156e-06, "loss": 1.1451, "step": 12045 }, { "epoch": 1.21, "grad_norm": 26.428118691401227, "learning_rate": 7.424441430942418e-06, "loss": 1.062, "step": 12050 }, { "epoch": 1.22, "grad_norm": 61.68523063344123, "learning_rate": 7.421875884155442e-06, "loss": 1.0528, "step": 12055 }, { "epoch": 1.22, "grad_norm": 16.310893704321423, "learning_rate": 7.419309504003286e-06, "loss": 1.009, "step": 12060 }, { "epoch": 1.22, "grad_norm": 12.134186371943398, "learning_rate": 7.416742291369041e-06, "loss": 1.092, "step": 12065 }, { "epoch": 1.22, "grad_norm": 8.07378676429573, "learning_rate": 7.414174247136081e-06, "loss": 1.0399, "step": 12070 }, { "epoch": 1.22, "grad_norm": 8.895453691924525, "learning_rate": 7.411605372188068e-06, "loss": 1.057, "step": 12075 }, { "epoch": 1.22, "grad_norm": 11.816493232257297, "learning_rate": 7.409035667408951e-06, "loss": 1.0485, "step": 12080 }, { "epoch": 1.22, "grad_norm": 10.004634418174367, "learning_rate": 7.406465133682961e-06, "loss": 1.0245, "step": 12085 }, { "epoch": 1.22, "grad_norm": 10.262160758749364, "learning_rate": 7.403893771894618e-06, "loss": 1.0168, "step": 12090 }, { "epoch": 1.22, "grad_norm": 83.46486853809938, "learning_rate": 7.401321582928722e-06, "loss": 1.0558, "step": 12095 }, { "epoch": 1.22, "grad_norm": 45.59415566925905, "learning_rate": 7.398748567670364e-06, "loss": 1.0833, "step": 12100 }, { "epoch": 1.22, "grad_norm": 10.413525481182806, "learning_rate": 7.396174727004915e-06, "loss": 1.0297, "step": 12105 }, { "epoch": 1.22, "grad_norm": 36.060354029203495, "learning_rate": 7.393600061818031e-06, "loss": 0.9854, "step": 12110 }, { "epoch": 1.22, "grad_norm": 8.742621477803867, "learning_rate": 7.391024572995652e-06, "loss": 1.0082, "step": 12115 }, { "epoch": 1.22, "grad_norm": 9.899628942463798, "learning_rate": 7.388448261424002e-06, "loss": 1.026, "step": 12120 }, { "epoch": 1.22, "grad_norm": 8.47051525142764, "learning_rate": 7.385871127989584e-06, "loss": 1.0435, "step": 12125 }, { "epoch": 1.22, "grad_norm": 8.032898832474729, "learning_rate": 7.383293173579192e-06, "loss": 1.0847, "step": 12130 }, { "epoch": 1.22, "grad_norm": 8.214473601243663, "learning_rate": 7.380714399079894e-06, "loss": 1.0917, "step": 12135 }, { "epoch": 1.22, "grad_norm": 7.741330271398156, "learning_rate": 7.378134805379046e-06, "loss": 1.0164, "step": 12140 }, { "epoch": 1.22, "grad_norm": 7.721926747859045, "learning_rate": 7.375554393364281e-06, "loss": 1.0236, "step": 12145 }, { "epoch": 1.22, "grad_norm": 18.241684309958316, "learning_rate": 7.372973163923521e-06, "loss": 1.0521, "step": 12150 }, { "epoch": 1.23, "grad_norm": 7.768640195269594, "learning_rate": 7.37039111794496e-06, "loss": 1.0152, "step": 12155 }, { "epoch": 1.23, "grad_norm": 16.11180816202643, "learning_rate": 7.3678082563170795e-06, "loss": 1.0445, "step": 12160 }, { "epoch": 1.23, "grad_norm": 13.321856476735322, "learning_rate": 7.36522457992864e-06, "loss": 1.0724, "step": 12165 }, { "epoch": 1.23, "grad_norm": 6.679558986447608, "learning_rate": 7.36264008966868e-06, "loss": 1.0118, "step": 12170 }, { "epoch": 1.23, "grad_norm": 9.763045702610519, "learning_rate": 7.360054786426523e-06, "loss": 1.0341, "step": 12175 }, { "epoch": 1.23, "grad_norm": 5.664202828772289, "learning_rate": 7.357468671091769e-06, "loss": 1.0163, "step": 12180 }, { "epoch": 1.23, "grad_norm": 8.474044749820074, "learning_rate": 7.354881744554295e-06, "loss": 1.0537, "step": 12185 }, { "epoch": 1.23, "grad_norm": 7.806941862617317, "learning_rate": 7.352294007704264e-06, "loss": 1.0285, "step": 12190 }, { "epoch": 1.23, "grad_norm": 11.439091677523939, "learning_rate": 7.349705461432111e-06, "loss": 1.0277, "step": 12195 }, { "epoch": 1.23, "grad_norm": 5.9110356966133075, "learning_rate": 7.347116106628552e-06, "loss": 1.0517, "step": 12200 }, { "epoch": 1.23, "grad_norm": 21.628650555715147, "learning_rate": 7.344525944184583e-06, "loss": 1.0052, "step": 12205 }, { "epoch": 1.23, "grad_norm": 16.59051396975955, "learning_rate": 7.341934974991475e-06, "loss": 1.0597, "step": 12210 }, { "epoch": 1.23, "grad_norm": 12.506905837854545, "learning_rate": 7.339343199940781e-06, "loss": 1.0487, "step": 12215 }, { "epoch": 1.23, "grad_norm": 29.591091767578742, "learning_rate": 7.336750619924324e-06, "loss": 1.0879, "step": 12220 }, { "epoch": 1.23, "grad_norm": 18.500775694028768, "learning_rate": 7.3341572358342106e-06, "loss": 0.9942, "step": 12225 }, { "epoch": 1.23, "grad_norm": 9.698100785554281, "learning_rate": 7.331563048562823e-06, "loss": 0.995, "step": 12230 }, { "epoch": 1.23, "grad_norm": 8.678619222102471, "learning_rate": 7.328968059002816e-06, "loss": 1.0036, "step": 12235 }, { "epoch": 1.23, "grad_norm": 21.717058220261013, "learning_rate": 7.326372268047126e-06, "loss": 1.0141, "step": 12240 }, { "epoch": 1.23, "grad_norm": 32.58218285532491, "learning_rate": 7.32377567658896e-06, "loss": 1.0866, "step": 12245 }, { "epoch": 1.24, "grad_norm": 22.211976172679357, "learning_rate": 7.321178285521803e-06, "loss": 1.048, "step": 12250 }, { "epoch": 1.24, "grad_norm": 8.174005536221587, "learning_rate": 7.318580095739414e-06, "loss": 1.0712, "step": 12255 }, { "epoch": 1.24, "grad_norm": 8.402187876841412, "learning_rate": 7.31598110813583e-06, "loss": 1.0803, "step": 12260 }, { "epoch": 1.24, "grad_norm": 12.961387598255712, "learning_rate": 7.313381323605358e-06, "loss": 1.0511, "step": 12265 }, { "epoch": 1.24, "grad_norm": 10.117317501652218, "learning_rate": 7.310780743042582e-06, "loss": 1.0348, "step": 12270 }, { "epoch": 1.24, "grad_norm": 14.799743957011334, "learning_rate": 7.30817936734236e-06, "loss": 1.0166, "step": 12275 }, { "epoch": 1.24, "grad_norm": 6.5633985549972955, "learning_rate": 7.30557719739982e-06, "loss": 1.0362, "step": 12280 }, { "epoch": 1.24, "grad_norm": 9.630547954930222, "learning_rate": 7.302974234110371e-06, "loss": 1.047, "step": 12285 }, { "epoch": 1.24, "grad_norm": 9.977244441347235, "learning_rate": 7.300370478369687e-06, "loss": 1.0569, "step": 12290 }, { "epoch": 1.24, "grad_norm": 12.767745576845352, "learning_rate": 7.297765931073718e-06, "loss": 1.0362, "step": 12295 }, { "epoch": 1.24, "grad_norm": 15.83769986316688, "learning_rate": 7.295160593118687e-06, "loss": 1.0446, "step": 12300 }, { "epoch": 1.24, "grad_norm": 7.351824272242793, "learning_rate": 7.2925544654010885e-06, "loss": 1.0334, "step": 12305 }, { "epoch": 1.24, "grad_norm": 8.969839115109984, "learning_rate": 7.289947548817687e-06, "loss": 1.0491, "step": 12310 }, { "epoch": 1.24, "grad_norm": 17.136217622195936, "learning_rate": 7.287339844265522e-06, "loss": 1.0099, "step": 12315 }, { "epoch": 1.24, "grad_norm": 7.247329863483515, "learning_rate": 7.284731352641903e-06, "loss": 1.0323, "step": 12320 }, { "epoch": 1.24, "grad_norm": 7.78098142765102, "learning_rate": 7.282122074844404e-06, "loss": 1.0202, "step": 12325 }, { "epoch": 1.24, "grad_norm": 9.18913385981386, "learning_rate": 7.2795120117708804e-06, "loss": 1.0699, "step": 12330 }, { "epoch": 1.24, "grad_norm": 14.796562129763549, "learning_rate": 7.276901164319452e-06, "loss": 1.0205, "step": 12335 }, { "epoch": 1.24, "grad_norm": 11.081581545913867, "learning_rate": 7.274289533388505e-06, "loss": 0.9852, "step": 12340 }, { "epoch": 1.24, "grad_norm": 5.599993939832297, "learning_rate": 7.2716771198767035e-06, "loss": 1.0266, "step": 12345 }, { "epoch": 1.25, "grad_norm": 12.122890189257696, "learning_rate": 7.269063924682974e-06, "loss": 1.001, "step": 12350 }, { "epoch": 1.25, "grad_norm": 8.483029398431478, "learning_rate": 7.266449948706517e-06, "loss": 1.0569, "step": 12355 }, { "epoch": 1.25, "grad_norm": 6.532734085560354, "learning_rate": 7.263835192846795e-06, "loss": 1.0619, "step": 12360 }, { "epoch": 1.25, "grad_norm": 10.057663962553224, "learning_rate": 7.2612196580035465e-06, "loss": 1.0185, "step": 12365 }, { "epoch": 1.25, "grad_norm": 6.063995399514143, "learning_rate": 7.258603345076773e-06, "loss": 1.0223, "step": 12370 }, { "epoch": 1.25, "grad_norm": 20.492031391508416, "learning_rate": 7.255986254966747e-06, "loss": 1.0152, "step": 12375 }, { "epoch": 1.25, "grad_norm": 10.980889415213772, "learning_rate": 7.253368388574004e-06, "loss": 1.0853, "step": 12380 }, { "epoch": 1.25, "grad_norm": 6.932420119536721, "learning_rate": 7.2507497467993515e-06, "loss": 1.0281, "step": 12385 }, { "epoch": 1.25, "grad_norm": 17.442263891628073, "learning_rate": 7.24813033054386e-06, "loss": 1.0653, "step": 12390 }, { "epoch": 1.25, "grad_norm": 19.04890079244909, "learning_rate": 7.2455101407088694e-06, "loss": 1.0582, "step": 12395 }, { "epoch": 1.25, "grad_norm": 10.76606447908117, "learning_rate": 7.242889178195984e-06, "loss": 1.0594, "step": 12400 }, { "epoch": 1.25, "grad_norm": 20.407517494631122, "learning_rate": 7.240267443907074e-06, "loss": 1.0406, "step": 12405 }, { "epoch": 1.25, "grad_norm": 6.844117091500154, "learning_rate": 7.237644938744277e-06, "loss": 1.028, "step": 12410 }, { "epoch": 1.25, "grad_norm": 5.081519520518418, "learning_rate": 7.235021663609995e-06, "loss": 1.04, "step": 12415 }, { "epoch": 1.25, "grad_norm": 10.90657997718108, "learning_rate": 7.232397619406891e-06, "loss": 1.0339, "step": 12420 }, { "epoch": 1.25, "grad_norm": 6.055211671272745, "learning_rate": 7.2297728070379e-06, "loss": 1.0916, "step": 12425 }, { "epoch": 1.25, "grad_norm": 5.292463026715902, "learning_rate": 7.227147227406215e-06, "loss": 1.119, "step": 12430 }, { "epoch": 1.25, "grad_norm": 8.40782131860608, "learning_rate": 7.224520881415296e-06, "loss": 1.0729, "step": 12435 }, { "epoch": 1.25, "grad_norm": 9.861234186251867, "learning_rate": 7.221893769968866e-06, "loss": 1.0636, "step": 12440 }, { "epoch": 1.25, "grad_norm": 5.435769423839574, "learning_rate": 7.219265893970913e-06, "loss": 1.0333, "step": 12445 }, { "epoch": 1.26, "grad_norm": 10.968760917967472, "learning_rate": 7.216637254325685e-06, "loss": 1.0419, "step": 12450 }, { "epoch": 1.26, "grad_norm": 10.53215039683486, "learning_rate": 7.214007851937696e-06, "loss": 1.0314, "step": 12455 }, { "epoch": 1.26, "grad_norm": 12.877044861260606, "learning_rate": 7.21137768771172e-06, "loss": 1.019, "step": 12460 }, { "epoch": 1.26, "grad_norm": 9.287028573377304, "learning_rate": 7.208746762552792e-06, "loss": 0.9964, "step": 12465 }, { "epoch": 1.26, "grad_norm": 11.675474516635187, "learning_rate": 7.206115077366211e-06, "loss": 0.9874, "step": 12470 }, { "epoch": 1.26, "grad_norm": 6.109227137571904, "learning_rate": 7.203482633057542e-06, "loss": 1.0754, "step": 12475 }, { "epoch": 1.26, "grad_norm": 7.617850345781858, "learning_rate": 7.200849430532603e-06, "loss": 1.0351, "step": 12480 }, { "epoch": 1.26, "grad_norm": 9.918311738342016, "learning_rate": 7.198215470697476e-06, "loss": 1.0259, "step": 12485 }, { "epoch": 1.26, "grad_norm": 12.622793750849116, "learning_rate": 7.195580754458506e-06, "loss": 1.0338, "step": 12490 }, { "epoch": 1.26, "grad_norm": 10.284078241578221, "learning_rate": 7.192945282722295e-06, "loss": 0.9951, "step": 12495 }, { "epoch": 1.26, "grad_norm": 5.934648099905459, "learning_rate": 7.190309056395707e-06, "loss": 1.0494, "step": 12500 }, { "epoch": 1.26, "grad_norm": 27.260267090753327, "learning_rate": 7.187672076385866e-06, "loss": 1.0635, "step": 12505 }, { "epoch": 1.26, "grad_norm": 31.740758025053612, "learning_rate": 7.18503434360015e-06, "loss": 1.0887, "step": 12510 }, { "epoch": 1.26, "grad_norm": 7.88891262912335, "learning_rate": 7.182395858946205e-06, "loss": 1.0174, "step": 12515 }, { "epoch": 1.26, "grad_norm": 25.142187362474495, "learning_rate": 7.1797566233319295e-06, "loss": 1.0363, "step": 12520 }, { "epoch": 1.26, "grad_norm": 6.329384696868719, "learning_rate": 7.177116637665481e-06, "loss": 1.0686, "step": 12525 }, { "epoch": 1.26, "grad_norm": 25.886128600810267, "learning_rate": 7.174475902855277e-06, "loss": 1.0288, "step": 12530 }, { "epoch": 1.26, "grad_norm": 28.974868937037098, "learning_rate": 7.171834419809993e-06, "loss": 1.0078, "step": 12535 }, { "epoch": 1.26, "grad_norm": 18.79249425342462, "learning_rate": 7.169192189438558e-06, "loss": 1.0652, "step": 12540 }, { "epoch": 1.26, "grad_norm": 6.237221329610547, "learning_rate": 7.166549212650162e-06, "loss": 1.018, "step": 12545 }, { "epoch": 1.27, "grad_norm": 13.120941011725753, "learning_rate": 7.163905490354254e-06, "loss": 1.0207, "step": 12550 }, { "epoch": 1.27, "grad_norm": 27.040197948817184, "learning_rate": 7.16126102346053e-06, "loss": 1.0338, "step": 12555 }, { "epoch": 1.27, "grad_norm": 5.480407746930139, "learning_rate": 7.158615812878954e-06, "loss": 1.0465, "step": 12560 }, { "epoch": 1.27, "grad_norm": 16.480070729256095, "learning_rate": 7.155969859519739e-06, "loss": 1.0365, "step": 12565 }, { "epoch": 1.27, "grad_norm": 18.25539248289707, "learning_rate": 7.153323164293353e-06, "loss": 1.071, "step": 12570 }, { "epoch": 1.27, "grad_norm": 7.285603597985358, "learning_rate": 7.150675728110525e-06, "loss": 1.024, "step": 12575 }, { "epoch": 1.27, "grad_norm": 22.656511072501885, "learning_rate": 7.148027551882232e-06, "loss": 1.0349, "step": 12580 }, { "epoch": 1.27, "grad_norm": 25.51686516079161, "learning_rate": 7.14537863651971e-06, "loss": 1.0305, "step": 12585 }, { "epoch": 1.27, "grad_norm": 36.296584926767956, "learning_rate": 7.142728982934448e-06, "loss": 1.0582, "step": 12590 }, { "epoch": 1.27, "grad_norm": 9.6989012937694, "learning_rate": 7.140078592038188e-06, "loss": 1.0583, "step": 12595 }, { "epoch": 1.27, "grad_norm": 12.384304551882453, "learning_rate": 7.137427464742931e-06, "loss": 1.0389, "step": 12600 }, { "epoch": 1.27, "grad_norm": 8.185076794077693, "learning_rate": 7.134775601960924e-06, "loss": 0.9996, "step": 12605 }, { "epoch": 1.27, "grad_norm": 11.546886645297697, "learning_rate": 7.1321230046046704e-06, "loss": 1.0152, "step": 12610 }, { "epoch": 1.27, "grad_norm": 7.036397979494588, "learning_rate": 7.129469673586928e-06, "loss": 1.0489, "step": 12615 }, { "epoch": 1.27, "grad_norm": 7.682921144015696, "learning_rate": 7.126815609820705e-06, "loss": 1.0305, "step": 12620 }, { "epoch": 1.27, "grad_norm": 16.658399193206822, "learning_rate": 7.124160814219262e-06, "loss": 1.0618, "step": 12625 }, { "epoch": 1.27, "grad_norm": 13.945032393377005, "learning_rate": 7.1215052876961126e-06, "loss": 1.0678, "step": 12630 }, { "epoch": 1.27, "grad_norm": 7.638107952258533, "learning_rate": 7.118849031165018e-06, "loss": 1.0623, "step": 12635 }, { "epoch": 1.27, "grad_norm": 6.605684454562965, "learning_rate": 7.116192045539996e-06, "loss": 1.0466, "step": 12640 }, { "epoch": 1.27, "grad_norm": 9.750111366604184, "learning_rate": 7.113534331735313e-06, "loss": 1.0526, "step": 12645 }, { "epoch": 1.28, "grad_norm": 12.176878081409408, "learning_rate": 7.110875890665485e-06, "loss": 1.0297, "step": 12650 }, { "epoch": 1.28, "grad_norm": 7.296951539236836, "learning_rate": 7.1082167232452785e-06, "loss": 1.0467, "step": 12655 }, { "epoch": 1.28, "grad_norm": 15.97042566401193, "learning_rate": 7.105556830389713e-06, "loss": 1.0344, "step": 12660 }, { "epoch": 1.28, "grad_norm": 7.3407772774995985, "learning_rate": 7.102896213014051e-06, "loss": 1.0328, "step": 12665 }, { "epoch": 1.28, "grad_norm": 15.607682097535177, "learning_rate": 7.100234872033811e-06, "loss": 1.0044, "step": 12670 }, { "epoch": 1.28, "grad_norm": 40.043840619621726, "learning_rate": 7.097572808364759e-06, "loss": 1.0728, "step": 12675 }, { "epoch": 1.28, "grad_norm": 27.527787667760947, "learning_rate": 7.094910022922905e-06, "loss": 1.0092, "step": 12680 }, { "epoch": 1.28, "grad_norm": 12.531847109483222, "learning_rate": 7.092246516624513e-06, "loss": 1.0342, "step": 12685 }, { "epoch": 1.28, "grad_norm": 75.7379895208296, "learning_rate": 7.089582290386095e-06, "loss": 1.0137, "step": 12690 }, { "epoch": 1.28, "grad_norm": 64.10051819695862, "learning_rate": 7.086917345124406e-06, "loss": 1.0882, "step": 12695 }, { "epoch": 1.28, "grad_norm": 41.26126004999705, "learning_rate": 7.084251681756451e-06, "loss": 1.0554, "step": 12700 }, { "epoch": 1.28, "grad_norm": 10.337770024854704, "learning_rate": 7.081585301199483e-06, "loss": 1.0163, "step": 12705 }, { "epoch": 1.28, "grad_norm": 13.561604971280826, "learning_rate": 7.078918204371003e-06, "loss": 1.0499, "step": 12710 }, { "epoch": 1.28, "grad_norm": 15.69201828658827, "learning_rate": 7.076250392188752e-06, "loss": 1.0125, "step": 12715 }, { "epoch": 1.28, "grad_norm": 7.476788419263224, "learning_rate": 7.073581865570724e-06, "loss": 1.0923, "step": 12720 }, { "epoch": 1.28, "grad_norm": 14.125474784180392, "learning_rate": 7.070912625435158e-06, "loss": 1.0808, "step": 12725 }, { "epoch": 1.28, "grad_norm": 7.270760693192891, "learning_rate": 7.0682426727005345e-06, "loss": 0.9814, "step": 12730 }, { "epoch": 1.28, "grad_norm": 9.556102509114638, "learning_rate": 7.065572008285584e-06, "loss": 1.0297, "step": 12735 }, { "epoch": 1.28, "grad_norm": 9.606222908410741, "learning_rate": 7.062900633109277e-06, "loss": 1.0731, "step": 12740 }, { "epoch": 1.28, "grad_norm": 8.49691656987773, "learning_rate": 7.060228548090833e-06, "loss": 1.0223, "step": 12745 }, { "epoch": 1.29, "grad_norm": 10.925933039779347, "learning_rate": 7.057555754149713e-06, "loss": 0.9925, "step": 12750 }, { "epoch": 1.29, "grad_norm": 13.128472275615595, "learning_rate": 7.054882252205624e-06, "loss": 1.0409, "step": 12755 }, { "epoch": 1.29, "grad_norm": 7.126108692814256, "learning_rate": 7.052208043178514e-06, "loss": 1.0924, "step": 12760 }, { "epoch": 1.29, "grad_norm": 10.21221054468124, "learning_rate": 7.049533127988576e-06, "loss": 1.0358, "step": 12765 }, { "epoch": 1.29, "grad_norm": 7.1484499062030125, "learning_rate": 7.046857507556247e-06, "loss": 1.0567, "step": 12770 }, { "epoch": 1.29, "grad_norm": 38.45334141445022, "learning_rate": 7.0441811828022045e-06, "loss": 1.0411, "step": 12775 }, { "epoch": 1.29, "grad_norm": 25.874324500701785, "learning_rate": 7.041504154647369e-06, "loss": 1.0286, "step": 12780 }, { "epoch": 1.29, "grad_norm": 15.270163623871788, "learning_rate": 7.038826424012904e-06, "loss": 1.0301, "step": 12785 }, { "epoch": 1.29, "grad_norm": 7.999079089209222, "learning_rate": 7.036147991820215e-06, "loss": 1.0585, "step": 12790 }, { "epoch": 1.29, "grad_norm": 24.206478065357793, "learning_rate": 7.033468858990944e-06, "loss": 1.0118, "step": 12795 }, { "epoch": 1.29, "grad_norm": 15.89523978208532, "learning_rate": 7.030789026446984e-06, "loss": 0.9707, "step": 12800 }, { "epoch": 1.29, "grad_norm": 8.747962327773623, "learning_rate": 7.028108495110457e-06, "loss": 1.0732, "step": 12805 }, { "epoch": 1.29, "grad_norm": 23.8698334255745, "learning_rate": 7.025427265903735e-06, "loss": 1.0465, "step": 12810 }, { "epoch": 1.29, "grad_norm": 10.05917059719966, "learning_rate": 7.022745339749426e-06, "loss": 1.0668, "step": 12815 }, { "epoch": 1.29, "grad_norm": 18.915197740297916, "learning_rate": 7.020062717570376e-06, "loss": 1.0105, "step": 12820 }, { "epoch": 1.29, "grad_norm": 20.876299249569453, "learning_rate": 7.017379400289675e-06, "loss": 1.0386, "step": 12825 }, { "epoch": 1.29, "grad_norm": 6.659062012775592, "learning_rate": 7.01469538883065e-06, "loss": 1.0328, "step": 12830 }, { "epoch": 1.29, "grad_norm": 6.95405327987556, "learning_rate": 7.012010684116865e-06, "loss": 1.025, "step": 12835 }, { "epoch": 1.29, "grad_norm": 9.551544747776951, "learning_rate": 7.009325287072125e-06, "loss": 1.0814, "step": 12840 }, { "epoch": 1.3, "grad_norm": 26.97255684330524, "learning_rate": 7.006639198620473e-06, "loss": 0.9959, "step": 12845 }, { "epoch": 1.3, "grad_norm": 37.41006235833254, "learning_rate": 7.00395241968619e-06, "loss": 1.0512, "step": 12850 }, { "epoch": 1.3, "grad_norm": 20.749955587173165, "learning_rate": 7.001264951193793e-06, "loss": 1.0193, "step": 12855 }, { "epoch": 1.3, "grad_norm": 9.119022299789663, "learning_rate": 6.998576794068037e-06, "loss": 0.9929, "step": 12860 }, { "epoch": 1.3, "grad_norm": 17.41580671020243, "learning_rate": 6.995887949233917e-06, "loss": 1.0261, "step": 12865 }, { "epoch": 1.3, "grad_norm": 21.30687944722217, "learning_rate": 6.993198417616658e-06, "loss": 1.0262, "step": 12870 }, { "epoch": 1.3, "grad_norm": 17.579780848368085, "learning_rate": 6.990508200141728e-06, "loss": 0.9938, "step": 12875 }, { "epoch": 1.3, "grad_norm": 15.586664085842704, "learning_rate": 6.987817297734828e-06, "loss": 1.0025, "step": 12880 }, { "epoch": 1.3, "grad_norm": 15.894681746357387, "learning_rate": 6.985125711321894e-06, "loss": 1.0483, "step": 12885 }, { "epoch": 1.3, "grad_norm": 16.365915987102852, "learning_rate": 6.982433441829097e-06, "loss": 1.0512, "step": 12890 }, { "epoch": 1.3, "grad_norm": 14.951731138083796, "learning_rate": 6.979740490182849e-06, "loss": 1.0282, "step": 12895 }, { "epoch": 1.3, "grad_norm": 7.217412059486604, "learning_rate": 6.977046857309788e-06, "loss": 1.0322, "step": 12900 }, { "epoch": 1.3, "grad_norm": 8.806719211080662, "learning_rate": 6.974352544136791e-06, "loss": 1.045, "step": 12905 }, { "epoch": 1.3, "grad_norm": 5.92757104630876, "learning_rate": 6.971657551590971e-06, "loss": 1.0264, "step": 12910 }, { "epoch": 1.3, "grad_norm": 15.25657133532226, "learning_rate": 6.968961880599668e-06, "loss": 1.0021, "step": 12915 }, { "epoch": 1.3, "grad_norm": 14.516066347033293, "learning_rate": 6.9662655320904636e-06, "loss": 1.0564, "step": 12920 }, { "epoch": 1.3, "grad_norm": 9.063912907532362, "learning_rate": 6.96356850699117e-06, "loss": 1.0061, "step": 12925 }, { "epoch": 1.3, "grad_norm": 15.391986135466741, "learning_rate": 6.960870806229826e-06, "loss": 1.0156, "step": 12930 }, { "epoch": 1.3, "grad_norm": 6.738347589270799, "learning_rate": 6.958172430734711e-06, "loss": 1.0248, "step": 12935 }, { "epoch": 1.3, "grad_norm": 7.753620709514878, "learning_rate": 6.955473381434332e-06, "loss": 1.03, "step": 12940 }, { "epoch": 1.31, "grad_norm": 21.557169202777263, "learning_rate": 6.952773659257431e-06, "loss": 1.0125, "step": 12945 }, { "epoch": 1.31, "grad_norm": 6.965167026749218, "learning_rate": 6.95007326513298e-06, "loss": 1.0619, "step": 12950 }, { "epoch": 1.31, "grad_norm": 7.265830292021931, "learning_rate": 6.94737219999018e-06, "loss": 1.0392, "step": 12955 }, { "epoch": 1.31, "grad_norm": 23.52081806715469, "learning_rate": 6.9446704647584665e-06, "loss": 1.0245, "step": 12960 }, { "epoch": 1.31, "grad_norm": 12.871420177875759, "learning_rate": 6.9419680603675026e-06, "loss": 0.9975, "step": 12965 }, { "epoch": 1.31, "grad_norm": 11.222317702243371, "learning_rate": 6.9392649877471855e-06, "loss": 1.0536, "step": 12970 }, { "epoch": 1.31, "grad_norm": 42.25272051987114, "learning_rate": 6.936561247827635e-06, "loss": 1.0668, "step": 12975 }, { "epoch": 1.31, "grad_norm": 13.740236086629558, "learning_rate": 6.93385684153921e-06, "loss": 1.0125, "step": 12980 }, { "epoch": 1.31, "grad_norm": 18.645595937400724, "learning_rate": 6.931151769812496e-06, "loss": 1.0658, "step": 12985 }, { "epoch": 1.31, "grad_norm": 15.141950619913802, "learning_rate": 6.928446033578299e-06, "loss": 1.0382, "step": 12990 }, { "epoch": 1.31, "grad_norm": 11.023119671967667, "learning_rate": 6.925739633767664e-06, "loss": 1.0282, "step": 12995 }, { "epoch": 1.31, "grad_norm": 24.068304301841803, "learning_rate": 6.923032571311863e-06, "loss": 1.0067, "step": 13000 }, { "epoch": 1.31, "grad_norm": 27.996209165869637, "learning_rate": 6.920324847142388e-06, "loss": 1.0103, "step": 13005 }, { "epoch": 1.31, "grad_norm": 6.417572565792332, "learning_rate": 6.917616462190968e-06, "loss": 1.0201, "step": 13010 }, { "epoch": 1.31, "grad_norm": 7.657853453808597, "learning_rate": 6.914907417389556e-06, "loss": 1.0766, "step": 13015 }, { "epoch": 1.31, "grad_norm": 10.65758461824999, "learning_rate": 6.9121977136703285e-06, "loss": 1.0146, "step": 13020 }, { "epoch": 1.31, "grad_norm": 8.007045939186899, "learning_rate": 6.9094873519656955e-06, "loss": 0.9793, "step": 13025 }, { "epoch": 1.31, "grad_norm": 15.578988873364581, "learning_rate": 6.90677633320829e-06, "loss": 1.0458, "step": 13030 }, { "epoch": 1.31, "grad_norm": 16.951781002259686, "learning_rate": 6.904064658330967e-06, "loss": 1.0555, "step": 13035 }, { "epoch": 1.31, "grad_norm": 16.35412417238803, "learning_rate": 6.901352328266814e-06, "loss": 1.0519, "step": 13040 }, { "epoch": 1.32, "grad_norm": 7.959471511350942, "learning_rate": 6.898639343949141e-06, "loss": 1.0015, "step": 13045 }, { "epoch": 1.32, "grad_norm": 15.821702563866582, "learning_rate": 6.895925706311484e-06, "loss": 1.0324, "step": 13050 }, { "epoch": 1.32, "grad_norm": 8.07143200791322, "learning_rate": 6.893211416287601e-06, "loss": 1.0619, "step": 13055 }, { "epoch": 1.32, "grad_norm": 7.163118330139236, "learning_rate": 6.890496474811478e-06, "loss": 1.0037, "step": 13060 }, { "epoch": 1.32, "grad_norm": 22.921157404139223, "learning_rate": 6.887780882817325e-06, "loss": 1.0314, "step": 13065 }, { "epoch": 1.32, "grad_norm": 21.488001146763192, "learning_rate": 6.885064641239572e-06, "loss": 1.0426, "step": 13070 }, { "epoch": 1.32, "grad_norm": 14.64297873012137, "learning_rate": 6.882347751012877e-06, "loss": 1.0084, "step": 13075 }, { "epoch": 1.32, "grad_norm": 7.848258797644815, "learning_rate": 6.879630213072119e-06, "loss": 1.0571, "step": 13080 }, { "epoch": 1.32, "grad_norm": 8.16142770751216, "learning_rate": 6.8769120283524e-06, "loss": 0.9836, "step": 13085 }, { "epoch": 1.32, "grad_norm": 17.09570378515828, "learning_rate": 6.874193197789044e-06, "loss": 1.009, "step": 13090 }, { "epoch": 1.32, "grad_norm": 22.433742219274283, "learning_rate": 6.8714737223176e-06, "loss": 1.024, "step": 13095 }, { "epoch": 1.32, "grad_norm": 5.383163564586508, "learning_rate": 6.868753602873834e-06, "loss": 1.0429, "step": 13100 }, { "epoch": 1.32, "grad_norm": 8.936132776012842, "learning_rate": 6.866032840393738e-06, "loss": 1.0228, "step": 13105 }, { "epoch": 1.32, "grad_norm": 6.030396329550592, "learning_rate": 6.863311435813525e-06, "loss": 1.0008, "step": 13110 }, { "epoch": 1.32, "grad_norm": 11.397392977750727, "learning_rate": 6.860589390069626e-06, "loss": 1.0177, "step": 13115 }, { "epoch": 1.32, "grad_norm": 18.55638265726068, "learning_rate": 6.857866704098695e-06, "loss": 1.0323, "step": 13120 }, { "epoch": 1.32, "grad_norm": 9.559656051710714, "learning_rate": 6.8551433788376066e-06, "loss": 1.0315, "step": 13125 }, { "epoch": 1.32, "grad_norm": 13.641928541047324, "learning_rate": 6.852419415223451e-06, "loss": 1.0941, "step": 13130 }, { "epoch": 1.32, "grad_norm": 8.212436486827265, "learning_rate": 6.849694814193546e-06, "loss": 1.0541, "step": 13135 }, { "epoch": 1.32, "grad_norm": 5.8214796520446495, "learning_rate": 6.846969576685422e-06, "loss": 1.0366, "step": 13140 }, { "epoch": 1.33, "grad_norm": 8.157033991262095, "learning_rate": 6.84424370363683e-06, "loss": 0.9949, "step": 13145 }, { "epoch": 1.33, "grad_norm": 14.45530704947713, "learning_rate": 6.841517195985741e-06, "loss": 1.0393, "step": 13150 }, { "epoch": 1.33, "grad_norm": 16.143450026136613, "learning_rate": 6.838790054670345e-06, "loss": 1.0291, "step": 13155 }, { "epoch": 1.33, "grad_norm": 16.624898335910864, "learning_rate": 6.836062280629046e-06, "loss": 1.0026, "step": 13160 }, { "epoch": 1.33, "grad_norm": 18.84600348155945, "learning_rate": 6.83333387480047e-06, "loss": 1.018, "step": 13165 }, { "epoch": 1.33, "grad_norm": 43.80741925650718, "learning_rate": 6.830604838123459e-06, "loss": 1.0479, "step": 13170 }, { "epoch": 1.33, "grad_norm": 41.92711636866129, "learning_rate": 6.827875171537071e-06, "loss": 1.0578, "step": 13175 }, { "epoch": 1.33, "grad_norm": 19.51673300351281, "learning_rate": 6.8251448759805824e-06, "loss": 1.0711, "step": 13180 }, { "epoch": 1.33, "grad_norm": 8.556515933677582, "learning_rate": 6.8224139523934865e-06, "loss": 1.0446, "step": 13185 }, { "epoch": 1.33, "grad_norm": 37.85927373587871, "learning_rate": 6.81968240171549e-06, "loss": 1.0247, "step": 13190 }, { "epoch": 1.33, "grad_norm": 18.662322953416247, "learning_rate": 6.816950224886515e-06, "loss": 1.0237, "step": 13195 }, { "epoch": 1.33, "grad_norm": 7.907269676994511, "learning_rate": 6.814217422846705e-06, "loss": 1.0179, "step": 13200 }, { "epoch": 1.33, "grad_norm": 26.99504945572047, "learning_rate": 6.811483996536412e-06, "loss": 1.006, "step": 13205 }, { "epoch": 1.33, "grad_norm": 6.444229002874957, "learning_rate": 6.808749946896206e-06, "loss": 1.0098, "step": 13210 }, { "epoch": 1.33, "grad_norm": 6.473057195996981, "learning_rate": 6.80601527486687e-06, "loss": 1.0325, "step": 13215 }, { "epoch": 1.33, "grad_norm": 14.804280215536732, "learning_rate": 6.8032799813894055e-06, "loss": 1.0363, "step": 13220 }, { "epoch": 1.33, "grad_norm": 6.313675131537839, "learning_rate": 6.800544067405019e-06, "loss": 1.0658, "step": 13225 }, { "epoch": 1.33, "grad_norm": 5.706735278471638, "learning_rate": 6.7978075338551395e-06, "loss": 1.057, "step": 13230 }, { "epoch": 1.33, "grad_norm": 15.756023104740889, "learning_rate": 6.795070381681405e-06, "loss": 1.0309, "step": 13235 }, { "epoch": 1.33, "grad_norm": 22.676044196697127, "learning_rate": 6.792332611825667e-06, "loss": 1.0114, "step": 13240 }, { "epoch": 1.34, "grad_norm": 26.87964038992477, "learning_rate": 6.789594225229987e-06, "loss": 1.0055, "step": 13245 }, { "epoch": 1.34, "grad_norm": 10.512523423043234, "learning_rate": 6.7868552228366455e-06, "loss": 1.0165, "step": 13250 }, { "epoch": 1.34, "grad_norm": 23.665933617476504, "learning_rate": 6.784115605588129e-06, "loss": 1.0666, "step": 13255 }, { "epoch": 1.34, "grad_norm": 47.00172811271067, "learning_rate": 6.781375374427135e-06, "loss": 1.0533, "step": 13260 }, { "epoch": 1.34, "grad_norm": 50.63177323507287, "learning_rate": 6.778634530296577e-06, "loss": 1.0132, "step": 13265 }, { "epoch": 1.34, "grad_norm": 12.536518733773919, "learning_rate": 6.775893074139575e-06, "loss": 1.0746, "step": 13270 }, { "epoch": 1.34, "grad_norm": 29.53133532836771, "learning_rate": 6.773151006899462e-06, "loss": 1.0575, "step": 13275 }, { "epoch": 1.34, "grad_norm": 77.40998959421712, "learning_rate": 6.770408329519783e-06, "loss": 1.0598, "step": 13280 }, { "epoch": 1.34, "grad_norm": 30.24829150706806, "learning_rate": 6.767665042944287e-06, "loss": 1.0107, "step": 13285 }, { "epoch": 1.34, "grad_norm": 13.192949203694534, "learning_rate": 6.764921148116938e-06, "loss": 1.0108, "step": 13290 }, { "epoch": 1.34, "grad_norm": 7.9501053589897985, "learning_rate": 6.7621766459819095e-06, "loss": 1.0401, "step": 13295 }, { "epoch": 1.34, "grad_norm": 6.111409883932251, "learning_rate": 6.759431537483578e-06, "loss": 1.0443, "step": 13300 }, { "epoch": 1.34, "grad_norm": 8.679607540016216, "learning_rate": 6.756685823566537e-06, "loss": 1.0365, "step": 13305 }, { "epoch": 1.34, "grad_norm": 11.376957592481851, "learning_rate": 6.753939505175581e-06, "loss": 1.0523, "step": 13310 }, { "epoch": 1.34, "grad_norm": 7.391994052623102, "learning_rate": 6.751192583255716e-06, "loss": 1.0234, "step": 13315 }, { "epoch": 1.34, "grad_norm": 19.930521794582923, "learning_rate": 6.748445058752155e-06, "loss": 1.0303, "step": 13320 }, { "epoch": 1.34, "grad_norm": 7.669913265990586, "learning_rate": 6.745696932610322e-06, "loss": 1.0876, "step": 13325 }, { "epoch": 1.34, "grad_norm": 7.146589085411706, "learning_rate": 6.742948205775839e-06, "loss": 1.0389, "step": 13330 }, { "epoch": 1.34, "grad_norm": 6.971442314488811, "learning_rate": 6.740198879194544e-06, "loss": 1.0264, "step": 13335 }, { "epoch": 1.34, "grad_norm": 15.669283894212775, "learning_rate": 6.7374489538124775e-06, "loss": 1.0593, "step": 13340 }, { "epoch": 1.35, "grad_norm": 12.953616910642543, "learning_rate": 6.734698430575885e-06, "loss": 1.0129, "step": 13345 }, { "epoch": 1.35, "grad_norm": 11.523231959616394, "learning_rate": 6.731947310431219e-06, "loss": 1.0135, "step": 13350 }, { "epoch": 1.35, "grad_norm": 17.804972747545044, "learning_rate": 6.7291955943251385e-06, "loss": 1.0272, "step": 13355 }, { "epoch": 1.35, "grad_norm": 10.953005179019751, "learning_rate": 6.726443283204506e-06, "loss": 1.0357, "step": 13360 }, { "epoch": 1.35, "grad_norm": 6.35695685522286, "learning_rate": 6.723690378016387e-06, "loss": 0.9987, "step": 13365 }, { "epoch": 1.35, "grad_norm": 12.64247175046237, "learning_rate": 6.720936879708055e-06, "loss": 0.9996, "step": 13370 }, { "epoch": 1.35, "grad_norm": 12.212790234072378, "learning_rate": 6.718182789226988e-06, "loss": 1.0104, "step": 13375 }, { "epoch": 1.35, "grad_norm": 5.377406455852372, "learning_rate": 6.715428107520864e-06, "loss": 1.0282, "step": 13380 }, { "epoch": 1.35, "grad_norm": 16.39394240132829, "learning_rate": 6.712672835537566e-06, "loss": 1.049, "step": 13385 }, { "epoch": 1.35, "grad_norm": 13.035697471945214, "learning_rate": 6.709916974225181e-06, "loss": 1.0153, "step": 13390 }, { "epoch": 1.35, "grad_norm": 9.64975431761266, "learning_rate": 6.707160524532e-06, "loss": 1.0552, "step": 13395 }, { "epoch": 1.35, "grad_norm": 17.294643581025277, "learning_rate": 6.70440348740651e-06, "loss": 1.073, "step": 13400 }, { "epoch": 1.35, "grad_norm": 18.488330345940135, "learning_rate": 6.70164586379741e-06, "loss": 1.0568, "step": 13405 }, { "epoch": 1.35, "grad_norm": 13.870119310854673, "learning_rate": 6.698887654653593e-06, "loss": 1.0285, "step": 13410 }, { "epoch": 1.35, "grad_norm": 13.912999058837164, "learning_rate": 6.6961288609241555e-06, "loss": 1.0441, "step": 13415 }, { "epoch": 1.35, "grad_norm": 26.74928942560138, "learning_rate": 6.693369483558399e-06, "loss": 1.0799, "step": 13420 }, { "epoch": 1.35, "grad_norm": 8.9684024815037, "learning_rate": 6.69060952350582e-06, "loss": 1.016, "step": 13425 }, { "epoch": 1.35, "grad_norm": 9.46064987134036, "learning_rate": 6.687848981716118e-06, "loss": 0.9943, "step": 13430 }, { "epoch": 1.35, "grad_norm": 7.169127122095071, "learning_rate": 6.6850878591391945e-06, "loss": 1.0552, "step": 13435 }, { "epoch": 1.36, "grad_norm": 5.165354843115252, "learning_rate": 6.682326156725147e-06, "loss": 1.0508, "step": 13440 }, { "epoch": 1.36, "grad_norm": 6.2432862349534695, "learning_rate": 6.679563875424276e-06, "loss": 1.0393, "step": 13445 }, { "epoch": 1.36, "grad_norm": 10.935217332921848, "learning_rate": 6.67680101618708e-06, "loss": 1.0513, "step": 13450 }, { "epoch": 1.36, "grad_norm": 6.607537224645915, "learning_rate": 6.674037579964256e-06, "loss": 1.0162, "step": 13455 }, { "epoch": 1.36, "grad_norm": 15.120510077099906, "learning_rate": 6.671273567706699e-06, "loss": 1.0126, "step": 13460 }, { "epoch": 1.36, "grad_norm": 7.850181977228456, "learning_rate": 6.668508980365505e-06, "loss": 1.0038, "step": 13465 }, { "epoch": 1.36, "grad_norm": 7.137039326824293, "learning_rate": 6.665743818891963e-06, "loss": 1.0238, "step": 13470 }, { "epoch": 1.36, "grad_norm": 11.516447441123454, "learning_rate": 6.662978084237565e-06, "loss": 1.0241, "step": 13475 }, { "epoch": 1.36, "grad_norm": 27.20639222893311, "learning_rate": 6.6602117773539954e-06, "loss": 0.9892, "step": 13480 }, { "epoch": 1.36, "grad_norm": 19.739972267618672, "learning_rate": 6.65744489919314e-06, "loss": 1.0032, "step": 13485 }, { "epoch": 1.36, "grad_norm": 21.551587919524792, "learning_rate": 6.654677450707077e-06, "loss": 1.0572, "step": 13490 }, { "epoch": 1.36, "grad_norm": 6.2760398499502985, "learning_rate": 6.651909432848083e-06, "loss": 1.0114, "step": 13495 }, { "epoch": 1.36, "grad_norm": 9.902126713444899, "learning_rate": 6.6491408465686324e-06, "loss": 1.1127, "step": 13500 }, { "epoch": 1.36, "grad_norm": 12.188235790272506, "learning_rate": 6.646371692821391e-06, "loss": 0.9909, "step": 13505 }, { "epoch": 1.36, "grad_norm": 12.421880256038474, "learning_rate": 6.6436019725592215e-06, "loss": 1.0017, "step": 13510 }, { "epoch": 1.36, "grad_norm": 7.010103002828106, "learning_rate": 6.640831686735186e-06, "loss": 1.0216, "step": 13515 }, { "epoch": 1.36, "grad_norm": 6.120440748200015, "learning_rate": 6.638060836302531e-06, "loss": 1.0354, "step": 13520 }, { "epoch": 1.36, "grad_norm": 13.263586848738726, "learning_rate": 6.635289422214708e-06, "loss": 1.0607, "step": 13525 }, { "epoch": 1.36, "grad_norm": 16.134589255668278, "learning_rate": 6.632517445425357e-06, "loss": 1.011, "step": 13530 }, { "epoch": 1.36, "grad_norm": 12.013205345946286, "learning_rate": 6.6297449068883125e-06, "loss": 1.0028, "step": 13535 }, { "epoch": 1.37, "grad_norm": 10.790604203729266, "learning_rate": 6.6269718075576e-06, "loss": 1.0237, "step": 13540 }, { "epoch": 1.37, "grad_norm": 7.71188736972473, "learning_rate": 6.624198148387446e-06, "loss": 1.0279, "step": 13545 }, { "epoch": 1.37, "grad_norm": 7.169575350843859, "learning_rate": 6.621423930332258e-06, "loss": 1.0798, "step": 13550 }, { "epoch": 1.37, "grad_norm": 11.090805139004452, "learning_rate": 6.618649154346645e-06, "loss": 1.0083, "step": 13555 }, { "epoch": 1.37, "grad_norm": 11.675451134812084, "learning_rate": 6.615873821385404e-06, "loss": 1.0365, "step": 13560 }, { "epoch": 1.37, "grad_norm": 8.993575443756557, "learning_rate": 6.613097932403524e-06, "loss": 0.9776, "step": 13565 }, { "epoch": 1.37, "grad_norm": 5.518707514073316, "learning_rate": 6.610321488356186e-06, "loss": 1.0298, "step": 13570 }, { "epoch": 1.37, "grad_norm": 16.39762762125881, "learning_rate": 6.607544490198763e-06, "loss": 1.011, "step": 13575 }, { "epoch": 1.37, "grad_norm": 19.01928544072449, "learning_rate": 6.6047669388868155e-06, "loss": 1.0303, "step": 13580 }, { "epoch": 1.37, "grad_norm": 27.189320273451663, "learning_rate": 6.601988835376096e-06, "loss": 1.0292, "step": 13585 }, { "epoch": 1.37, "grad_norm": 39.491268467130276, "learning_rate": 6.599210180622551e-06, "loss": 1.0243, "step": 13590 }, { "epoch": 1.37, "grad_norm": 26.732490527271786, "learning_rate": 6.5964309755823075e-06, "loss": 1.0122, "step": 13595 }, { "epoch": 1.37, "grad_norm": 15.135663071949276, "learning_rate": 6.593651221211691e-06, "loss": 1.0185, "step": 13600 }, { "epoch": 1.37, "grad_norm": 7.233287153650046, "learning_rate": 6.590870918467214e-06, "loss": 0.971, "step": 13605 }, { "epoch": 1.37, "grad_norm": 6.495907008406722, "learning_rate": 6.588090068305569e-06, "loss": 1.0558, "step": 13610 }, { "epoch": 1.37, "grad_norm": 44.42453635403032, "learning_rate": 6.58530867168365e-06, "loss": 0.9835, "step": 13615 }, { "epoch": 1.37, "grad_norm": 28.144903089867967, "learning_rate": 6.582526729558533e-06, "loss": 1.0596, "step": 13620 }, { "epoch": 1.37, "grad_norm": 12.024692701567258, "learning_rate": 6.579744242887478e-06, "loss": 1.0064, "step": 13625 }, { "epoch": 1.37, "grad_norm": 6.463744260982136, "learning_rate": 6.576961212627938e-06, "loss": 1.0177, "step": 13630 }, { "epoch": 1.37, "grad_norm": 8.95274300642296, "learning_rate": 6.574177639737551e-06, "loss": 1.0668, "step": 13635 }, { "epoch": 1.38, "grad_norm": 8.42371767681941, "learning_rate": 6.571393525174142e-06, "loss": 1.0396, "step": 13640 }, { "epoch": 1.38, "grad_norm": 11.357186063769456, "learning_rate": 6.568608869895722e-06, "loss": 1.0231, "step": 13645 }, { "epoch": 1.38, "grad_norm": 12.015801963572391, "learning_rate": 6.565823674860487e-06, "loss": 1.0472, "step": 13650 }, { "epoch": 1.38, "grad_norm": 14.069250439384112, "learning_rate": 6.563037941026822e-06, "loss": 0.981, "step": 13655 }, { "epoch": 1.38, "grad_norm": 24.130766918714176, "learning_rate": 6.560251669353294e-06, "loss": 1.0042, "step": 13660 }, { "epoch": 1.38, "grad_norm": 7.899136249675347, "learning_rate": 6.557464860798657e-06, "loss": 1.0442, "step": 13665 }, { "epoch": 1.38, "grad_norm": 8.27856661226327, "learning_rate": 6.554677516321849e-06, "loss": 1.026, "step": 13670 }, { "epoch": 1.38, "grad_norm": 13.703392661787985, "learning_rate": 6.551889636881992e-06, "loss": 1.0072, "step": 13675 }, { "epoch": 1.38, "grad_norm": 14.521669786912787, "learning_rate": 6.549101223438394e-06, "loss": 0.9798, "step": 13680 }, { "epoch": 1.38, "grad_norm": 8.132208029869739, "learning_rate": 6.546312276950544e-06, "loss": 1.0406, "step": 13685 }, { "epoch": 1.38, "grad_norm": 5.337966994467242, "learning_rate": 6.5435227983781155e-06, "loss": 1.0336, "step": 13690 }, { "epoch": 1.38, "grad_norm": 10.018133752379166, "learning_rate": 6.540732788680968e-06, "loss": 1.0596, "step": 13695 }, { "epoch": 1.38, "grad_norm": 8.981410299463178, "learning_rate": 6.537942248819139e-06, "loss": 1.052, "step": 13700 }, { "epoch": 1.38, "grad_norm": 13.807726025401056, "learning_rate": 6.53515117975285e-06, "loss": 1.0451, "step": 13705 }, { "epoch": 1.38, "grad_norm": 12.258256090032113, "learning_rate": 6.532359582442509e-06, "loss": 1.0257, "step": 13710 }, { "epoch": 1.38, "grad_norm": 13.830220039221603, "learning_rate": 6.529567457848697e-06, "loss": 1.0574, "step": 13715 }, { "epoch": 1.38, "grad_norm": 14.951624879155915, "learning_rate": 6.526774806932184e-06, "loss": 1.053, "step": 13720 }, { "epoch": 1.38, "grad_norm": 10.10751827537318, "learning_rate": 6.523981630653918e-06, "loss": 1.0139, "step": 13725 }, { "epoch": 1.38, "grad_norm": 7.374129464914586, "learning_rate": 6.521187929975028e-06, "loss": 1.0713, "step": 13730 }, { "epoch": 1.38, "grad_norm": 8.969936644729158, "learning_rate": 6.518393705856826e-06, "loss": 1.0491, "step": 13735 }, { "epoch": 1.39, "grad_norm": 17.11796650989927, "learning_rate": 6.515598959260798e-06, "loss": 1.0005, "step": 13740 }, { "epoch": 1.39, "grad_norm": 9.871941947917126, "learning_rate": 6.512803691148618e-06, "loss": 1.0545, "step": 13745 }, { "epoch": 1.39, "grad_norm": 14.622679603307276, "learning_rate": 6.510007902482132e-06, "loss": 1.051, "step": 13750 }, { "epoch": 1.39, "grad_norm": 61.39754193248918, "learning_rate": 6.507211594223367e-06, "loss": 0.9929, "step": 13755 }, { "epoch": 1.39, "grad_norm": 17.488750011595506, "learning_rate": 6.504414767334535e-06, "loss": 0.9972, "step": 13760 }, { "epoch": 1.39, "grad_norm": 10.12429589602954, "learning_rate": 6.501617422778016e-06, "loss": 1.0398, "step": 13765 }, { "epoch": 1.39, "grad_norm": 9.879413296776791, "learning_rate": 6.498819561516376e-06, "loss": 0.9726, "step": 13770 }, { "epoch": 1.39, "grad_norm": 23.75657216326405, "learning_rate": 6.4960211845123574e-06, "loss": 1.0689, "step": 13775 }, { "epoch": 1.39, "grad_norm": 11.45006080589518, "learning_rate": 6.493222292728878e-06, "loss": 1.0506, "step": 13780 }, { "epoch": 1.39, "grad_norm": 12.991676780315553, "learning_rate": 6.490422887129034e-06, "loss": 1.0286, "step": 13785 }, { "epoch": 1.39, "grad_norm": 16.18143877800714, "learning_rate": 6.487622968676098e-06, "loss": 1.0247, "step": 13790 }, { "epoch": 1.39, "grad_norm": 6.793232464061429, "learning_rate": 6.48482253833352e-06, "loss": 1.0389, "step": 13795 }, { "epoch": 1.39, "grad_norm": 7.608126922471388, "learning_rate": 6.482021597064923e-06, "loss": 1.0548, "step": 13800 }, { "epoch": 1.39, "grad_norm": 10.140751653201086, "learning_rate": 6.479220145834111e-06, "loss": 1.0124, "step": 13805 }, { "epoch": 1.39, "grad_norm": 5.505552673280925, "learning_rate": 6.47641818560506e-06, "loss": 1.0583, "step": 13810 }, { "epoch": 1.39, "grad_norm": 6.627856677353117, "learning_rate": 6.47361571734192e-06, "loss": 1.0085, "step": 13815 }, { "epoch": 1.39, "grad_norm": 8.41666298648295, "learning_rate": 6.470812742009021e-06, "loss": 0.9837, "step": 13820 }, { "epoch": 1.39, "grad_norm": 10.724829915820955, "learning_rate": 6.468009260570861e-06, "loss": 1.0064, "step": 13825 }, { "epoch": 1.39, "grad_norm": 15.376678289487328, "learning_rate": 6.4652052739921165e-06, "loss": 1.0592, "step": 13830 }, { "epoch": 1.39, "grad_norm": 6.893900152860663, "learning_rate": 6.462400783237638e-06, "loss": 0.9976, "step": 13835 }, { "epoch": 1.4, "grad_norm": 5.700056112704488, "learning_rate": 6.459595789272446e-06, "loss": 0.9963, "step": 13840 }, { "epoch": 1.4, "grad_norm": 7.192370893705413, "learning_rate": 6.456790293061735e-06, "loss": 1.0182, "step": 13845 }, { "epoch": 1.4, "grad_norm": 16.006904290572265, "learning_rate": 6.453984295570879e-06, "loss": 1.038, "step": 13850 }, { "epoch": 1.4, "grad_norm": 17.040487629761007, "learning_rate": 6.451177797765414e-06, "loss": 1.0622, "step": 13855 }, { "epoch": 1.4, "grad_norm": 11.209879361348221, "learning_rate": 6.4483708006110546e-06, "loss": 1.0178, "step": 13860 }, { "epoch": 1.4, "grad_norm": 29.12635124130482, "learning_rate": 6.445563305073685e-06, "loss": 0.996, "step": 13865 }, { "epoch": 1.4, "grad_norm": 30.88874303744498, "learning_rate": 6.4427553121193665e-06, "loss": 1.0448, "step": 13870 }, { "epoch": 1.4, "grad_norm": 31.535720727730226, "learning_rate": 6.4399468227143204e-06, "loss": 1.0054, "step": 13875 }, { "epoch": 1.4, "grad_norm": 15.864907947690293, "learning_rate": 6.437137837824947e-06, "loss": 1.0738, "step": 13880 }, { "epoch": 1.4, "grad_norm": 30.114081794271012, "learning_rate": 6.434328358417819e-06, "loss": 1.0386, "step": 13885 }, { "epoch": 1.4, "grad_norm": 23.122982889635825, "learning_rate": 6.43151838545967e-06, "loss": 1.0401, "step": 13890 }, { "epoch": 1.4, "grad_norm": 45.08967697873334, "learning_rate": 6.428707919917412e-06, "loss": 1.0247, "step": 13895 }, { "epoch": 1.4, "grad_norm": 8.635129933519156, "learning_rate": 6.425896962758126e-06, "loss": 1.0081, "step": 13900 }, { "epoch": 1.4, "grad_norm": 12.054772921652399, "learning_rate": 6.423085514949055e-06, "loss": 1.0032, "step": 13905 }, { "epoch": 1.4, "grad_norm": 16.190495259014995, "learning_rate": 6.420273577457617e-06, "loss": 1.0165, "step": 13910 }, { "epoch": 1.4, "grad_norm": 29.90283692177583, "learning_rate": 6.417461151251399e-06, "loss": 1.1068, "step": 13915 }, { "epoch": 1.4, "grad_norm": 23.32826749738727, "learning_rate": 6.414648237298151e-06, "loss": 1.0314, "step": 13920 }, { "epoch": 1.4, "grad_norm": 17.451426247775657, "learning_rate": 6.411834836565797e-06, "loss": 1.018, "step": 13925 }, { "epoch": 1.4, "grad_norm": 5.1216586854247375, "learning_rate": 6.409020950022424e-06, "loss": 1.0708, "step": 13930 }, { "epoch": 1.4, "grad_norm": 7.443637085100315, "learning_rate": 6.406206578636288e-06, "loss": 1.0659, "step": 13935 }, { "epoch": 1.41, "grad_norm": 20.288942801522673, "learning_rate": 6.403391723375812e-06, "loss": 1.0014, "step": 13940 }, { "epoch": 1.41, "grad_norm": 18.847174012916593, "learning_rate": 6.400576385209583e-06, "loss": 0.9968, "step": 13945 }, { "epoch": 1.41, "grad_norm": 27.161722995764585, "learning_rate": 6.397760565106358e-06, "loss": 1.0816, "step": 13950 }, { "epoch": 1.41, "grad_norm": 11.175698198589409, "learning_rate": 6.394944264035057e-06, "loss": 1.0533, "step": 13955 }, { "epoch": 1.41, "grad_norm": 6.558573764246493, "learning_rate": 6.3921274829647685e-06, "loss": 0.9715, "step": 13960 }, { "epoch": 1.41, "grad_norm": 10.821086188227273, "learning_rate": 6.389310222864742e-06, "loss": 1.022, "step": 13965 }, { "epoch": 1.41, "grad_norm": 10.366573020455437, "learning_rate": 6.386492484704394e-06, "loss": 1.0075, "step": 13970 }, { "epoch": 1.41, "grad_norm": 6.897677438642832, "learning_rate": 6.3836742694533084e-06, "loss": 1.0126, "step": 13975 }, { "epoch": 1.41, "grad_norm": 9.255974567583273, "learning_rate": 6.380855578081227e-06, "loss": 1.0278, "step": 13980 }, { "epoch": 1.41, "grad_norm": 6.620443852704517, "learning_rate": 6.378036411558058e-06, "loss": 1.0286, "step": 13985 }, { "epoch": 1.41, "grad_norm": 8.237252298755164, "learning_rate": 6.3752167708538795e-06, "loss": 1.0324, "step": 13990 }, { "epoch": 1.41, "grad_norm": 15.148748108774727, "learning_rate": 6.3723966569389215e-06, "loss": 0.9985, "step": 13995 }, { "epoch": 1.41, "grad_norm": 8.73717599860813, "learning_rate": 6.369576070783585e-06, "loss": 1.0606, "step": 14000 }, { "epoch": 1.41, "grad_norm": 5.626033599591908, "learning_rate": 6.366755013358431e-06, "loss": 1.0858, "step": 14005 }, { "epoch": 1.41, "grad_norm": 13.170414197647256, "learning_rate": 6.3639334856341824e-06, "loss": 1.038, "step": 14010 }, { "epoch": 1.41, "grad_norm": 14.102484949514457, "learning_rate": 6.361111488581724e-06, "loss": 0.9809, "step": 14015 }, { "epoch": 1.41, "grad_norm": 28.116597064326502, "learning_rate": 6.358289023172102e-06, "loss": 1.0353, "step": 14020 }, { "epoch": 1.41, "grad_norm": 38.69362673107948, "learning_rate": 6.355466090376526e-06, "loss": 1.0562, "step": 14025 }, { "epoch": 1.41, "grad_norm": 47.16918166965689, "learning_rate": 6.35264269116636e-06, "loss": 1.0332, "step": 14030 }, { "epoch": 1.42, "grad_norm": 17.533420893919676, "learning_rate": 6.349818826513137e-06, "loss": 1.0296, "step": 14035 }, { "epoch": 1.42, "grad_norm": 11.442797392178308, "learning_rate": 6.346994497388545e-06, "loss": 1.0713, "step": 14040 }, { "epoch": 1.42, "grad_norm": 16.04787718380431, "learning_rate": 6.344169704764433e-06, "loss": 1.0068, "step": 14045 }, { "epoch": 1.42, "grad_norm": 10.064768698412545, "learning_rate": 6.341344449612811e-06, "loss": 1.0295, "step": 14050 }, { "epoch": 1.42, "grad_norm": 8.302200586498188, "learning_rate": 6.338518732905844e-06, "loss": 1.0236, "step": 14055 }, { "epoch": 1.42, "grad_norm": 21.329774898188415, "learning_rate": 6.3356925556158586e-06, "loss": 1.058, "step": 14060 }, { "epoch": 1.42, "grad_norm": 29.134880862354656, "learning_rate": 6.332865918715341e-06, "loss": 1.0199, "step": 14065 }, { "epoch": 1.42, "grad_norm": 32.23187860799201, "learning_rate": 6.330038823176935e-06, "loss": 1.048, "step": 14070 }, { "epoch": 1.42, "grad_norm": 37.799283905948954, "learning_rate": 6.327211269973439e-06, "loss": 1.0262, "step": 14075 }, { "epoch": 1.42, "grad_norm": 12.704421125137417, "learning_rate": 6.324383260077813e-06, "loss": 1.0037, "step": 14080 }, { "epoch": 1.42, "grad_norm": 7.9294961136118, "learning_rate": 6.321554794463173e-06, "loss": 1.0039, "step": 14085 }, { "epoch": 1.42, "grad_norm": 6.850289258704223, "learning_rate": 6.318725874102789e-06, "loss": 1.0635, "step": 14090 }, { "epoch": 1.42, "grad_norm": 11.226221318889067, "learning_rate": 6.315896499970091e-06, "loss": 0.9888, "step": 14095 }, { "epoch": 1.42, "grad_norm": 12.130575799026504, "learning_rate": 6.313066673038666e-06, "loss": 1.0261, "step": 14100 }, { "epoch": 1.42, "grad_norm": 9.480941531356995, "learning_rate": 6.310236394282252e-06, "loss": 1.0188, "step": 14105 }, { "epoch": 1.42, "grad_norm": 7.185087851042856, "learning_rate": 6.307405664674745e-06, "loss": 1.0114, "step": 14110 }, { "epoch": 1.42, "grad_norm": 6.910149014895005, "learning_rate": 6.304574485190199e-06, "loss": 1.0491, "step": 14115 }, { "epoch": 1.42, "grad_norm": 5.739412132695425, "learning_rate": 6.301742856802819e-06, "loss": 0.9905, "step": 14120 }, { "epoch": 1.42, "grad_norm": 7.0777644734152, "learning_rate": 6.298910780486964e-06, "loss": 1.021, "step": 14125 }, { "epoch": 1.42, "grad_norm": 5.950787765281109, "learning_rate": 6.29607825721715e-06, "loss": 1.036, "step": 14130 }, { "epoch": 1.43, "grad_norm": 10.30674289285351, "learning_rate": 6.2932452879680475e-06, "loss": 1.0568, "step": 14135 }, { "epoch": 1.43, "grad_norm": 8.131594426838518, "learning_rate": 6.290411873714475e-06, "loss": 1.0144, "step": 14140 }, { "epoch": 1.43, "grad_norm": 8.093782127641017, "learning_rate": 6.287578015431409e-06, "loss": 0.9836, "step": 14145 }, { "epoch": 1.43, "grad_norm": 10.006982750133712, "learning_rate": 6.284743714093979e-06, "loss": 1.072, "step": 14150 }, { "epoch": 1.43, "grad_norm": 5.524437370717353, "learning_rate": 6.281908970677463e-06, "loss": 1.0174, "step": 14155 }, { "epoch": 1.43, "grad_norm": 18.981786541530653, "learning_rate": 6.279073786157293e-06, "loss": 0.9756, "step": 14160 }, { "epoch": 1.43, "grad_norm": 8.638712127684034, "learning_rate": 6.276238161509058e-06, "loss": 1.0631, "step": 14165 }, { "epoch": 1.43, "grad_norm": 10.830997597851976, "learning_rate": 6.273402097708488e-06, "loss": 1.0142, "step": 14170 }, { "epoch": 1.43, "grad_norm": 9.764014225155503, "learning_rate": 6.270565595731475e-06, "loss": 1.0186, "step": 14175 }, { "epoch": 1.43, "grad_norm": 13.199229045647108, "learning_rate": 6.267728656554053e-06, "loss": 0.9966, "step": 14180 }, { "epoch": 1.43, "grad_norm": 18.008084456261397, "learning_rate": 6.26489128115241e-06, "loss": 1.0303, "step": 14185 }, { "epoch": 1.43, "grad_norm": 21.477202925427804, "learning_rate": 6.262053470502886e-06, "loss": 0.9913, "step": 14190 }, { "epoch": 1.43, "grad_norm": 6.387465020085141, "learning_rate": 6.259215225581968e-06, "loss": 1.0221, "step": 14195 }, { "epoch": 1.43, "grad_norm": 19.397843353781308, "learning_rate": 6.256376547366294e-06, "loss": 1.0107, "step": 14200 }, { "epoch": 1.43, "grad_norm": 8.135092010729808, "learning_rate": 6.25353743683265e-06, "loss": 1.0878, "step": 14205 }, { "epoch": 1.43, "grad_norm": 16.12569114899103, "learning_rate": 6.250697894957971e-06, "loss": 0.9939, "step": 14210 }, { "epoch": 1.43, "grad_norm": 5.956171692699573, "learning_rate": 6.247857922719341e-06, "loss": 1.0283, "step": 14215 }, { "epoch": 1.43, "grad_norm": 11.561231576101571, "learning_rate": 6.24501752109399e-06, "loss": 1.0195, "step": 14220 }, { "epoch": 1.43, "grad_norm": 5.562739598868662, "learning_rate": 6.242176691059302e-06, "loss": 1.0614, "step": 14225 }, { "epoch": 1.43, "grad_norm": 9.959521963283766, "learning_rate": 6.239335433592799e-06, "loss": 1.0354, "step": 14230 }, { "epoch": 1.44, "grad_norm": 14.3662667179511, "learning_rate": 6.2364937496721575e-06, "loss": 1.0758, "step": 14235 }, { "epoch": 1.44, "grad_norm": 12.379812977336815, "learning_rate": 6.233651640275199e-06, "loss": 1.009, "step": 14240 }, { "epoch": 1.44, "grad_norm": 13.883131577198084, "learning_rate": 6.2308091063798895e-06, "loss": 1.0122, "step": 14245 }, { "epoch": 1.44, "grad_norm": 6.14167183566433, "learning_rate": 6.227966148964339e-06, "loss": 1.018, "step": 14250 }, { "epoch": 1.44, "grad_norm": 13.308290140967893, "learning_rate": 6.2251227690068125e-06, "loss": 1.0288, "step": 14255 }, { "epoch": 1.44, "grad_norm": 14.664006828581652, "learning_rate": 6.22227896748571e-06, "loss": 0.9823, "step": 14260 }, { "epoch": 1.44, "grad_norm": 9.49020895969274, "learning_rate": 6.219434745379582e-06, "loss": 1.0258, "step": 14265 }, { "epoch": 1.44, "grad_norm": 12.663898407814036, "learning_rate": 6.216590103667124e-06, "loss": 1.0244, "step": 14270 }, { "epoch": 1.44, "grad_norm": 6.217872837879128, "learning_rate": 6.213745043327172e-06, "loss": 1.0175, "step": 14275 }, { "epoch": 1.44, "grad_norm": 7.949280368143053, "learning_rate": 6.210899565338711e-06, "loss": 1.001, "step": 14280 }, { "epoch": 1.44, "grad_norm": 30.094188801468604, "learning_rate": 6.208053670680864e-06, "loss": 1.048, "step": 14285 }, { "epoch": 1.44, "grad_norm": 20.347516427833664, "learning_rate": 6.205207360332906e-06, "loss": 0.9853, "step": 14290 }, { "epoch": 1.44, "grad_norm": 28.21486187237754, "learning_rate": 6.202360635274243e-06, "loss": 1.0146, "step": 14295 }, { "epoch": 1.44, "grad_norm": 12.837680300255675, "learning_rate": 6.199513496484436e-06, "loss": 0.9886, "step": 14300 }, { "epoch": 1.44, "grad_norm": 7.318146291443392, "learning_rate": 6.19666594494318e-06, "loss": 1.0477, "step": 14305 }, { "epoch": 1.44, "grad_norm": 8.860643957291648, "learning_rate": 6.193817981630314e-06, "loss": 1.0713, "step": 14310 }, { "epoch": 1.44, "grad_norm": 17.737601081480946, "learning_rate": 6.190969607525823e-06, "loss": 1.0437, "step": 14315 }, { "epoch": 1.44, "grad_norm": 11.679315730182553, "learning_rate": 6.188120823609826e-06, "loss": 1.0078, "step": 14320 }, { "epoch": 1.44, "grad_norm": 6.776719899109231, "learning_rate": 6.185271630862588e-06, "loss": 1.0335, "step": 14325 }, { "epoch": 1.44, "grad_norm": 7.095041730152499, "learning_rate": 6.1824220302645165e-06, "loss": 1.0284, "step": 14330 }, { "epoch": 1.45, "grad_norm": 9.704614074308678, "learning_rate": 6.179572022796151e-06, "loss": 1.0643, "step": 14335 }, { "epoch": 1.45, "grad_norm": 6.134028494062003, "learning_rate": 6.17672160943818e-06, "loss": 0.9957, "step": 14340 }, { "epoch": 1.45, "grad_norm": 7.781740867825054, "learning_rate": 6.173870791171428e-06, "loss": 1.0073, "step": 14345 }, { "epoch": 1.45, "grad_norm": 5.8775956473541, "learning_rate": 6.171019568976857e-06, "loss": 0.9979, "step": 14350 }, { "epoch": 1.45, "grad_norm": 9.887612818763753, "learning_rate": 6.16816794383557e-06, "loss": 1.0681, "step": 14355 }, { "epoch": 1.45, "grad_norm": 10.619252812637434, "learning_rate": 6.1653159167288115e-06, "loss": 1.0361, "step": 14360 }, { "epoch": 1.45, "grad_norm": 16.324555085616012, "learning_rate": 6.162463488637957e-06, "loss": 1.0245, "step": 14365 }, { "epoch": 1.45, "grad_norm": 9.315834199483762, "learning_rate": 6.159610660544526e-06, "loss": 1.0556, "step": 14370 }, { "epoch": 1.45, "grad_norm": 6.9917183655457675, "learning_rate": 6.156757433430176e-06, "loss": 1.051, "step": 14375 }, { "epoch": 1.45, "grad_norm": 10.626563270273751, "learning_rate": 6.153903808276698e-06, "loss": 1.0146, "step": 14380 }, { "epoch": 1.45, "grad_norm": 9.43592684093022, "learning_rate": 6.151049786066021e-06, "loss": 1.0138, "step": 14385 }, { "epoch": 1.45, "grad_norm": 11.265247039986576, "learning_rate": 6.148195367780211e-06, "loss": 1.0057, "step": 14390 }, { "epoch": 1.45, "grad_norm": 8.458256378539899, "learning_rate": 6.145340554401473e-06, "loss": 1.0076, "step": 14395 }, { "epoch": 1.45, "grad_norm": 5.2129788646983375, "learning_rate": 6.1424853469121436e-06, "loss": 1.0102, "step": 14400 }, { "epoch": 1.45, "grad_norm": 6.904045579179168, "learning_rate": 6.139629746294698e-06, "loss": 1.0414, "step": 14405 }, { "epoch": 1.45, "grad_norm": 12.254639251979803, "learning_rate": 6.1367737535317455e-06, "loss": 1.0348, "step": 14410 }, { "epoch": 1.45, "grad_norm": 24.60089000632015, "learning_rate": 6.133917369606028e-06, "loss": 1.0041, "step": 14415 }, { "epoch": 1.45, "grad_norm": 25.11777311760514, "learning_rate": 6.131060595500429e-06, "loss": 1.0148, "step": 14420 }, { "epoch": 1.45, "grad_norm": 13.428895668678699, "learning_rate": 6.128203432197959e-06, "loss": 1.0127, "step": 14425 }, { "epoch": 1.45, "grad_norm": 15.744034932615966, "learning_rate": 6.125345880681763e-06, "loss": 0.9718, "step": 14430 }, { "epoch": 1.46, "grad_norm": 9.820188753420734, "learning_rate": 6.1224879419351265e-06, "loss": 1.0571, "step": 14435 }, { "epoch": 1.46, "grad_norm": 7.721814644490256, "learning_rate": 6.119629616941462e-06, "loss": 1.0497, "step": 14440 }, { "epoch": 1.46, "grad_norm": 10.363061957577708, "learning_rate": 6.116770906684315e-06, "loss": 1.0061, "step": 14445 }, { "epoch": 1.46, "grad_norm": 11.604748909394797, "learning_rate": 6.113911812147364e-06, "loss": 1.0434, "step": 14450 }, { "epoch": 1.46, "grad_norm": 8.423518434542537, "learning_rate": 6.1110523343144245e-06, "loss": 0.9749, "step": 14455 }, { "epoch": 1.46, "grad_norm": 5.908629896757577, "learning_rate": 6.108192474169438e-06, "loss": 1.0043, "step": 14460 }, { "epoch": 1.46, "grad_norm": 14.146388089823004, "learning_rate": 6.105332232696478e-06, "loss": 1.023, "step": 14465 }, { "epoch": 1.46, "grad_norm": 6.164722658583353, "learning_rate": 6.1024716108797536e-06, "loss": 1.0099, "step": 14470 }, { "epoch": 1.46, "grad_norm": 9.13629495963452, "learning_rate": 6.0996106097036005e-06, "loss": 1.0635, "step": 14475 }, { "epoch": 1.46, "grad_norm": 7.800462873014633, "learning_rate": 6.096749230152486e-06, "loss": 1.0582, "step": 14480 }, { "epoch": 1.46, "grad_norm": 16.102963227939757, "learning_rate": 6.093887473211011e-06, "loss": 1.0162, "step": 14485 }, { "epoch": 1.46, "grad_norm": 6.403683996342634, "learning_rate": 6.0910253398638986e-06, "loss": 0.9756, "step": 14490 }, { "epoch": 1.46, "grad_norm": 7.1986272105334885, "learning_rate": 6.08816283109601e-06, "loss": 1.0398, "step": 14495 }, { "epoch": 1.46, "grad_norm": 10.499142846058897, "learning_rate": 6.085299947892331e-06, "loss": 1.0325, "step": 14500 }, { "epoch": 1.46, "grad_norm": 6.443651726931194, "learning_rate": 6.082436691237977e-06, "loss": 1.0316, "step": 14505 }, { "epoch": 1.46, "grad_norm": 5.599973887823316, "learning_rate": 6.079573062118192e-06, "loss": 0.9952, "step": 14510 }, { "epoch": 1.46, "grad_norm": 9.28820497848877, "learning_rate": 6.076709061518345e-06, "loss": 0.9898, "step": 14515 }, { "epoch": 1.46, "grad_norm": 9.505366057732097, "learning_rate": 6.07384469042394e-06, "loss": 1.0557, "step": 14520 }, { "epoch": 1.46, "grad_norm": 6.316945997554746, "learning_rate": 6.070979949820601e-06, "loss": 0.9976, "step": 14525 }, { "epoch": 1.46, "grad_norm": 9.461804826693626, "learning_rate": 6.068114840694085e-06, "loss": 1.0098, "step": 14530 }, { "epoch": 1.47, "grad_norm": 10.95271979417951, "learning_rate": 6.065249364030274e-06, "loss": 1.0732, "step": 14535 }, { "epoch": 1.47, "grad_norm": 5.253690343477784, "learning_rate": 6.062383520815173e-06, "loss": 1.0354, "step": 14540 }, { "epoch": 1.47, "grad_norm": 12.420956881537123, "learning_rate": 6.059517312034916e-06, "loss": 1.0241, "step": 14545 }, { "epoch": 1.47, "grad_norm": 9.335665009559412, "learning_rate": 6.056650738675765e-06, "loss": 1.039, "step": 14550 }, { "epoch": 1.47, "grad_norm": 6.074575706713052, "learning_rate": 6.053783801724104e-06, "loss": 1.026, "step": 14555 }, { "epoch": 1.47, "grad_norm": 8.785882460179483, "learning_rate": 6.050916502166444e-06, "loss": 1.0531, "step": 14560 }, { "epoch": 1.47, "grad_norm": 12.30238163595188, "learning_rate": 6.048048840989419e-06, "loss": 1.0576, "step": 14565 }, { "epoch": 1.47, "grad_norm": 7.506627651398785, "learning_rate": 6.045180819179788e-06, "loss": 1.072, "step": 14570 }, { "epoch": 1.47, "grad_norm": 11.198950377669895, "learning_rate": 6.042312437724436e-06, "loss": 1.0364, "step": 14575 }, { "epoch": 1.47, "grad_norm": 13.304318128207594, "learning_rate": 6.03944369761037e-06, "loss": 1.0088, "step": 14580 }, { "epoch": 1.47, "grad_norm": 7.41513228240807, "learning_rate": 6.036574599824719e-06, "loss": 1.0097, "step": 14585 }, { "epoch": 1.47, "grad_norm": 15.723566109044413, "learning_rate": 6.033705145354739e-06, "loss": 0.9864, "step": 14590 }, { "epoch": 1.47, "grad_norm": 13.220611774840002, "learning_rate": 6.0308353351878066e-06, "loss": 1.0636, "step": 14595 }, { "epoch": 1.47, "grad_norm": 28.326960131834948, "learning_rate": 6.0279651703114195e-06, "loss": 1.0307, "step": 14600 }, { "epoch": 1.47, "grad_norm": 21.15053825545421, "learning_rate": 6.0250946517131995e-06, "loss": 1.0315, "step": 14605 }, { "epoch": 1.47, "grad_norm": 19.95053160025602, "learning_rate": 6.0222237803808895e-06, "loss": 1.0259, "step": 14610 }, { "epoch": 1.47, "grad_norm": 23.706207217394876, "learning_rate": 6.01935255730235e-06, "loss": 1.0002, "step": 14615 }, { "epoch": 1.47, "grad_norm": 12.140859339836725, "learning_rate": 6.016480983465572e-06, "loss": 1.0286, "step": 14620 }, { "epoch": 1.47, "grad_norm": 7.605635830489675, "learning_rate": 6.013609059858657e-06, "loss": 1.0206, "step": 14625 }, { "epoch": 1.48, "grad_norm": 11.045248244303234, "learning_rate": 6.010736787469833e-06, "loss": 1.0038, "step": 14630 }, { "epoch": 1.48, "grad_norm": 13.588855130318489, "learning_rate": 6.007864167287447e-06, "loss": 1.034, "step": 14635 }, { "epoch": 1.48, "grad_norm": 6.09002337269585, "learning_rate": 6.004991200299962e-06, "loss": 0.9912, "step": 14640 }, { "epoch": 1.48, "grad_norm": 20.41358760134712, "learning_rate": 6.0021178874959665e-06, "loss": 1.0801, "step": 14645 }, { "epoch": 1.48, "grad_norm": 14.779793206949819, "learning_rate": 5.999244229864162e-06, "loss": 1.0057, "step": 14650 }, { "epoch": 1.48, "grad_norm": 9.243732728824146, "learning_rate": 5.996370228393371e-06, "loss": 1.0523, "step": 14655 }, { "epoch": 1.48, "grad_norm": 12.525122559658719, "learning_rate": 5.993495884072538e-06, "loss": 1.0188, "step": 14660 }, { "epoch": 1.48, "grad_norm": 16.96403205642408, "learning_rate": 5.99062119789072e-06, "loss": 1.0091, "step": 14665 }, { "epoch": 1.48, "grad_norm": 17.281253296587717, "learning_rate": 5.987746170837093e-06, "loss": 1.0365, "step": 14670 }, { "epoch": 1.48, "grad_norm": 5.516804753459836, "learning_rate": 5.984870803900953e-06, "loss": 1.0164, "step": 14675 }, { "epoch": 1.48, "grad_norm": 6.549956303475277, "learning_rate": 5.98199509807171e-06, "loss": 1.0113, "step": 14680 }, { "epoch": 1.48, "grad_norm": 5.293675045161462, "learning_rate": 5.979119054338891e-06, "loss": 1.0273, "step": 14685 }, { "epoch": 1.48, "grad_norm": 13.75212362247369, "learning_rate": 5.976242673692141e-06, "loss": 1.0119, "step": 14690 }, { "epoch": 1.48, "grad_norm": 12.283894924775016, "learning_rate": 5.973365957121219e-06, "loss": 1.0087, "step": 14695 }, { "epoch": 1.48, "grad_norm": 17.99199079388364, "learning_rate": 5.970488905616001e-06, "loss": 0.9819, "step": 14700 }, { "epoch": 1.48, "grad_norm": 8.31338495628191, "learning_rate": 5.967611520166479e-06, "loss": 0.9973, "step": 14705 }, { "epoch": 1.48, "grad_norm": 15.28664721508452, "learning_rate": 5.964733801762754e-06, "loss": 1.0251, "step": 14710 }, { "epoch": 1.48, "grad_norm": 16.402366788820295, "learning_rate": 5.961855751395052e-06, "loss": 1.0345, "step": 14715 }, { "epoch": 1.48, "grad_norm": 7.971032257036951, "learning_rate": 5.9589773700537035e-06, "loss": 1.0322, "step": 14720 }, { "epoch": 1.48, "grad_norm": 10.61183751369133, "learning_rate": 5.956098658729157e-06, "loss": 1.0088, "step": 14725 }, { "epoch": 1.49, "grad_norm": 7.953844617439651, "learning_rate": 5.953219618411976e-06, "loss": 1.0001, "step": 14730 }, { "epoch": 1.49, "grad_norm": 7.375623072845399, "learning_rate": 5.950340250092835e-06, "loss": 1.0226, "step": 14735 }, { "epoch": 1.49, "grad_norm": 8.030057617795244, "learning_rate": 5.947460554762522e-06, "loss": 0.9634, "step": 14740 }, { "epoch": 1.49, "grad_norm": 13.009853891348671, "learning_rate": 5.944580533411935e-06, "loss": 1.0118, "step": 14745 }, { "epoch": 1.49, "grad_norm": 28.990989603600664, "learning_rate": 5.9417001870320925e-06, "loss": 1.0167, "step": 14750 }, { "epoch": 1.49, "grad_norm": 6.118613775864574, "learning_rate": 5.9388195166141114e-06, "loss": 1.0082, "step": 14755 }, { "epoch": 1.49, "grad_norm": 13.273742102437229, "learning_rate": 5.935938523149234e-06, "loss": 1.0389, "step": 14760 }, { "epoch": 1.49, "grad_norm": 39.17306160606416, "learning_rate": 5.933057207628804e-06, "loss": 1.0493, "step": 14765 }, { "epoch": 1.49, "grad_norm": 24.024743927701977, "learning_rate": 5.930175571044279e-06, "loss": 0.9837, "step": 14770 }, { "epoch": 1.49, "grad_norm": 10.705609986527344, "learning_rate": 5.927293614387229e-06, "loss": 1.0272, "step": 14775 }, { "epoch": 1.49, "grad_norm": 23.230859114470846, "learning_rate": 5.924411338649332e-06, "loss": 1.0593, "step": 14780 }, { "epoch": 1.49, "grad_norm": 35.492015860924646, "learning_rate": 5.921528744822376e-06, "loss": 1.0298, "step": 14785 }, { "epoch": 1.49, "grad_norm": 9.562825349710867, "learning_rate": 5.918645833898257e-06, "loss": 1.0212, "step": 14790 }, { "epoch": 1.49, "grad_norm": 11.27956387033416, "learning_rate": 5.915762606868987e-06, "loss": 1.0027, "step": 14795 }, { "epoch": 1.49, "grad_norm": 26.21674719448401, "learning_rate": 5.912879064726679e-06, "loss": 0.98, "step": 14800 }, { "epoch": 1.49, "grad_norm": 8.700641274635641, "learning_rate": 5.909995208463555e-06, "loss": 1.0159, "step": 14805 }, { "epoch": 1.49, "grad_norm": 25.915543721817727, "learning_rate": 5.907111039071949e-06, "loss": 1.0333, "step": 14810 }, { "epoch": 1.49, "grad_norm": 10.689572006386276, "learning_rate": 5.9042265575443015e-06, "loss": 1.024, "step": 14815 }, { "epoch": 1.49, "grad_norm": 17.410061579336677, "learning_rate": 5.901341764873159e-06, "loss": 1.0091, "step": 14820 }, { "epoch": 1.49, "grad_norm": 20.74331671098658, "learning_rate": 5.898456662051175e-06, "loss": 0.9996, "step": 14825 }, { "epoch": 1.5, "grad_norm": 16.260279666494277, "learning_rate": 5.895571250071114e-06, "loss": 1.0082, "step": 14830 }, { "epoch": 1.5, "grad_norm": 6.515962590425934, "learning_rate": 5.892685529925839e-06, "loss": 1.0382, "step": 14835 }, { "epoch": 1.5, "grad_norm": 21.656660220190645, "learning_rate": 5.889799502608326e-06, "loss": 1.0119, "step": 14840 }, { "epoch": 1.5, "grad_norm": 13.943941394119102, "learning_rate": 5.886913169111655e-06, "loss": 1.0148, "step": 14845 }, { "epoch": 1.5, "grad_norm": 5.059556966773174, "learning_rate": 5.88402653042901e-06, "loss": 1.0267, "step": 14850 }, { "epoch": 1.5, "grad_norm": 10.230521020102254, "learning_rate": 5.881139587553679e-06, "loss": 1.0459, "step": 14855 }, { "epoch": 1.5, "grad_norm": 7.029114904796708, "learning_rate": 5.878252341479058e-06, "loss": 1.0302, "step": 14860 }, { "epoch": 1.5, "grad_norm": 7.191819892272424, "learning_rate": 5.8753647931986455e-06, "loss": 1.0411, "step": 14865 }, { "epoch": 1.5, "grad_norm": 10.431883483873603, "learning_rate": 5.872476943706043e-06, "loss": 1.0362, "step": 14870 }, { "epoch": 1.5, "grad_norm": 7.225020541416042, "learning_rate": 5.869588793994958e-06, "loss": 1.0278, "step": 14875 }, { "epoch": 1.5, "grad_norm": 6.718840666658366, "learning_rate": 5.8667003450592e-06, "loss": 0.9902, "step": 14880 }, { "epoch": 1.5, "grad_norm": 16.306096075768597, "learning_rate": 5.863811597892679e-06, "loss": 1.0415, "step": 14885 }, { "epoch": 1.5, "grad_norm": 17.330385188221122, "learning_rate": 5.860922553489416e-06, "loss": 1.045, "step": 14890 }, { "epoch": 1.5, "grad_norm": 10.898400490052744, "learning_rate": 5.858033212843521e-06, "loss": 1.0119, "step": 14895 }, { "epoch": 1.5, "grad_norm": 7.821759514758661, "learning_rate": 5.855143576949218e-06, "loss": 1.1096, "step": 14900 }, { "epoch": 1.5, "grad_norm": 16.145556238240854, "learning_rate": 5.852253646800829e-06, "loss": 1.0223, "step": 14905 }, { "epoch": 1.5, "grad_norm": 11.538354365516687, "learning_rate": 5.849363423392771e-06, "loss": 1.0003, "step": 14910 }, { "epoch": 1.5, "grad_norm": 8.376499493234194, "learning_rate": 5.846472907719571e-06, "loss": 1.0383, "step": 14915 }, { "epoch": 1.5, "grad_norm": 15.640406948927293, "learning_rate": 5.843582100775854e-06, "loss": 1.0545, "step": 14920 }, { "epoch": 1.5, "grad_norm": 41.71062596817106, "learning_rate": 5.84069100355634e-06, "loss": 1.0365, "step": 14925 }, { "epoch": 1.51, "grad_norm": 36.440213900509036, "learning_rate": 5.837799617055855e-06, "loss": 1.0287, "step": 14930 }, { "epoch": 1.51, "grad_norm": 13.231312091439358, "learning_rate": 5.834907942269323e-06, "loss": 1.0196, "step": 14935 }, { "epoch": 1.51, "grad_norm": 6.67987282339489, "learning_rate": 5.832015980191763e-06, "loss": 1.0247, "step": 14940 }, { "epoch": 1.51, "grad_norm": 6.027745730413337, "learning_rate": 5.8291237318183e-06, "loss": 1.0395, "step": 14945 }, { "epoch": 1.51, "grad_norm": 8.944458259992423, "learning_rate": 5.826231198144153e-06, "loss": 0.9978, "step": 14950 }, { "epoch": 1.51, "grad_norm": 7.375857405507643, "learning_rate": 5.82333838016464e-06, "loss": 1.0039, "step": 14955 }, { "epoch": 1.51, "grad_norm": 29.02177122478957, "learning_rate": 5.820445278875177e-06, "loss": 1.011, "step": 14960 }, { "epoch": 1.51, "grad_norm": 12.960662825766184, "learning_rate": 5.817551895271277e-06, "loss": 1.036, "step": 14965 }, { "epoch": 1.51, "grad_norm": 6.955402914365195, "learning_rate": 5.81465823034855e-06, "loss": 0.9873, "step": 14970 }, { "epoch": 1.51, "grad_norm": 19.03775796460291, "learning_rate": 5.811764285102704e-06, "loss": 1.002, "step": 14975 }, { "epoch": 1.51, "grad_norm": 20.372957156704015, "learning_rate": 5.8088700605295445e-06, "loss": 1.0141, "step": 14980 }, { "epoch": 1.51, "grad_norm": 5.777200256501984, "learning_rate": 5.805975557624969e-06, "loss": 1.0528, "step": 14985 }, { "epoch": 1.51, "grad_norm": 48.67917693510359, "learning_rate": 5.803080777384973e-06, "loss": 1.0117, "step": 14990 }, { "epoch": 1.51, "grad_norm": 9.497038082024737, "learning_rate": 5.800185720805649e-06, "loss": 0.9775, "step": 14995 }, { "epoch": 1.51, "grad_norm": 23.166715738992746, "learning_rate": 5.797290388883183e-06, "loss": 0.9993, "step": 15000 }, { "epoch": 1.51, "grad_norm": 13.057863642501305, "learning_rate": 5.794394782613855e-06, "loss": 1.001, "step": 15005 }, { "epoch": 1.51, "grad_norm": 6.830435844917975, "learning_rate": 5.7914989029940414e-06, "loss": 1.0265, "step": 15010 }, { "epoch": 1.51, "grad_norm": 35.212613349302586, "learning_rate": 5.788602751020211e-06, "loss": 1.0209, "step": 15015 }, { "epoch": 1.51, "grad_norm": 17.05136078336535, "learning_rate": 5.785706327688929e-06, "loss": 1.0196, "step": 15020 }, { "epoch": 1.51, "grad_norm": 15.07256425645445, "learning_rate": 5.782809633996849e-06, "loss": 0.9761, "step": 15025 }, { "epoch": 1.52, "grad_norm": 25.085596348207954, "learning_rate": 5.7799126709407215e-06, "loss": 1.0027, "step": 15030 }, { "epoch": 1.52, "grad_norm": 6.371754516177725, "learning_rate": 5.777015439517389e-06, "loss": 1.0181, "step": 15035 }, { "epoch": 1.52, "grad_norm": 5.5333864685009395, "learning_rate": 5.774117940723784e-06, "loss": 1.0047, "step": 15040 }, { "epoch": 1.52, "grad_norm": 7.500730030934907, "learning_rate": 5.771220175556938e-06, "loss": 1.0336, "step": 15045 }, { "epoch": 1.52, "grad_norm": 9.709625588493253, "learning_rate": 5.768322145013964e-06, "loss": 1.0299, "step": 15050 }, { "epoch": 1.52, "grad_norm": 11.338952099488806, "learning_rate": 5.765423850092073e-06, "loss": 1.036, "step": 15055 }, { "epoch": 1.52, "grad_norm": 5.169962548517268, "learning_rate": 5.762525291788569e-06, "loss": 0.982, "step": 15060 }, { "epoch": 1.52, "grad_norm": 5.900902537411488, "learning_rate": 5.759626471100839e-06, "loss": 0.9946, "step": 15065 }, { "epoch": 1.52, "grad_norm": 7.1707931642179235, "learning_rate": 5.756727389026365e-06, "loss": 1.0523, "step": 15070 }, { "epoch": 1.52, "grad_norm": 6.353224816483072, "learning_rate": 5.753828046562721e-06, "loss": 0.9908, "step": 15075 }, { "epoch": 1.52, "grad_norm": 13.573037894562587, "learning_rate": 5.7509284447075644e-06, "loss": 0.997, "step": 15080 }, { "epoch": 1.52, "grad_norm": 11.837813817381356, "learning_rate": 5.748028584458648e-06, "loss": 1.0719, "step": 15085 }, { "epoch": 1.52, "grad_norm": 12.876634308775737, "learning_rate": 5.745128466813811e-06, "loss": 1.0037, "step": 15090 }, { "epoch": 1.52, "grad_norm": 11.539536593451036, "learning_rate": 5.742228092770978e-06, "loss": 1.0185, "step": 15095 }, { "epoch": 1.52, "grad_norm": 33.397164571504376, "learning_rate": 5.739327463328168e-06, "loss": 1.0098, "step": 15100 }, { "epoch": 1.52, "grad_norm": 29.183823725475442, "learning_rate": 5.736426579483486e-06, "loss": 1.0048, "step": 15105 }, { "epoch": 1.52, "grad_norm": 24.587574963191283, "learning_rate": 5.733525442235119e-06, "loss": 1.0098, "step": 15110 }, { "epoch": 1.52, "grad_norm": 6.947952312742559, "learning_rate": 5.730624052581349e-06, "loss": 1.0179, "step": 15115 }, { "epoch": 1.52, "grad_norm": 10.89284380916069, "learning_rate": 5.727722411520541e-06, "loss": 0.9783, "step": 15120 }, { "epoch": 1.52, "grad_norm": 18.396496552929893, "learning_rate": 5.724820520051145e-06, "loss": 1.0223, "step": 15125 }, { "epoch": 1.53, "grad_norm": 23.512104397954204, "learning_rate": 5.7219183791717004e-06, "loss": 0.998, "step": 15130 }, { "epoch": 1.53, "grad_norm": 8.491487800240288, "learning_rate": 5.719015989880831e-06, "loss": 1.0189, "step": 15135 }, { "epoch": 1.53, "grad_norm": 7.884338842606663, "learning_rate": 5.716113353177247e-06, "loss": 1.0263, "step": 15140 }, { "epoch": 1.53, "grad_norm": 6.324532652016312, "learning_rate": 5.713210470059743e-06, "loss": 1.0832, "step": 15145 }, { "epoch": 1.53, "grad_norm": 7.037924639353008, "learning_rate": 5.710307341527196e-06, "loss": 1.0221, "step": 15150 }, { "epoch": 1.53, "grad_norm": 5.74501667455677, "learning_rate": 5.707403968578572e-06, "loss": 1.0076, "step": 15155 }, { "epoch": 1.53, "grad_norm": 7.5396246138440945, "learning_rate": 5.704500352212918e-06, "loss": 1.067, "step": 15160 }, { "epoch": 1.53, "grad_norm": 8.767467544066514, "learning_rate": 5.701596493429363e-06, "loss": 0.9922, "step": 15165 }, { "epoch": 1.53, "grad_norm": 9.52735107254121, "learning_rate": 5.6986923932271285e-06, "loss": 1.0567, "step": 15170 }, { "epoch": 1.53, "grad_norm": 5.0504532995719345, "learning_rate": 5.695788052605507e-06, "loss": 0.9651, "step": 15175 }, { "epoch": 1.53, "grad_norm": 13.310908479402293, "learning_rate": 5.69288347256388e-06, "loss": 1.038, "step": 15180 }, { "epoch": 1.53, "grad_norm": 7.202300147414778, "learning_rate": 5.689978654101715e-06, "loss": 0.9734, "step": 15185 }, { "epoch": 1.53, "grad_norm": 7.184742770785585, "learning_rate": 5.687073598218551e-06, "loss": 1.0026, "step": 15190 }, { "epoch": 1.53, "grad_norm": 6.543331062085483, "learning_rate": 5.68416830591402e-06, "loss": 1.0343, "step": 15195 }, { "epoch": 1.53, "grad_norm": 18.40410285840536, "learning_rate": 5.681262778187828e-06, "loss": 1.0112, "step": 15200 }, { "epoch": 1.53, "grad_norm": 8.552560720211156, "learning_rate": 5.678357016039764e-06, "loss": 0.9836, "step": 15205 }, { "epoch": 1.53, "grad_norm": 15.137282707856617, "learning_rate": 5.675451020469699e-06, "loss": 1.0329, "step": 15210 }, { "epoch": 1.53, "grad_norm": 12.269031411588143, "learning_rate": 5.672544792477584e-06, "loss": 0.9694, "step": 15215 }, { "epoch": 1.53, "grad_norm": 8.489519557977571, "learning_rate": 5.669638333063448e-06, "loss": 0.9874, "step": 15220 }, { "epoch": 1.54, "grad_norm": 13.159324332143338, "learning_rate": 5.666731643227399e-06, "loss": 0.9907, "step": 15225 }, { "epoch": 1.54, "grad_norm": 18.394066611868105, "learning_rate": 5.663824723969631e-06, "loss": 0.9899, "step": 15230 }, { "epoch": 1.54, "grad_norm": 18.369779247598046, "learning_rate": 5.6609175762904075e-06, "loss": 0.9992, "step": 15235 }, { "epoch": 1.54, "grad_norm": 7.373663946948557, "learning_rate": 5.658010201190078e-06, "loss": 1.0451, "step": 15240 }, { "epoch": 1.54, "grad_norm": 11.41526122202614, "learning_rate": 5.655102599669068e-06, "loss": 1.0139, "step": 15245 }, { "epoch": 1.54, "grad_norm": 6.543128116619436, "learning_rate": 5.652194772727878e-06, "loss": 1.0553, "step": 15250 }, { "epoch": 1.54, "grad_norm": 9.985843352021122, "learning_rate": 5.649286721367089e-06, "loss": 1.0342, "step": 15255 }, { "epoch": 1.54, "grad_norm": 6.4618556132775264, "learning_rate": 5.646378446587362e-06, "loss": 1.0222, "step": 15260 }, { "epoch": 1.54, "grad_norm": 6.018828072452745, "learning_rate": 5.643469949389426e-06, "loss": 1.0022, "step": 15265 }, { "epoch": 1.54, "grad_norm": 21.408005050217124, "learning_rate": 5.640561230774095e-06, "loss": 1.0343, "step": 15270 }, { "epoch": 1.54, "grad_norm": 27.216153751199354, "learning_rate": 5.6376522917422584e-06, "loss": 1.0465, "step": 15275 }, { "epoch": 1.54, "grad_norm": 6.033256161413907, "learning_rate": 5.634743133294877e-06, "loss": 1.0771, "step": 15280 }, { "epoch": 1.54, "grad_norm": 11.203846527256655, "learning_rate": 5.631833756432988e-06, "loss": 1.0364, "step": 15285 }, { "epoch": 1.54, "grad_norm": 11.766700739594345, "learning_rate": 5.628924162157707e-06, "loss": 1.002, "step": 15290 }, { "epoch": 1.54, "grad_norm": 13.70182695862929, "learning_rate": 5.6260143514702245e-06, "loss": 1.0249, "step": 15295 }, { "epoch": 1.54, "grad_norm": 6.402225039197062, "learning_rate": 5.6231043253718e-06, "loss": 1.0014, "step": 15300 }, { "epoch": 1.54, "grad_norm": 12.84456051133836, "learning_rate": 5.620194084863771e-06, "loss": 1.0315, "step": 15305 }, { "epoch": 1.54, "grad_norm": 7.13298034214026, "learning_rate": 5.617283630947553e-06, "loss": 1.0206, "step": 15310 }, { "epoch": 1.54, "grad_norm": 6.80736730732881, "learning_rate": 5.614372964624625e-06, "loss": 1.0183, "step": 15315 }, { "epoch": 1.54, "grad_norm": 12.23622607920322, "learning_rate": 5.6114620868965466e-06, "loss": 1.0323, "step": 15320 }, { "epoch": 1.55, "grad_norm": 11.502210869656103, "learning_rate": 5.608550998764949e-06, "loss": 1.0368, "step": 15325 }, { "epoch": 1.55, "grad_norm": 5.627686206640388, "learning_rate": 5.605639701231533e-06, "loss": 0.9889, "step": 15330 }, { "epoch": 1.55, "grad_norm": 4.981370174837831, "learning_rate": 5.602728195298075e-06, "loss": 0.9927, "step": 15335 }, { "epoch": 1.55, "grad_norm": 5.225736903417539, "learning_rate": 5.5998164819664215e-06, "loss": 1.0384, "step": 15340 }, { "epoch": 1.55, "grad_norm": 10.113727655453436, "learning_rate": 5.596904562238488e-06, "loss": 1.0588, "step": 15345 }, { "epoch": 1.55, "grad_norm": 18.306783354210584, "learning_rate": 5.593992437116264e-06, "loss": 1.0228, "step": 15350 }, { "epoch": 1.55, "grad_norm": 15.44502839539049, "learning_rate": 5.5910801076018115e-06, "loss": 0.9847, "step": 15355 }, { "epoch": 1.55, "grad_norm": 6.849547074253466, "learning_rate": 5.5881675746972585e-06, "loss": 1.0247, "step": 15360 }, { "epoch": 1.55, "grad_norm": 9.89612174609036, "learning_rate": 5.585254839404804e-06, "loss": 0.9802, "step": 15365 }, { "epoch": 1.55, "grad_norm": 6.112813776458878, "learning_rate": 5.582341902726719e-06, "loss": 1.0123, "step": 15370 }, { "epoch": 1.55, "grad_norm": 6.294517000571133, "learning_rate": 5.57942876566534e-06, "loss": 1.007, "step": 15375 }, { "epoch": 1.55, "grad_norm": 5.6312243252068495, "learning_rate": 5.576515429223077e-06, "loss": 0.9867, "step": 15380 }, { "epoch": 1.55, "grad_norm": 6.158982603092975, "learning_rate": 5.5736018944024065e-06, "loss": 0.9994, "step": 15385 }, { "epoch": 1.55, "grad_norm": 8.584956074326355, "learning_rate": 5.5706881622058704e-06, "loss": 1.0102, "step": 15390 }, { "epoch": 1.55, "grad_norm": 5.795599395721448, "learning_rate": 5.567774233636083e-06, "loss": 1.0619, "step": 15395 }, { "epoch": 1.55, "grad_norm": 8.595874741508421, "learning_rate": 5.564860109695726e-06, "loss": 1.0089, "step": 15400 }, { "epoch": 1.55, "grad_norm": 6.457989774283112, "learning_rate": 5.561945791387543e-06, "loss": 0.9862, "step": 15405 }, { "epoch": 1.55, "grad_norm": 5.683225599592505, "learning_rate": 5.55903127971435e-06, "loss": 1.0738, "step": 15410 }, { "epoch": 1.55, "grad_norm": 9.491692917430592, "learning_rate": 5.556116575679028e-06, "loss": 1.0031, "step": 15415 }, { "epoch": 1.55, "grad_norm": 13.804822153310992, "learning_rate": 5.553201680284523e-06, "loss": 1.042, "step": 15420 }, { "epoch": 1.56, "grad_norm": 5.054414839126639, "learning_rate": 5.550286594533848e-06, "loss": 1.0369, "step": 15425 }, { "epoch": 1.56, "grad_norm": 10.795255075267063, "learning_rate": 5.547371319430083e-06, "loss": 1.0149, "step": 15430 }, { "epoch": 1.56, "grad_norm": 8.29537836031942, "learning_rate": 5.544455855976369e-06, "loss": 1.0145, "step": 15435 }, { "epoch": 1.56, "grad_norm": 8.764843303631773, "learning_rate": 5.541540205175914e-06, "loss": 0.9834, "step": 15440 }, { "epoch": 1.56, "grad_norm": 24.348022899174037, "learning_rate": 5.538624368031992e-06, "loss": 1.059, "step": 15445 }, { "epoch": 1.56, "grad_norm": 42.15851586163241, "learning_rate": 5.535708345547939e-06, "loss": 1.0674, "step": 15450 }, { "epoch": 1.56, "grad_norm": 14.398321831145607, "learning_rate": 5.5327921387271565e-06, "loss": 1.0084, "step": 15455 }, { "epoch": 1.56, "grad_norm": 12.41292785937368, "learning_rate": 5.529875748573109e-06, "loss": 0.9958, "step": 15460 }, { "epoch": 1.56, "grad_norm": 31.31049057929927, "learning_rate": 5.52695917608932e-06, "loss": 1.0856, "step": 15465 }, { "epoch": 1.56, "grad_norm": 17.824836231611595, "learning_rate": 5.5240424222793836e-06, "loss": 1.0009, "step": 15470 }, { "epoch": 1.56, "grad_norm": 12.923284968966275, "learning_rate": 5.521125488146951e-06, "loss": 1.0319, "step": 15475 }, { "epoch": 1.56, "grad_norm": 25.84663898577755, "learning_rate": 5.5182083746957334e-06, "loss": 1.0175, "step": 15480 }, { "epoch": 1.56, "grad_norm": 8.9546403529747, "learning_rate": 5.51529108292951e-06, "loss": 1.0446, "step": 15485 }, { "epoch": 1.56, "grad_norm": 5.7153867003786205, "learning_rate": 5.512373613852117e-06, "loss": 1.0295, "step": 15490 }, { "epoch": 1.56, "grad_norm": 12.35043341931898, "learning_rate": 5.5094559684674545e-06, "loss": 1.0182, "step": 15495 }, { "epoch": 1.56, "grad_norm": 7.7129822787587905, "learning_rate": 5.506538147779478e-06, "loss": 0.9995, "step": 15500 }, { "epoch": 1.56, "grad_norm": 5.9358006177255, "learning_rate": 5.503620152792208e-06, "loss": 0.9772, "step": 15505 }, { "epoch": 1.56, "grad_norm": 8.139260154917016, "learning_rate": 5.500701984509727e-06, "loss": 1.0554, "step": 15510 }, { "epoch": 1.56, "grad_norm": 8.548520446037127, "learning_rate": 5.497783643936169e-06, "loss": 1.0202, "step": 15515 }, { "epoch": 1.56, "grad_norm": 6.310244375627785, "learning_rate": 5.494865132075735e-06, "loss": 0.9979, "step": 15520 }, { "epoch": 1.57, "grad_norm": 12.879052737594693, "learning_rate": 5.491946449932683e-06, "loss": 1.074, "step": 15525 }, { "epoch": 1.57, "grad_norm": 25.970468165459245, "learning_rate": 5.489027598511327e-06, "loss": 1.0097, "step": 15530 }, { "epoch": 1.57, "grad_norm": 16.811799979001137, "learning_rate": 5.48610857881604e-06, "loss": 0.9936, "step": 15535 }, { "epoch": 1.57, "grad_norm": 5.571587571136714, "learning_rate": 5.483189391851258e-06, "loss": 0.9873, "step": 15540 }, { "epoch": 1.57, "grad_norm": 9.477857504477589, "learning_rate": 5.480270038621466e-06, "loss": 0.9792, "step": 15545 }, { "epoch": 1.57, "grad_norm": 6.107218873468646, "learning_rate": 5.4773505201312125e-06, "loss": 1.0711, "step": 15550 }, { "epoch": 1.57, "grad_norm": 15.503662574518724, "learning_rate": 5.474430837385102e-06, "loss": 0.9318, "step": 15555 }, { "epoch": 1.57, "grad_norm": 23.904668772962744, "learning_rate": 5.471510991387792e-06, "loss": 1.001, "step": 15560 }, { "epoch": 1.57, "grad_norm": 9.82295750301058, "learning_rate": 5.4685909831439995e-06, "loss": 0.9886, "step": 15565 }, { "epoch": 1.57, "grad_norm": 13.798088528044294, "learning_rate": 5.4656708136584994e-06, "loss": 1.0119, "step": 15570 }, { "epoch": 1.57, "grad_norm": 18.4122532450321, "learning_rate": 5.462750483936116e-06, "loss": 1.0103, "step": 15575 }, { "epoch": 1.57, "grad_norm": 8.189506784850868, "learning_rate": 5.459829994981732e-06, "loss": 1.0242, "step": 15580 }, { "epoch": 1.57, "grad_norm": 6.985499988081523, "learning_rate": 5.456909347800289e-06, "loss": 1.0173, "step": 15585 }, { "epoch": 1.57, "grad_norm": 12.435487548336896, "learning_rate": 5.453988543396773e-06, "loss": 1.0303, "step": 15590 }, { "epoch": 1.57, "grad_norm": 11.9345012211472, "learning_rate": 5.451067582776233e-06, "loss": 1.0322, "step": 15595 }, { "epoch": 1.57, "grad_norm": 6.203630037662816, "learning_rate": 5.44814646694377e-06, "loss": 1.0119, "step": 15600 }, { "epoch": 1.57, "grad_norm": 5.700017667723494, "learning_rate": 5.445225196904536e-06, "loss": 1.0121, "step": 15605 }, { "epoch": 1.57, "grad_norm": 5.727849581352104, "learning_rate": 5.4423037736637354e-06, "loss": 0.9896, "step": 15610 }, { "epoch": 1.57, "grad_norm": 6.293525246706005, "learning_rate": 5.4393821982266296e-06, "loss": 0.988, "step": 15615 }, { "epoch": 1.57, "grad_norm": 16.412605276421097, "learning_rate": 5.436460471598528e-06, "loss": 1.0571, "step": 15620 }, { "epoch": 1.58, "grad_norm": 5.940978612544219, "learning_rate": 5.433538594784796e-06, "loss": 1.0098, "step": 15625 }, { "epoch": 1.58, "grad_norm": 8.230723437166615, "learning_rate": 5.4306165687908485e-06, "loss": 1.0514, "step": 15630 }, { "epoch": 1.58, "grad_norm": 7.221237771261272, "learning_rate": 5.4276943946221494e-06, "loss": 1.0028, "step": 15635 }, { "epoch": 1.58, "grad_norm": 10.657311547680987, "learning_rate": 5.424772073284218e-06, "loss": 1.0432, "step": 15640 }, { "epoch": 1.58, "grad_norm": 5.577141376755671, "learning_rate": 5.421849605782622e-06, "loss": 0.9769, "step": 15645 }, { "epoch": 1.58, "grad_norm": 6.860599441000725, "learning_rate": 5.418926993122979e-06, "loss": 1.0185, "step": 15650 }, { "epoch": 1.58, "grad_norm": 12.177441626567626, "learning_rate": 5.4160042363109585e-06, "loss": 0.9806, "step": 15655 }, { "epoch": 1.58, "grad_norm": 9.185151466420002, "learning_rate": 5.413081336352278e-06, "loss": 1.0278, "step": 15660 }, { "epoch": 1.58, "grad_norm": 10.775833291063558, "learning_rate": 5.410158294252704e-06, "loss": 1.0019, "step": 15665 }, { "epoch": 1.58, "grad_norm": 9.249542375778276, "learning_rate": 5.4072351110180525e-06, "loss": 1.0343, "step": 15670 }, { "epoch": 1.58, "grad_norm": 7.721745645435944, "learning_rate": 5.404311787654189e-06, "loss": 1.028, "step": 15675 }, { "epoch": 1.58, "grad_norm": 5.587955552947999, "learning_rate": 5.401388325167024e-06, "loss": 1.0212, "step": 15680 }, { "epoch": 1.58, "grad_norm": 5.269292112699187, "learning_rate": 5.39846472456252e-06, "loss": 1.0303, "step": 15685 }, { "epoch": 1.58, "grad_norm": 6.921660261357361, "learning_rate": 5.3955409868466845e-06, "loss": 0.9814, "step": 15690 }, { "epoch": 1.58, "grad_norm": 7.739963976484743, "learning_rate": 5.392617113025576e-06, "loss": 1.0304, "step": 15695 }, { "epoch": 1.58, "grad_norm": 8.29286986619866, "learning_rate": 5.38969310410529e-06, "loss": 1.0466, "step": 15700 }, { "epoch": 1.58, "grad_norm": 10.065776472665721, "learning_rate": 5.386768961091981e-06, "loss": 0.9981, "step": 15705 }, { "epoch": 1.58, "grad_norm": 16.73112924832002, "learning_rate": 5.3838446849918425e-06, "loss": 1.0516, "step": 15710 }, { "epoch": 1.58, "grad_norm": 10.114222624670669, "learning_rate": 5.3809202768111135e-06, "loss": 0.9975, "step": 15715 }, { "epoch": 1.58, "grad_norm": 20.69821552756159, "learning_rate": 5.377995737556081e-06, "loss": 1.0127, "step": 15720 }, { "epoch": 1.59, "grad_norm": 27.485717435936895, "learning_rate": 5.375071068233077e-06, "loss": 1.042, "step": 15725 }, { "epoch": 1.59, "grad_norm": 13.7777365075493, "learning_rate": 5.372146269848476e-06, "loss": 1.0256, "step": 15730 }, { "epoch": 1.59, "grad_norm": 9.602185252863674, "learning_rate": 5.3692213434086995e-06, "loss": 1.0205, "step": 15735 }, { "epoch": 1.59, "grad_norm": 7.2234703348929346, "learning_rate": 5.366296289920211e-06, "loss": 0.9911, "step": 15740 }, { "epoch": 1.59, "grad_norm": 8.4351426890756, "learning_rate": 5.363371110389519e-06, "loss": 1.0269, "step": 15745 }, { "epoch": 1.59, "grad_norm": 6.786363333398277, "learning_rate": 5.360445805823174e-06, "loss": 0.9598, "step": 15750 }, { "epoch": 1.59, "grad_norm": 4.97525024859164, "learning_rate": 5.357520377227774e-06, "loss": 0.9675, "step": 15755 }, { "epoch": 1.59, "grad_norm": 9.086909538105326, "learning_rate": 5.354594825609952e-06, "loss": 0.9931, "step": 15760 }, { "epoch": 1.59, "grad_norm": 10.159060953047028, "learning_rate": 5.351669151976389e-06, "loss": 1.0362, "step": 15765 }, { "epoch": 1.59, "grad_norm": 7.916611390361543, "learning_rate": 5.348743357333808e-06, "loss": 1.0337, "step": 15770 }, { "epoch": 1.59, "grad_norm": 8.805481262074903, "learning_rate": 5.34581744268897e-06, "loss": 1.0247, "step": 15775 }, { "epoch": 1.59, "grad_norm": 26.194657589325068, "learning_rate": 5.34289140904868e-06, "loss": 0.9847, "step": 15780 }, { "epoch": 1.59, "grad_norm": 14.415964782837731, "learning_rate": 5.339965257419784e-06, "loss": 1.0127, "step": 15785 }, { "epoch": 1.59, "grad_norm": 9.071868588891041, "learning_rate": 5.3370389888091675e-06, "loss": 1.0205, "step": 15790 }, { "epoch": 1.59, "grad_norm": 6.9732842913514395, "learning_rate": 5.334112604223757e-06, "loss": 1.0129, "step": 15795 }, { "epoch": 1.59, "grad_norm": 17.997310476911395, "learning_rate": 5.331186104670518e-06, "loss": 0.992, "step": 15800 }, { "epoch": 1.59, "grad_norm": 14.085613485435752, "learning_rate": 5.328259491156458e-06, "loss": 0.9577, "step": 15805 }, { "epoch": 1.59, "grad_norm": 12.168853592166277, "learning_rate": 5.325332764688619e-06, "loss": 0.9916, "step": 15810 }, { "epoch": 1.59, "grad_norm": 8.324216349508182, "learning_rate": 5.322405926274087e-06, "loss": 1.0157, "step": 15815 }, { "epoch": 1.59, "grad_norm": 17.376033806430183, "learning_rate": 5.319478976919984e-06, "loss": 1.0294, "step": 15820 }, { "epoch": 1.6, "grad_norm": 7.579910155353343, "learning_rate": 5.31655191763347e-06, "loss": 1.0061, "step": 15825 }, { "epoch": 1.6, "grad_norm": 5.2035533184641825, "learning_rate": 5.313624749421743e-06, "loss": 1.0234, "step": 15830 }, { "epoch": 1.6, "grad_norm": 5.575174310526426, "learning_rate": 5.31069747329204e-06, "loss": 0.971, "step": 15835 }, { "epoch": 1.6, "grad_norm": 11.968812764711304, "learning_rate": 5.307770090251633e-06, "loss": 1.0269, "step": 15840 }, { "epoch": 1.6, "grad_norm": 13.944910548216217, "learning_rate": 5.3048426013078315e-06, "loss": 1.0075, "step": 15845 }, { "epoch": 1.6, "grad_norm": 9.062487695154594, "learning_rate": 5.301915007467982e-06, "loss": 1.0303, "step": 15850 }, { "epoch": 1.6, "grad_norm": 8.147205587050276, "learning_rate": 5.298987309739467e-06, "loss": 0.9552, "step": 15855 }, { "epoch": 1.6, "grad_norm": 10.092372722717185, "learning_rate": 5.296059509129704e-06, "loss": 1.0003, "step": 15860 }, { "epoch": 1.6, "grad_norm": 10.38372461132063, "learning_rate": 5.293131606646148e-06, "loss": 0.9896, "step": 15865 }, { "epoch": 1.6, "grad_norm": 13.089507838844908, "learning_rate": 5.290203603296285e-06, "loss": 1.0019, "step": 15870 }, { "epoch": 1.6, "grad_norm": 9.69511411954582, "learning_rate": 5.287275500087639e-06, "loss": 1.024, "step": 15875 }, { "epoch": 1.6, "grad_norm": 13.584071369622801, "learning_rate": 5.284347298027769e-06, "loss": 0.9924, "step": 15880 }, { "epoch": 1.6, "grad_norm": 5.611077298524117, "learning_rate": 5.281418998124264e-06, "loss": 1.0062, "step": 15885 }, { "epoch": 1.6, "grad_norm": 13.579750167447928, "learning_rate": 5.278490601384752e-06, "loss": 1.002, "step": 15890 }, { "epoch": 1.6, "grad_norm": 13.858087425142072, "learning_rate": 5.275562108816889e-06, "loss": 1.003, "step": 15895 }, { "epoch": 1.6, "grad_norm": 10.62984739944366, "learning_rate": 5.2726335214283675e-06, "loss": 1.0321, "step": 15900 }, { "epoch": 1.6, "grad_norm": 15.914241867641904, "learning_rate": 5.269704840226911e-06, "loss": 0.9902, "step": 15905 }, { "epoch": 1.6, "grad_norm": 11.49404515966052, "learning_rate": 5.266776066220278e-06, "loss": 1.0109, "step": 15910 }, { "epoch": 1.6, "grad_norm": 10.567038240824985, "learning_rate": 5.2638472004162545e-06, "loss": 1.0079, "step": 15915 }, { "epoch": 1.61, "grad_norm": 6.009631683944291, "learning_rate": 5.260918243822662e-06, "loss": 1.0159, "step": 15920 }, { "epoch": 1.61, "grad_norm": 10.12079307982005, "learning_rate": 5.257989197447352e-06, "loss": 1.0071, "step": 15925 }, { "epoch": 1.61, "grad_norm": 8.507796359581, "learning_rate": 5.255060062298204e-06, "loss": 0.9743, "step": 15930 }, { "epoch": 1.61, "grad_norm": 7.731754541730131, "learning_rate": 5.252130839383133e-06, "loss": 1.0074, "step": 15935 }, { "epoch": 1.61, "grad_norm": 6.589248267275763, "learning_rate": 5.249201529710079e-06, "loss": 1.0055, "step": 15940 }, { "epoch": 1.61, "grad_norm": 5.784506805665072, "learning_rate": 5.24627213428702e-06, "loss": 0.9896, "step": 15945 }, { "epoch": 1.61, "grad_norm": 9.764375372843931, "learning_rate": 5.243342654121953e-06, "loss": 1.007, "step": 15950 }, { "epoch": 1.61, "grad_norm": 12.327918830755468, "learning_rate": 5.24041309022291e-06, "loss": 0.974, "step": 15955 }, { "epoch": 1.61, "grad_norm": 11.952405391443223, "learning_rate": 5.237483443597954e-06, "loss": 0.9841, "step": 15960 }, { "epoch": 1.61, "grad_norm": 17.890437029989172, "learning_rate": 5.234553715255171e-06, "loss": 0.9778, "step": 15965 }, { "epoch": 1.61, "grad_norm": 18.01316538586824, "learning_rate": 5.231623906202677e-06, "loss": 1.0248, "step": 15970 }, { "epoch": 1.61, "grad_norm": 15.853916748104757, "learning_rate": 5.228694017448621e-06, "loss": 1.0252, "step": 15975 }, { "epoch": 1.61, "grad_norm": 6.036828572797781, "learning_rate": 5.2257640500011704e-06, "loss": 1.0223, "step": 15980 }, { "epoch": 1.61, "grad_norm": 5.557133960109277, "learning_rate": 5.222834004868527e-06, "loss": 0.9986, "step": 15985 }, { "epoch": 1.61, "grad_norm": 11.190170767718795, "learning_rate": 5.219903883058916e-06, "loss": 0.9722, "step": 15990 }, { "epoch": 1.61, "grad_norm": 6.968707961937117, "learning_rate": 5.216973685580586e-06, "loss": 1.0007, "step": 15995 }, { "epoch": 1.61, "grad_norm": 5.695239415229787, "learning_rate": 5.214043413441819e-06, "loss": 1.0044, "step": 16000 }, { "epoch": 1.61, "grad_norm": 6.32502215343701, "learning_rate": 5.21111306765092e-06, "loss": 1.0325, "step": 16005 }, { "epoch": 1.61, "grad_norm": 7.064153141750366, "learning_rate": 5.208182649216213e-06, "loss": 1.0135, "step": 16010 }, { "epoch": 1.61, "grad_norm": 25.031161662044592, "learning_rate": 5.205252159146057e-06, "loss": 0.987, "step": 16015 }, { "epoch": 1.62, "grad_norm": 26.71026392221403, "learning_rate": 5.202321598448829e-06, "loss": 1.0106, "step": 16020 }, { "epoch": 1.62, "grad_norm": 6.861465699216564, "learning_rate": 5.1993909681329325e-06, "loss": 1.0096, "step": 16025 }, { "epoch": 1.62, "grad_norm": 12.95376418351282, "learning_rate": 5.196460269206794e-06, "loss": 1.0328, "step": 16030 }, { "epoch": 1.62, "grad_norm": 10.814262958608504, "learning_rate": 5.193529502678865e-06, "loss": 1.0316, "step": 16035 }, { "epoch": 1.62, "grad_norm": 7.929773352393471, "learning_rate": 5.190598669557618e-06, "loss": 1.0064, "step": 16040 }, { "epoch": 1.62, "grad_norm": 9.322807476578738, "learning_rate": 5.187667770851552e-06, "loss": 1.074, "step": 16045 }, { "epoch": 1.62, "grad_norm": 19.974955659049545, "learning_rate": 5.184736807569185e-06, "loss": 1.0602, "step": 16050 }, { "epoch": 1.62, "grad_norm": 6.79221091060615, "learning_rate": 5.1818057807190584e-06, "loss": 0.9562, "step": 16055 }, { "epoch": 1.62, "grad_norm": 12.40831543670176, "learning_rate": 5.178874691309736e-06, "loss": 0.9686, "step": 16060 }, { "epoch": 1.62, "grad_norm": 9.394481238822914, "learning_rate": 5.175943540349804e-06, "loss": 1.0069, "step": 16065 }, { "epoch": 1.62, "grad_norm": 26.236056559541815, "learning_rate": 5.173012328847867e-06, "loss": 1.0099, "step": 16070 }, { "epoch": 1.62, "grad_norm": 32.35843255079721, "learning_rate": 5.170081057812553e-06, "loss": 1.0193, "step": 16075 }, { "epoch": 1.62, "grad_norm": 39.151728665016165, "learning_rate": 5.167149728252511e-06, "loss": 1.0183, "step": 16080 }, { "epoch": 1.62, "grad_norm": 36.6253507644504, "learning_rate": 5.164218341176405e-06, "loss": 1.0064, "step": 16085 }, { "epoch": 1.62, "grad_norm": 10.653327592026665, "learning_rate": 5.161286897592925e-06, "loss": 1.0032, "step": 16090 }, { "epoch": 1.62, "grad_norm": 40.64514818536792, "learning_rate": 5.1583553985107785e-06, "loss": 1.0449, "step": 16095 }, { "epoch": 1.62, "grad_norm": 7.140483573782942, "learning_rate": 5.15542384493869e-06, "loss": 0.9884, "step": 16100 }, { "epoch": 1.62, "grad_norm": 10.083417716052871, "learning_rate": 5.152492237885405e-06, "loss": 0.9715, "step": 16105 }, { "epoch": 1.62, "grad_norm": 8.234421458690278, "learning_rate": 5.149560578359687e-06, "loss": 1.0033, "step": 16110 }, { "epoch": 1.62, "grad_norm": 36.12020745998665, "learning_rate": 5.146628867370316e-06, "loss": 0.9946, "step": 16115 }, { "epoch": 1.63, "grad_norm": 43.702150113062764, "learning_rate": 5.143697105926092e-06, "loss": 1.0268, "step": 16120 }, { "epoch": 1.63, "grad_norm": 26.741514849915372, "learning_rate": 5.140765295035832e-06, "loss": 0.9801, "step": 16125 }, { "epoch": 1.63, "grad_norm": 19.76751637787017, "learning_rate": 5.137833435708368e-06, "loss": 1.0012, "step": 16130 }, { "epoch": 1.63, "grad_norm": 7.159552963703967, "learning_rate": 5.13490152895255e-06, "loss": 0.9978, "step": 16135 }, { "epoch": 1.63, "grad_norm": 30.926519715727775, "learning_rate": 5.131969575777245e-06, "loss": 0.9883, "step": 16140 }, { "epoch": 1.63, "grad_norm": 25.473821960921036, "learning_rate": 5.129037577191335e-06, "loss": 1.0405, "step": 16145 }, { "epoch": 1.63, "grad_norm": 28.72089383268889, "learning_rate": 5.126105534203717e-06, "loss": 1.029, "step": 16150 }, { "epoch": 1.63, "grad_norm": 5.484114156842334, "learning_rate": 5.123173447823308e-06, "loss": 1.0108, "step": 16155 }, { "epoch": 1.63, "grad_norm": 11.445040855239327, "learning_rate": 5.120241319059031e-06, "loss": 1.0394, "step": 16160 }, { "epoch": 1.63, "grad_norm": 46.8445866363268, "learning_rate": 5.11730914891983e-06, "loss": 1.0269, "step": 16165 }, { "epoch": 1.63, "grad_norm": 7.866166214418564, "learning_rate": 5.114376938414665e-06, "loss": 1.0182, "step": 16170 }, { "epoch": 1.63, "grad_norm": 9.67096279704452, "learning_rate": 5.1114446885525046e-06, "loss": 1.0391, "step": 16175 }, { "epoch": 1.63, "grad_norm": 8.017881971905954, "learning_rate": 5.108512400342332e-06, "loss": 1.0013, "step": 16180 }, { "epoch": 1.63, "grad_norm": 7.040845594445138, "learning_rate": 5.105580074793146e-06, "loss": 0.9716, "step": 16185 }, { "epoch": 1.63, "grad_norm": 5.396904757008385, "learning_rate": 5.102647712913958e-06, "loss": 1.0135, "step": 16190 }, { "epoch": 1.63, "grad_norm": 6.974532267997327, "learning_rate": 5.09971531571379e-06, "loss": 0.9845, "step": 16195 }, { "epoch": 1.63, "grad_norm": 7.373761571462232, "learning_rate": 5.096782884201676e-06, "loss": 0.9918, "step": 16200 }, { "epoch": 1.63, "grad_norm": 8.083849869450802, "learning_rate": 5.093850419386667e-06, "loss": 1.0365, "step": 16205 }, { "epoch": 1.63, "grad_norm": 8.544053197681697, "learning_rate": 5.090917922277815e-06, "loss": 1.0435, "step": 16210 }, { "epoch": 1.63, "grad_norm": 12.317444178392435, "learning_rate": 5.087985393884194e-06, "loss": 0.9998, "step": 16215 }, { "epoch": 1.64, "grad_norm": 6.149487955374466, "learning_rate": 5.0850528352148846e-06, "loss": 0.9818, "step": 16220 }, { "epoch": 1.64, "grad_norm": 8.896241226392624, "learning_rate": 5.082120247278973e-06, "loss": 1.0041, "step": 16225 }, { "epoch": 1.64, "grad_norm": 12.43839173359589, "learning_rate": 5.079187631085564e-06, "loss": 1.0192, "step": 16230 }, { "epoch": 1.64, "grad_norm": 5.759400539341507, "learning_rate": 5.076254987643767e-06, "loss": 0.9977, "step": 16235 }, { "epoch": 1.64, "grad_norm": 8.259653918952248, "learning_rate": 5.073322317962701e-06, "loss": 1.0643, "step": 16240 }, { "epoch": 1.64, "grad_norm": 6.969349385953867, "learning_rate": 5.070389623051496e-06, "loss": 0.9602, "step": 16245 }, { "epoch": 1.64, "grad_norm": 13.913741738579871, "learning_rate": 5.067456903919289e-06, "loss": 0.9951, "step": 16250 }, { "epoch": 1.64, "grad_norm": 9.38234563013056, "learning_rate": 5.0645241615752246e-06, "loss": 0.973, "step": 16255 }, { "epoch": 1.64, "grad_norm": 7.500639363718501, "learning_rate": 5.0615913970284594e-06, "loss": 0.9942, "step": 16260 }, { "epoch": 1.64, "grad_norm": 12.000246455537164, "learning_rate": 5.058658611288153e-06, "loss": 0.9913, "step": 16265 }, { "epoch": 1.64, "grad_norm": 25.602213423387873, "learning_rate": 5.055725805363474e-06, "loss": 0.9963, "step": 16270 }, { "epoch": 1.64, "grad_norm": 14.792623271639881, "learning_rate": 5.052792980263598e-06, "loss": 1.0323, "step": 16275 }, { "epoch": 1.64, "grad_norm": 7.58658288712669, "learning_rate": 5.04986013699771e-06, "loss": 1.043, "step": 16280 }, { "epoch": 1.64, "grad_norm": 21.70761618039746, "learning_rate": 5.046927276574994e-06, "loss": 1.0099, "step": 16285 }, { "epoch": 1.64, "grad_norm": 19.85364796172033, "learning_rate": 5.043994400004648e-06, "loss": 1.0249, "step": 16290 }, { "epoch": 1.64, "grad_norm": 10.427242979146856, "learning_rate": 5.041061508295872e-06, "loss": 1.0583, "step": 16295 }, { "epoch": 1.64, "grad_norm": 6.200635499849241, "learning_rate": 5.038128602457868e-06, "loss": 1.0306, "step": 16300 }, { "epoch": 1.64, "grad_norm": 9.09617656159858, "learning_rate": 5.0351956834998504e-06, "loss": 0.9774, "step": 16305 }, { "epoch": 1.64, "grad_norm": 7.715743797183803, "learning_rate": 5.032262752431031e-06, "loss": 1.0376, "step": 16310 }, { "epoch": 1.64, "grad_norm": 6.819222953107973, "learning_rate": 5.029329810260629e-06, "loss": 1.0369, "step": 16315 }, { "epoch": 1.65, "grad_norm": 6.80103133398059, "learning_rate": 5.026396857997867e-06, "loss": 0.9847, "step": 16320 }, { "epoch": 1.65, "grad_norm": 5.888074167871263, "learning_rate": 5.023463896651972e-06, "loss": 0.9626, "step": 16325 }, { "epoch": 1.65, "grad_norm": 5.705718315442812, "learning_rate": 5.020530927232173e-06, "loss": 0.9942, "step": 16330 }, { "epoch": 1.65, "grad_norm": 5.41260032660405, "learning_rate": 5.017597950747701e-06, "loss": 0.9902, "step": 16335 }, { "epoch": 1.65, "grad_norm": 8.255042843953591, "learning_rate": 5.014664968207791e-06, "loss": 1.073, "step": 16340 }, { "epoch": 1.65, "grad_norm": 7.170200899510636, "learning_rate": 5.01173198062168e-06, "loss": 1.011, "step": 16345 }, { "epoch": 1.65, "grad_norm": 6.790745864427179, "learning_rate": 5.008798988998605e-06, "loss": 1.0245, "step": 16350 }, { "epoch": 1.65, "grad_norm": 7.923644495165293, "learning_rate": 5.005865994347805e-06, "loss": 0.9935, "step": 16355 }, { "epoch": 1.65, "grad_norm": 11.578460220226166, "learning_rate": 5.002932997678524e-06, "loss": 1.0125, "step": 16360 }, { "epoch": 1.65, "grad_norm": 6.753338369687813, "learning_rate": 5e-06, "loss": 0.956, "step": 16365 }, { "epoch": 1.65, "grad_norm": 8.70576280011481, "learning_rate": 4.997067002321478e-06, "loss": 0.9988, "step": 16370 }, { "epoch": 1.65, "grad_norm": 6.817150720143112, "learning_rate": 4.994134005652196e-06, "loss": 0.9805, "step": 16375 }, { "epoch": 1.65, "grad_norm": 8.45500405763143, "learning_rate": 4.991201011001398e-06, "loss": 0.9904, "step": 16380 }, { "epoch": 1.65, "grad_norm": 6.819425098234933, "learning_rate": 4.988268019378322e-06, "loss": 1.0371, "step": 16385 }, { "epoch": 1.65, "grad_norm": 5.1871866152562625, "learning_rate": 4.9853350317922105e-06, "loss": 1.0262, "step": 16390 }, { "epoch": 1.65, "grad_norm": 27.438805065280388, "learning_rate": 4.9824020492523e-06, "loss": 1.0582, "step": 16395 }, { "epoch": 1.65, "grad_norm": 9.955853598441074, "learning_rate": 4.979469072767829e-06, "loss": 0.9541, "step": 16400 }, { "epoch": 1.65, "grad_norm": 8.369021538950003, "learning_rate": 4.976536103348029e-06, "loss": 1.0106, "step": 16405 }, { "epoch": 1.65, "grad_norm": 17.15791993164313, "learning_rate": 4.973603142002134e-06, "loss": 0.9559, "step": 16410 }, { "epoch": 1.65, "grad_norm": 11.55419850396348, "learning_rate": 4.9706701897393715e-06, "loss": 1.0312, "step": 16415 }, { "epoch": 1.66, "grad_norm": 5.541969961872092, "learning_rate": 4.967737247568971e-06, "loss": 1.048, "step": 16420 }, { "epoch": 1.66, "grad_norm": 14.509974176445898, "learning_rate": 4.964804316500151e-06, "loss": 0.9561, "step": 16425 }, { "epoch": 1.66, "grad_norm": 6.444622668215799, "learning_rate": 4.961871397542133e-06, "loss": 1.0031, "step": 16430 }, { "epoch": 1.66, "grad_norm": 20.414380824876183, "learning_rate": 4.95893849170413e-06, "loss": 0.9993, "step": 16435 }, { "epoch": 1.66, "grad_norm": 12.008312874134626, "learning_rate": 4.956005599995354e-06, "loss": 1.0188, "step": 16440 }, { "epoch": 1.66, "grad_norm": 11.690679034928507, "learning_rate": 4.953072723425007e-06, "loss": 1.0049, "step": 16445 }, { "epoch": 1.66, "grad_norm": 7.541363194151192, "learning_rate": 4.950139863002293e-06, "loss": 1.0243, "step": 16450 }, { "epoch": 1.66, "grad_norm": 8.849347533184488, "learning_rate": 4.947207019736403e-06, "loss": 0.9921, "step": 16455 }, { "epoch": 1.66, "grad_norm": 15.513672027070383, "learning_rate": 4.944274194636528e-06, "loss": 1.0574, "step": 16460 }, { "epoch": 1.66, "grad_norm": 5.615937012219703, "learning_rate": 4.941341388711849e-06, "loss": 1.0081, "step": 16465 }, { "epoch": 1.66, "grad_norm": 9.633247658658282, "learning_rate": 4.938408602971543e-06, "loss": 0.9871, "step": 16470 }, { "epoch": 1.66, "grad_norm": 5.3010119117651735, "learning_rate": 4.935475838424775e-06, "loss": 0.9994, "step": 16475 }, { "epoch": 1.66, "grad_norm": 7.134598686246772, "learning_rate": 4.932543096080712e-06, "loss": 1.0322, "step": 16480 }, { "epoch": 1.66, "grad_norm": 10.047466822582225, "learning_rate": 4.929610376948505e-06, "loss": 0.9992, "step": 16485 }, { "epoch": 1.66, "grad_norm": 5.05062321545772, "learning_rate": 4.9266776820373e-06, "loss": 1.0001, "step": 16490 }, { "epoch": 1.66, "grad_norm": 6.863782270164636, "learning_rate": 4.923745012356235e-06, "loss": 1.0463, "step": 16495 }, { "epoch": 1.66, "grad_norm": 9.068238812118306, "learning_rate": 4.920812368914438e-06, "loss": 1.0067, "step": 16500 }, { "epoch": 1.66, "grad_norm": 16.288893208069187, "learning_rate": 4.917879752721029e-06, "loss": 1.0203, "step": 16505 }, { "epoch": 1.66, "grad_norm": 26.670296805020982, "learning_rate": 4.914947164785118e-06, "loss": 0.9884, "step": 16510 }, { "epoch": 1.67, "grad_norm": 6.97630357097606, "learning_rate": 4.9120146061158084e-06, "loss": 1.0277, "step": 16515 }, { "epoch": 1.67, "grad_norm": 5.184246433267411, "learning_rate": 4.909082077722186e-06, "loss": 0.9844, "step": 16520 }, { "epoch": 1.67, "grad_norm": 6.626723164196851, "learning_rate": 4.906149580613336e-06, "loss": 1.0073, "step": 16525 }, { "epoch": 1.67, "grad_norm": 4.8935410810085385, "learning_rate": 4.903217115798325e-06, "loss": 1.0257, "step": 16530 }, { "epoch": 1.67, "grad_norm": 6.181904513877318, "learning_rate": 4.900284684286213e-06, "loss": 1.0207, "step": 16535 }, { "epoch": 1.67, "grad_norm": 8.826227520357035, "learning_rate": 4.897352287086043e-06, "loss": 1.006, "step": 16540 }, { "epoch": 1.67, "grad_norm": 13.01286550419651, "learning_rate": 4.894419925206856e-06, "loss": 0.9993, "step": 16545 }, { "epoch": 1.67, "grad_norm": 7.8796431874940085, "learning_rate": 4.8914875996576695e-06, "loss": 1.0159, "step": 16550 }, { "epoch": 1.67, "grad_norm": 10.036244702718315, "learning_rate": 4.888555311447498e-06, "loss": 1.0214, "step": 16555 }, { "epoch": 1.67, "grad_norm": 5.9240077190368465, "learning_rate": 4.885623061585337e-06, "loss": 1.0047, "step": 16560 }, { "epoch": 1.67, "grad_norm": 5.430152786339917, "learning_rate": 4.8826908510801715e-06, "loss": 0.9777, "step": 16565 }, { "epoch": 1.67, "grad_norm": 5.585109078393798, "learning_rate": 4.87975868094097e-06, "loss": 1.009, "step": 16570 }, { "epoch": 1.67, "grad_norm": 9.150641168623665, "learning_rate": 4.876826552176695e-06, "loss": 1.0202, "step": 16575 }, { "epoch": 1.67, "grad_norm": 5.935491981122044, "learning_rate": 4.873894465796283e-06, "loss": 0.9867, "step": 16580 }, { "epoch": 1.67, "grad_norm": 9.611867042469141, "learning_rate": 4.8709624228086665e-06, "loss": 1.0331, "step": 16585 }, { "epoch": 1.67, "grad_norm": 5.611371344783553, "learning_rate": 4.868030424222756e-06, "loss": 1.0573, "step": 16590 }, { "epoch": 1.67, "grad_norm": 32.63255101680103, "learning_rate": 4.865098471047452e-06, "loss": 0.9927, "step": 16595 }, { "epoch": 1.67, "grad_norm": 19.81342606970215, "learning_rate": 4.862166564291633e-06, "loss": 1.0103, "step": 16600 }, { "epoch": 1.67, "grad_norm": 10.499850254147836, "learning_rate": 4.859234704964169e-06, "loss": 1.0191, "step": 16605 }, { "epoch": 1.67, "grad_norm": 10.785974686070636, "learning_rate": 4.856302894073908e-06, "loss": 1.0459, "step": 16610 }, { "epoch": 1.68, "grad_norm": 6.036809757407398, "learning_rate": 4.853371132629685e-06, "loss": 1.019, "step": 16615 }, { "epoch": 1.68, "grad_norm": 7.013408316700486, "learning_rate": 4.8504394216403145e-06, "loss": 0.9935, "step": 16620 }, { "epoch": 1.68, "grad_norm": 5.371096164511118, "learning_rate": 4.8475077621145965e-06, "loss": 1.0574, "step": 16625 }, { "epoch": 1.68, "grad_norm": 5.6340660387069, "learning_rate": 4.844576155061313e-06, "loss": 1.0259, "step": 16630 }, { "epoch": 1.68, "grad_norm": 5.442063040672251, "learning_rate": 4.841644601489222e-06, "loss": 0.9876, "step": 16635 }, { "epoch": 1.68, "grad_norm": 5.244679072108467, "learning_rate": 4.8387131024070775e-06, "loss": 1.0156, "step": 16640 }, { "epoch": 1.68, "grad_norm": 9.89473569238001, "learning_rate": 4.835781658823597e-06, "loss": 1.008, "step": 16645 }, { "epoch": 1.68, "grad_norm": 11.662598637323155, "learning_rate": 4.832850271747493e-06, "loss": 1.043, "step": 16650 }, { "epoch": 1.68, "grad_norm": 5.7050325167017855, "learning_rate": 4.829918942187449e-06, "loss": 0.9853, "step": 16655 }, { "epoch": 1.68, "grad_norm": 8.552096546462334, "learning_rate": 4.826987671152136e-06, "loss": 0.9813, "step": 16660 }, { "epoch": 1.68, "grad_norm": 9.231427286814666, "learning_rate": 4.8240564596501976e-06, "loss": 1.0655, "step": 16665 }, { "epoch": 1.68, "grad_norm": 10.973960826013965, "learning_rate": 4.821125308690267e-06, "loss": 1.0352, "step": 16670 }, { "epoch": 1.68, "grad_norm": 5.115569281328436, "learning_rate": 4.818194219280943e-06, "loss": 0.991, "step": 16675 }, { "epoch": 1.68, "grad_norm": 18.37398551239263, "learning_rate": 4.815263192430818e-06, "loss": 1.022, "step": 16680 }, { "epoch": 1.68, "grad_norm": 9.116026171727619, "learning_rate": 4.81233222914845e-06, "loss": 1.0154, "step": 16685 }, { "epoch": 1.68, "grad_norm": 9.88286013001611, "learning_rate": 4.809401330442384e-06, "loss": 0.985, "step": 16690 }, { "epoch": 1.68, "grad_norm": 10.794187658035513, "learning_rate": 4.806470497321135e-06, "loss": 0.9761, "step": 16695 }, { "epoch": 1.68, "grad_norm": 15.911140347032589, "learning_rate": 4.803539730793207e-06, "loss": 1.0205, "step": 16700 }, { "epoch": 1.68, "grad_norm": 25.425147897472836, "learning_rate": 4.8006090318670675e-06, "loss": 1.0148, "step": 16705 }, { "epoch": 1.68, "grad_norm": 8.894959658277786, "learning_rate": 4.797678401551172e-06, "loss": 1.046, "step": 16710 }, { "epoch": 1.69, "grad_norm": 6.720123354590075, "learning_rate": 4.794747840853943e-06, "loss": 0.9894, "step": 16715 }, { "epoch": 1.69, "grad_norm": 9.45932942523887, "learning_rate": 4.791817350783788e-06, "loss": 1.0095, "step": 16720 }, { "epoch": 1.69, "grad_norm": 7.482267961424742, "learning_rate": 4.7888869323490805e-06, "loss": 1.0267, "step": 16725 }, { "epoch": 1.69, "grad_norm": 7.750108166742298, "learning_rate": 4.785956586558182e-06, "loss": 1.01, "step": 16730 }, { "epoch": 1.69, "grad_norm": 7.196071199273943, "learning_rate": 4.783026314419414e-06, "loss": 1.0019, "step": 16735 }, { "epoch": 1.69, "grad_norm": 7.908790043567544, "learning_rate": 4.780096116941087e-06, "loss": 1.0099, "step": 16740 }, { "epoch": 1.69, "grad_norm": 6.650041814165618, "learning_rate": 4.777165995131473e-06, "loss": 1.0018, "step": 16745 }, { "epoch": 1.69, "grad_norm": 17.682189805974183, "learning_rate": 4.77423594999883e-06, "loss": 1.0463, "step": 16750 }, { "epoch": 1.69, "grad_norm": 5.3288850258912115, "learning_rate": 4.771305982551381e-06, "loss": 0.9903, "step": 16755 }, { "epoch": 1.69, "grad_norm": 26.904240163387783, "learning_rate": 4.7683760937973235e-06, "loss": 1.0455, "step": 16760 }, { "epoch": 1.69, "grad_norm": 40.74926258668739, "learning_rate": 4.7654462847448316e-06, "loss": 1.0471, "step": 16765 }, { "epoch": 1.69, "grad_norm": 8.09515246288266, "learning_rate": 4.762516556402048e-06, "loss": 0.9861, "step": 16770 }, { "epoch": 1.69, "grad_norm": 23.91326969614342, "learning_rate": 4.759586909777092e-06, "loss": 0.9822, "step": 16775 }, { "epoch": 1.69, "grad_norm": 5.47668710150236, "learning_rate": 4.75665734587805e-06, "loss": 1.0411, "step": 16780 }, { "epoch": 1.69, "grad_norm": 25.25118285825452, "learning_rate": 4.753727865712983e-06, "loss": 0.9719, "step": 16785 }, { "epoch": 1.69, "grad_norm": 8.448283026301109, "learning_rate": 4.750798470289922e-06, "loss": 1.0017, "step": 16790 }, { "epoch": 1.69, "grad_norm": 14.430106955844881, "learning_rate": 4.74786916061687e-06, "loss": 0.973, "step": 16795 }, { "epoch": 1.69, "grad_norm": 5.983024090698553, "learning_rate": 4.744939937701797e-06, "loss": 1.0874, "step": 16800 }, { "epoch": 1.69, "grad_norm": 48.062147831281614, "learning_rate": 4.74201080255265e-06, "loss": 0.9621, "step": 16805 }, { "epoch": 1.69, "grad_norm": 8.270595501676446, "learning_rate": 4.739081756177339e-06, "loss": 0.9702, "step": 16810 }, { "epoch": 1.7, "grad_norm": 13.539937937696077, "learning_rate": 4.736152799583746e-06, "loss": 0.9727, "step": 16815 }, { "epoch": 1.7, "grad_norm": 10.640223121696199, "learning_rate": 4.733223933779723e-06, "loss": 1.0139, "step": 16820 }, { "epoch": 1.7, "grad_norm": 10.791427548679868, "learning_rate": 4.73029515977309e-06, "loss": 1.0287, "step": 16825 }, { "epoch": 1.7, "grad_norm": 20.742115000163974, "learning_rate": 4.727366478571633e-06, "loss": 1.0327, "step": 16830 }, { "epoch": 1.7, "grad_norm": 6.944895323419959, "learning_rate": 4.724437891183112e-06, "loss": 0.9897, "step": 16835 }, { "epoch": 1.7, "grad_norm": 13.526581874208533, "learning_rate": 4.721509398615249e-06, "loss": 0.9737, "step": 16840 }, { "epoch": 1.7, "grad_norm": 9.652363648702273, "learning_rate": 4.718581001875737e-06, "loss": 1.0013, "step": 16845 }, { "epoch": 1.7, "grad_norm": 5.278136771376039, "learning_rate": 4.715652701972233e-06, "loss": 0.9977, "step": 16850 }, { "epoch": 1.7, "grad_norm": 5.61950884273999, "learning_rate": 4.712724499912362e-06, "loss": 1.0335, "step": 16855 }, { "epoch": 1.7, "grad_norm": 7.070495461354499, "learning_rate": 4.709796396703715e-06, "loss": 0.942, "step": 16860 }, { "epoch": 1.7, "grad_norm": 13.222729282641115, "learning_rate": 4.706868393353854e-06, "loss": 0.9613, "step": 16865 }, { "epoch": 1.7, "grad_norm": 5.940206553289965, "learning_rate": 4.703940490870296e-06, "loss": 1.0139, "step": 16870 }, { "epoch": 1.7, "grad_norm": 6.654456357112275, "learning_rate": 4.701012690260534e-06, "loss": 1.0285, "step": 16875 }, { "epoch": 1.7, "grad_norm": 7.150880433070066, "learning_rate": 4.69808499253202e-06, "loss": 0.9728, "step": 16880 }, { "epoch": 1.7, "grad_norm": 6.347270980874507, "learning_rate": 4.695157398692171e-06, "loss": 1.0049, "step": 16885 }, { "epoch": 1.7, "grad_norm": 8.608674093571464, "learning_rate": 4.69222990974837e-06, "loss": 0.9961, "step": 16890 }, { "epoch": 1.7, "grad_norm": 10.716642295325675, "learning_rate": 4.689302526707961e-06, "loss": 0.9758, "step": 16895 }, { "epoch": 1.7, "grad_norm": 22.581713134228753, "learning_rate": 4.686375250578259e-06, "loss": 0.9866, "step": 16900 }, { "epoch": 1.7, "grad_norm": 16.87550070685061, "learning_rate": 4.683448082366532e-06, "loss": 1.0443, "step": 16905 }, { "epoch": 1.7, "grad_norm": 5.306310736277971, "learning_rate": 4.680521023080018e-06, "loss": 1.0307, "step": 16910 }, { "epoch": 1.71, "grad_norm": 14.584978128407352, "learning_rate": 4.677594073725915e-06, "loss": 1.0003, "step": 16915 }, { "epoch": 1.71, "grad_norm": 16.88851693358926, "learning_rate": 4.674667235311384e-06, "loss": 1.0235, "step": 16920 }, { "epoch": 1.71, "grad_norm": 30.054488713226664, "learning_rate": 4.6717405088435445e-06, "loss": 0.9955, "step": 16925 }, { "epoch": 1.71, "grad_norm": 13.621390724593681, "learning_rate": 4.668813895329483e-06, "loss": 1.0425, "step": 16930 }, { "epoch": 1.71, "grad_norm": 11.266159283078437, "learning_rate": 4.6658873957762445e-06, "loss": 1.0256, "step": 16935 }, { "epoch": 1.71, "grad_norm": 5.748902310168273, "learning_rate": 4.662961011190835e-06, "loss": 0.9987, "step": 16940 }, { "epoch": 1.71, "grad_norm": 5.719847757693675, "learning_rate": 4.660034742580218e-06, "loss": 0.9898, "step": 16945 }, { "epoch": 1.71, "grad_norm": 7.953366372779367, "learning_rate": 4.657108590951323e-06, "loss": 0.9953, "step": 16950 }, { "epoch": 1.71, "grad_norm": 13.829895806922655, "learning_rate": 4.654182557311031e-06, "loss": 1.0197, "step": 16955 }, { "epoch": 1.71, "grad_norm": 9.987562304626971, "learning_rate": 4.651256642666194e-06, "loss": 1.0042, "step": 16960 }, { "epoch": 1.71, "grad_norm": 22.439513059825465, "learning_rate": 4.648330848023611e-06, "loss": 1.0018, "step": 16965 }, { "epoch": 1.71, "grad_norm": 6.647793940331765, "learning_rate": 4.645405174390049e-06, "loss": 1.0185, "step": 16970 }, { "epoch": 1.71, "grad_norm": 9.646874515262727, "learning_rate": 4.642479622772227e-06, "loss": 0.9786, "step": 16975 }, { "epoch": 1.71, "grad_norm": 10.023487564809672, "learning_rate": 4.639554194176827e-06, "loss": 1.0247, "step": 16980 }, { "epoch": 1.71, "grad_norm": 5.715892801327604, "learning_rate": 4.636628889610481e-06, "loss": 1.0001, "step": 16985 }, { "epoch": 1.71, "grad_norm": 18.11156588494887, "learning_rate": 4.63370371007979e-06, "loss": 1.0271, "step": 16990 }, { "epoch": 1.71, "grad_norm": 22.52875419448177, "learning_rate": 4.630778656591301e-06, "loss": 0.9778, "step": 16995 }, { "epoch": 1.71, "grad_norm": 10.726171691868798, "learning_rate": 4.6278537301515256e-06, "loss": 0.967, "step": 17000 }, { "epoch": 1.71, "grad_norm": 23.2971827059768, "learning_rate": 4.624928931766924e-06, "loss": 1.0191, "step": 17005 }, { "epoch": 1.71, "grad_norm": 9.843956527421028, "learning_rate": 4.62200426244392e-06, "loss": 1.0643, "step": 17010 }, { "epoch": 1.72, "grad_norm": 28.33813073646957, "learning_rate": 4.619079723188889e-06, "loss": 1.001, "step": 17015 }, { "epoch": 1.72, "grad_norm": 26.971320195799127, "learning_rate": 4.616155315008159e-06, "loss": 1.0223, "step": 17020 }, { "epoch": 1.72, "grad_norm": 7.941925646816498, "learning_rate": 4.6132310389080205e-06, "loss": 1.0409, "step": 17025 }, { "epoch": 1.72, "grad_norm": 9.057853691194898, "learning_rate": 4.610306895894711e-06, "loss": 1.0512, "step": 17030 }, { "epoch": 1.72, "grad_norm": 14.217344829324652, "learning_rate": 4.607382886974428e-06, "loss": 0.9383, "step": 17035 }, { "epoch": 1.72, "grad_norm": 7.626275290185591, "learning_rate": 4.604459013153316e-06, "loss": 0.9898, "step": 17040 }, { "epoch": 1.72, "grad_norm": 6.647514563751912, "learning_rate": 4.601535275437482e-06, "loss": 0.987, "step": 17045 }, { "epoch": 1.72, "grad_norm": 8.78296404389408, "learning_rate": 4.598611674832977e-06, "loss": 1.033, "step": 17050 }, { "epoch": 1.72, "grad_norm": 5.24405677217857, "learning_rate": 4.595688212345814e-06, "loss": 1.022, "step": 17055 }, { "epoch": 1.72, "grad_norm": 15.862725670052217, "learning_rate": 4.592764888981948e-06, "loss": 1.0246, "step": 17060 }, { "epoch": 1.72, "grad_norm": 9.559220803954123, "learning_rate": 4.589841705747298e-06, "loss": 0.9701, "step": 17065 }, { "epoch": 1.72, "grad_norm": 7.9863393651332055, "learning_rate": 4.586918663647723e-06, "loss": 1.0066, "step": 17070 }, { "epoch": 1.72, "grad_norm": 8.517380585020579, "learning_rate": 4.583995763689043e-06, "loss": 0.994, "step": 17075 }, { "epoch": 1.72, "grad_norm": 17.356856092450435, "learning_rate": 4.581073006877021e-06, "loss": 0.9981, "step": 17080 }, { "epoch": 1.72, "grad_norm": 7.891020181152935, "learning_rate": 4.578150394217379e-06, "loss": 0.9576, "step": 17085 }, { "epoch": 1.72, "grad_norm": 6.722072676995166, "learning_rate": 4.575227926715783e-06, "loss": 0.9896, "step": 17090 }, { "epoch": 1.72, "grad_norm": 10.102300230727144, "learning_rate": 4.572305605377852e-06, "loss": 0.9656, "step": 17095 }, { "epoch": 1.72, "grad_norm": 12.58749119120385, "learning_rate": 4.569383431209153e-06, "loss": 0.9775, "step": 17100 }, { "epoch": 1.72, "grad_norm": 7.591727232593307, "learning_rate": 4.5664614052152056e-06, "loss": 0.9955, "step": 17105 }, { "epoch": 1.73, "grad_norm": 8.519427533241876, "learning_rate": 4.5635395284014714e-06, "loss": 0.9577, "step": 17110 }, { "epoch": 1.73, "grad_norm": 9.03651336287879, "learning_rate": 4.560617801773371e-06, "loss": 0.9762, "step": 17115 }, { "epoch": 1.73, "grad_norm": 5.72300338152943, "learning_rate": 4.557696226336265e-06, "loss": 0.9808, "step": 17120 }, { "epoch": 1.73, "grad_norm": 10.967811199066373, "learning_rate": 4.554774803095467e-06, "loss": 0.9577, "step": 17125 }, { "epoch": 1.73, "grad_norm": 20.01930160997758, "learning_rate": 4.551853533056231e-06, "loss": 0.9873, "step": 17130 }, { "epoch": 1.73, "grad_norm": 6.00087246915051, "learning_rate": 4.548932417223768e-06, "loss": 0.9588, "step": 17135 }, { "epoch": 1.73, "grad_norm": 18.4835082019977, "learning_rate": 4.546011456603229e-06, "loss": 1.0342, "step": 17140 }, { "epoch": 1.73, "grad_norm": 10.10114229581885, "learning_rate": 4.543090652199713e-06, "loss": 0.9772, "step": 17145 }, { "epoch": 1.73, "grad_norm": 10.719237844124875, "learning_rate": 4.540170005018269e-06, "loss": 0.9921, "step": 17150 }, { "epoch": 1.73, "grad_norm": 38.72733323117287, "learning_rate": 4.537249516063886e-06, "loss": 1.0065, "step": 17155 }, { "epoch": 1.73, "grad_norm": 18.655554225283357, "learning_rate": 4.534329186341503e-06, "loss": 1.0126, "step": 17160 }, { "epoch": 1.73, "grad_norm": 6.583481223929379, "learning_rate": 4.531409016856001e-06, "loss": 1.0153, "step": 17165 }, { "epoch": 1.73, "grad_norm": 17.630329560996714, "learning_rate": 4.528489008612211e-06, "loss": 0.9809, "step": 17170 }, { "epoch": 1.73, "grad_norm": 19.1968269463294, "learning_rate": 4.5255691626149e-06, "loss": 1.0535, "step": 17175 }, { "epoch": 1.73, "grad_norm": 7.514058147973323, "learning_rate": 4.52264947986879e-06, "loss": 1.0545, "step": 17180 }, { "epoch": 1.73, "grad_norm": 37.06831696340187, "learning_rate": 4.5197299613785355e-06, "loss": 0.9911, "step": 17185 }, { "epoch": 1.73, "grad_norm": 18.270934411108115, "learning_rate": 4.516810608148744e-06, "loss": 1.0212, "step": 17190 }, { "epoch": 1.73, "grad_norm": 5.937408149311286, "learning_rate": 4.5138914211839605e-06, "loss": 0.9908, "step": 17195 }, { "epoch": 1.73, "grad_norm": 13.308259612931357, "learning_rate": 4.510972401488675e-06, "loss": 1.0033, "step": 17200 }, { "epoch": 1.73, "grad_norm": 5.430432297635973, "learning_rate": 4.5080535500673175e-06, "loss": 1.0123, "step": 17205 }, { "epoch": 1.74, "grad_norm": 5.6980510237613915, "learning_rate": 4.505134867924266e-06, "loss": 1.0147, "step": 17210 }, { "epoch": 1.74, "grad_norm": 6.142734641134295, "learning_rate": 4.502216356063831e-06, "loss": 0.9704, "step": 17215 }, { "epoch": 1.74, "grad_norm": 6.755769632860416, "learning_rate": 4.499298015490275e-06, "loss": 0.9695, "step": 17220 }, { "epoch": 1.74, "grad_norm": 7.707542636435186, "learning_rate": 4.496379847207792e-06, "loss": 1.0009, "step": 17225 }, { "epoch": 1.74, "grad_norm": 17.107552234617984, "learning_rate": 4.493461852220524e-06, "loss": 1.0208, "step": 17230 }, { "epoch": 1.74, "grad_norm": 17.540283502674953, "learning_rate": 4.490544031532546e-06, "loss": 0.9819, "step": 17235 }, { "epoch": 1.74, "grad_norm": 7.8252134143751455, "learning_rate": 4.487626386147884e-06, "loss": 1.0146, "step": 17240 }, { "epoch": 1.74, "grad_norm": 14.68033910105838, "learning_rate": 4.48470891707049e-06, "loss": 1.0048, "step": 17245 }, { "epoch": 1.74, "grad_norm": 15.716785015753265, "learning_rate": 4.481791625304267e-06, "loss": 1.0389, "step": 17250 }, { "epoch": 1.74, "grad_norm": 13.748595545093872, "learning_rate": 4.478874511853051e-06, "loss": 0.9763, "step": 17255 }, { "epoch": 1.74, "grad_norm": 7.787979772469156, "learning_rate": 4.475957577720617e-06, "loss": 0.9967, "step": 17260 }, { "epoch": 1.74, "grad_norm": 8.623623483227947, "learning_rate": 4.473040823910681e-06, "loss": 1.051, "step": 17265 }, { "epoch": 1.74, "grad_norm": 7.899670736913321, "learning_rate": 4.4701242514268925e-06, "loss": 1.0022, "step": 17270 }, { "epoch": 1.74, "grad_norm": 6.208714786916086, "learning_rate": 4.467207861272846e-06, "loss": 1.0399, "step": 17275 }, { "epoch": 1.74, "grad_norm": 22.947292874334988, "learning_rate": 4.464291654452062e-06, "loss": 1.0067, "step": 17280 }, { "epoch": 1.74, "grad_norm": 14.629056253001238, "learning_rate": 4.46137563196801e-06, "loss": 0.9718, "step": 17285 }, { "epoch": 1.74, "grad_norm": 12.206480636864152, "learning_rate": 4.458459794824088e-06, "loss": 0.9993, "step": 17290 }, { "epoch": 1.74, "grad_norm": 4.803311960029917, "learning_rate": 4.455544144023635e-06, "loss": 0.9975, "step": 17295 }, { "epoch": 1.74, "grad_norm": 8.348121825749278, "learning_rate": 4.452628680569919e-06, "loss": 0.9796, "step": 17300 }, { "epoch": 1.74, "grad_norm": 6.288188813858446, "learning_rate": 4.449713405466154e-06, "loss": 0.9764, "step": 17305 }, { "epoch": 1.75, "grad_norm": 6.461168584339323, "learning_rate": 4.446798319715478e-06, "loss": 0.9525, "step": 17310 }, { "epoch": 1.75, "grad_norm": 6.714426835122893, "learning_rate": 4.443883424320974e-06, "loss": 1.0277, "step": 17315 }, { "epoch": 1.75, "grad_norm": 6.288258486608056, "learning_rate": 4.440968720285651e-06, "loss": 0.9845, "step": 17320 }, { "epoch": 1.75, "grad_norm": 4.975300294674621, "learning_rate": 4.438054208612459e-06, "loss": 0.9864, "step": 17325 }, { "epoch": 1.75, "grad_norm": 13.98775495317326, "learning_rate": 4.435139890304274e-06, "loss": 1.0285, "step": 17330 }, { "epoch": 1.75, "grad_norm": 8.307857755049275, "learning_rate": 4.4322257663639186e-06, "loss": 0.9848, "step": 17335 }, { "epoch": 1.75, "grad_norm": 6.699145118913506, "learning_rate": 4.4293118377941295e-06, "loss": 1.0009, "step": 17340 }, { "epoch": 1.75, "grad_norm": 8.902087498401224, "learning_rate": 4.426398105597595e-06, "loss": 1.0017, "step": 17345 }, { "epoch": 1.75, "grad_norm": 15.285934871439602, "learning_rate": 4.423484570776923e-06, "loss": 0.9993, "step": 17350 }, { "epoch": 1.75, "grad_norm": 10.455064209350546, "learning_rate": 4.420571234334661e-06, "loss": 1.0108, "step": 17355 }, { "epoch": 1.75, "grad_norm": 6.4557331429864115, "learning_rate": 4.417658097273282e-06, "loss": 0.9708, "step": 17360 }, { "epoch": 1.75, "grad_norm": 6.605121792110902, "learning_rate": 4.414745160595198e-06, "loss": 0.9765, "step": 17365 }, { "epoch": 1.75, "grad_norm": 5.619675388451704, "learning_rate": 4.411832425302742e-06, "loss": 1.0003, "step": 17370 }, { "epoch": 1.75, "grad_norm": 12.416456959181561, "learning_rate": 4.408919892398189e-06, "loss": 0.9591, "step": 17375 }, { "epoch": 1.75, "grad_norm": 5.601776990215138, "learning_rate": 4.406007562883736e-06, "loss": 1.0323, "step": 17380 }, { "epoch": 1.75, "grad_norm": 5.827378110856624, "learning_rate": 4.403095437761514e-06, "loss": 0.9753, "step": 17385 }, { "epoch": 1.75, "grad_norm": 10.286835453274794, "learning_rate": 4.400183518033579e-06, "loss": 0.9774, "step": 17390 }, { "epoch": 1.75, "grad_norm": 10.106658867927537, "learning_rate": 4.397271804701926e-06, "loss": 0.9667, "step": 17395 }, { "epoch": 1.75, "grad_norm": 10.385230411470802, "learning_rate": 4.394360298768469e-06, "loss": 0.9837, "step": 17400 }, { "epoch": 1.75, "grad_norm": 10.907928573203082, "learning_rate": 4.391449001235052e-06, "loss": 0.9549, "step": 17405 }, { "epoch": 1.76, "grad_norm": 9.133982511906204, "learning_rate": 4.388537913103454e-06, "loss": 1.0296, "step": 17410 }, { "epoch": 1.76, "grad_norm": 7.0215145597485265, "learning_rate": 4.385627035375377e-06, "loss": 1.0016, "step": 17415 }, { "epoch": 1.76, "grad_norm": 10.266710216270274, "learning_rate": 4.38271636905245e-06, "loss": 1.0154, "step": 17420 }, { "epoch": 1.76, "grad_norm": 7.678286453039773, "learning_rate": 4.3798059151362295e-06, "loss": 0.9848, "step": 17425 }, { "epoch": 1.76, "grad_norm": 6.549703960442, "learning_rate": 4.3768956746282026e-06, "loss": 1.0005, "step": 17430 }, { "epoch": 1.76, "grad_norm": 29.5138649807499, "learning_rate": 4.373985648529777e-06, "loss": 0.9626, "step": 17435 }, { "epoch": 1.76, "grad_norm": 5.480192541997506, "learning_rate": 4.371075837842294e-06, "loss": 0.9756, "step": 17440 }, { "epoch": 1.76, "grad_norm": 14.568325674625413, "learning_rate": 4.368166243567013e-06, "loss": 1.006, "step": 17445 }, { "epoch": 1.76, "grad_norm": 12.115886297919321, "learning_rate": 4.365256866705126e-06, "loss": 0.9651, "step": 17450 }, { "epoch": 1.76, "grad_norm": 5.275895266537964, "learning_rate": 4.362347708257743e-06, "loss": 0.962, "step": 17455 }, { "epoch": 1.76, "grad_norm": 7.259529619695632, "learning_rate": 4.359438769225906e-06, "loss": 1.0017, "step": 17460 }, { "epoch": 1.76, "grad_norm": 5.857933774426776, "learning_rate": 4.3565300506105745e-06, "loss": 0.9852, "step": 17465 }, { "epoch": 1.76, "grad_norm": 6.629255297123736, "learning_rate": 4.3536215534126404e-06, "loss": 0.9695, "step": 17470 }, { "epoch": 1.76, "grad_norm": 5.3825534811967355, "learning_rate": 4.350713278632911e-06, "loss": 0.9809, "step": 17475 }, { "epoch": 1.76, "grad_norm": 5.394150217809622, "learning_rate": 4.3478052272721234e-06, "loss": 0.9942, "step": 17480 }, { "epoch": 1.76, "grad_norm": 6.22363972614146, "learning_rate": 4.344897400330933e-06, "loss": 0.9782, "step": 17485 }, { "epoch": 1.76, "grad_norm": 9.202572061556888, "learning_rate": 4.341989798809923e-06, "loss": 0.9811, "step": 17490 }, { "epoch": 1.76, "grad_norm": 6.541440915741455, "learning_rate": 4.339082423709592e-06, "loss": 0.9589, "step": 17495 }, { "epoch": 1.76, "grad_norm": 5.146462234492245, "learning_rate": 4.33617527603037e-06, "loss": 0.9939, "step": 17500 }, { "epoch": 1.76, "grad_norm": 5.2775085198018195, "learning_rate": 4.3332683567726e-06, "loss": 0.9663, "step": 17505 }, { "epoch": 1.77, "grad_norm": 13.61356411742695, "learning_rate": 4.330361666936555e-06, "loss": 0.9924, "step": 17510 }, { "epoch": 1.77, "grad_norm": 16.77351936985382, "learning_rate": 4.327455207522417e-06, "loss": 1.0308, "step": 17515 }, { "epoch": 1.77, "grad_norm": 14.68995331942081, "learning_rate": 4.3245489795303025e-06, "loss": 1.0281, "step": 17520 }, { "epoch": 1.77, "grad_norm": 15.917899439391954, "learning_rate": 4.321642983960238e-06, "loss": 0.9828, "step": 17525 }, { "epoch": 1.77, "grad_norm": 6.713513046333969, "learning_rate": 4.318737221812173e-06, "loss": 1.0316, "step": 17530 }, { "epoch": 1.77, "grad_norm": 7.220594770385739, "learning_rate": 4.315831694085982e-06, "loss": 0.9288, "step": 17535 }, { "epoch": 1.77, "grad_norm": 9.360087324298462, "learning_rate": 4.31292640178145e-06, "loss": 0.9691, "step": 17540 }, { "epoch": 1.77, "grad_norm": 14.104011531131553, "learning_rate": 4.310021345898288e-06, "loss": 0.9856, "step": 17545 }, { "epoch": 1.77, "grad_norm": 6.394733795721605, "learning_rate": 4.307116527436121e-06, "loss": 0.9483, "step": 17550 }, { "epoch": 1.77, "grad_norm": 5.6137553038406685, "learning_rate": 4.304211947394496e-06, "loss": 0.9874, "step": 17555 }, { "epoch": 1.77, "grad_norm": 17.249399564508376, "learning_rate": 4.301307606772873e-06, "loss": 0.9847, "step": 17560 }, { "epoch": 1.77, "grad_norm": 6.970658585095424, "learning_rate": 4.298403506570638e-06, "loss": 0.992, "step": 17565 }, { "epoch": 1.77, "grad_norm": 20.018871333149153, "learning_rate": 4.295499647787085e-06, "loss": 0.9623, "step": 17570 }, { "epoch": 1.77, "grad_norm": 8.33446100071459, "learning_rate": 4.292596031421431e-06, "loss": 0.9659, "step": 17575 }, { "epoch": 1.77, "grad_norm": 5.655812375433922, "learning_rate": 4.289692658472806e-06, "loss": 0.9549, "step": 17580 }, { "epoch": 1.77, "grad_norm": 16.006061782628333, "learning_rate": 4.28678952994026e-06, "loss": 0.978, "step": 17585 }, { "epoch": 1.77, "grad_norm": 10.31919048127266, "learning_rate": 4.283886646822753e-06, "loss": 0.9939, "step": 17590 }, { "epoch": 1.77, "grad_norm": 4.898513394142511, "learning_rate": 4.2809840101191695e-06, "loss": 0.9559, "step": 17595 }, { "epoch": 1.77, "grad_norm": 22.10211628788823, "learning_rate": 4.2780816208282995e-06, "loss": 0.9865, "step": 17600 }, { "epoch": 1.77, "grad_norm": 13.142924272084487, "learning_rate": 4.275179479948856e-06, "loss": 0.9863, "step": 17605 }, { "epoch": 1.78, "grad_norm": 43.70523431723558, "learning_rate": 4.272277588479461e-06, "loss": 0.9565, "step": 17610 }, { "epoch": 1.78, "grad_norm": 22.507175546613677, "learning_rate": 4.269375947418652e-06, "loss": 0.9726, "step": 17615 }, { "epoch": 1.78, "grad_norm": 17.30146559379677, "learning_rate": 4.2664745577648806e-06, "loss": 1.0624, "step": 17620 }, { "epoch": 1.78, "grad_norm": 25.06644250902353, "learning_rate": 4.2635734205165155e-06, "loss": 1.0045, "step": 17625 }, { "epoch": 1.78, "grad_norm": 8.910629056259536, "learning_rate": 4.260672536671832e-06, "loss": 0.9652, "step": 17630 }, { "epoch": 1.78, "grad_norm": 15.806887353642972, "learning_rate": 4.257771907229023e-06, "loss": 0.969, "step": 17635 }, { "epoch": 1.78, "grad_norm": 5.1094952043062385, "learning_rate": 4.254871533186191e-06, "loss": 1.003, "step": 17640 }, { "epoch": 1.78, "grad_norm": 20.289905829013996, "learning_rate": 4.2519714155413534e-06, "loss": 1.0062, "step": 17645 }, { "epoch": 1.78, "grad_norm": 6.23840709833831, "learning_rate": 4.249071555292438e-06, "loss": 0.964, "step": 17650 }, { "epoch": 1.78, "grad_norm": 37.41253753110922, "learning_rate": 4.246171953437281e-06, "loss": 1.0014, "step": 17655 }, { "epoch": 1.78, "grad_norm": 6.888293950500067, "learning_rate": 4.2432726109736365e-06, "loss": 0.9665, "step": 17660 }, { "epoch": 1.78, "grad_norm": 5.644402360932366, "learning_rate": 4.240373528899163e-06, "loss": 0.9564, "step": 17665 }, { "epoch": 1.78, "grad_norm": 6.026582771228052, "learning_rate": 4.237474708211434e-06, "loss": 0.9931, "step": 17670 }, { "epoch": 1.78, "grad_norm": 10.193252882210064, "learning_rate": 4.234576149907928e-06, "loss": 1.0125, "step": 17675 }, { "epoch": 1.78, "grad_norm": 13.234158378729202, "learning_rate": 4.2316778549860395e-06, "loss": 0.9782, "step": 17680 }, { "epoch": 1.78, "grad_norm": 7.290440088877322, "learning_rate": 4.228779824443065e-06, "loss": 1.0064, "step": 17685 }, { "epoch": 1.78, "grad_norm": 11.854939976517208, "learning_rate": 4.225882059276217e-06, "loss": 1.0144, "step": 17690 }, { "epoch": 1.78, "grad_norm": 6.235898705036122, "learning_rate": 4.222984560482614e-06, "loss": 0.9722, "step": 17695 }, { "epoch": 1.78, "grad_norm": 18.231748698058492, "learning_rate": 4.220087329059281e-06, "loss": 0.9929, "step": 17700 }, { "epoch": 1.79, "grad_norm": 6.612949224990907, "learning_rate": 4.2171903660031535e-06, "loss": 0.9802, "step": 17705 }, { "epoch": 1.79, "grad_norm": 18.64871835788384, "learning_rate": 4.214293672311073e-06, "loss": 1.0134, "step": 17710 }, { "epoch": 1.79, "grad_norm": 27.804267649031544, "learning_rate": 4.211397248979789e-06, "loss": 0.9994, "step": 17715 }, { "epoch": 1.79, "grad_norm": 6.232331289327623, "learning_rate": 4.208501097005959e-06, "loss": 0.9865, "step": 17720 }, { "epoch": 1.79, "grad_norm": 7.36852730763666, "learning_rate": 4.205605217386145e-06, "loss": 0.9561, "step": 17725 }, { "epoch": 1.79, "grad_norm": 5.900038904042403, "learning_rate": 4.202709611116818e-06, "loss": 0.996, "step": 17730 }, { "epoch": 1.79, "grad_norm": 29.413539099777385, "learning_rate": 4.1998142791943515e-06, "loss": 1.0042, "step": 17735 }, { "epoch": 1.79, "grad_norm": 11.892005516743971, "learning_rate": 4.196919222615029e-06, "loss": 1.0257, "step": 17740 }, { "epoch": 1.79, "grad_norm": 6.966387701303805, "learning_rate": 4.194024442375032e-06, "loss": 0.9833, "step": 17745 }, { "epoch": 1.79, "grad_norm": 8.231042995618557, "learning_rate": 4.191129939470457e-06, "loss": 0.9778, "step": 17750 }, { "epoch": 1.79, "grad_norm": 16.639381946636583, "learning_rate": 4.188235714897296e-06, "loss": 0.9758, "step": 17755 }, { "epoch": 1.79, "grad_norm": 5.232707031871469, "learning_rate": 4.185341769651451e-06, "loss": 0.9965, "step": 17760 }, { "epoch": 1.79, "grad_norm": 4.799057501236086, "learning_rate": 4.1824481047287244e-06, "loss": 0.9631, "step": 17765 }, { "epoch": 1.79, "grad_norm": 18.93211267186369, "learning_rate": 4.179554721124825e-06, "loss": 1.0005, "step": 17770 }, { "epoch": 1.79, "grad_norm": 6.342829499951731, "learning_rate": 4.17666161983536e-06, "loss": 0.9595, "step": 17775 }, { "epoch": 1.79, "grad_norm": 7.298086003533476, "learning_rate": 4.1737688018558476e-06, "loss": 1.0069, "step": 17780 }, { "epoch": 1.79, "grad_norm": 9.309454412926847, "learning_rate": 4.170876268181703e-06, "loss": 0.9926, "step": 17785 }, { "epoch": 1.79, "grad_norm": 8.48349237724496, "learning_rate": 4.167984019808238e-06, "loss": 0.9817, "step": 17790 }, { "epoch": 1.79, "grad_norm": 7.696426460645399, "learning_rate": 4.165092057730681e-06, "loss": 1.0428, "step": 17795 }, { "epoch": 1.79, "grad_norm": 5.215360712972935, "learning_rate": 4.1622003829441474e-06, "loss": 0.987, "step": 17800 }, { "epoch": 1.8, "grad_norm": 5.644196565159671, "learning_rate": 4.1593089964436625e-06, "loss": 0.977, "step": 17805 }, { "epoch": 1.8, "grad_norm": 5.107974839971601, "learning_rate": 4.156417899224147e-06, "loss": 0.9941, "step": 17810 }, { "epoch": 1.8, "grad_norm": 5.301233945541358, "learning_rate": 4.153527092280431e-06, "loss": 1.0134, "step": 17815 }, { "epoch": 1.8, "grad_norm": 7.220120395735668, "learning_rate": 4.15063657660723e-06, "loss": 1.0002, "step": 17820 }, { "epoch": 1.8, "grad_norm": 15.811694128818516, "learning_rate": 4.147746353199174e-06, "loss": 0.9888, "step": 17825 }, { "epoch": 1.8, "grad_norm": 12.713258453209393, "learning_rate": 4.144856423050783e-06, "loss": 0.9514, "step": 17830 }, { "epoch": 1.8, "grad_norm": 10.175345447281716, "learning_rate": 4.14196678715648e-06, "loss": 0.9518, "step": 17835 }, { "epoch": 1.8, "grad_norm": 7.196682747430522, "learning_rate": 4.139077446510585e-06, "loss": 1.0259, "step": 17840 }, { "epoch": 1.8, "grad_norm": 10.721920938939746, "learning_rate": 4.136188402107322e-06, "loss": 0.9831, "step": 17845 }, { "epoch": 1.8, "grad_norm": 16.312284928694034, "learning_rate": 4.133299654940801e-06, "loss": 0.9578, "step": 17850 }, { "epoch": 1.8, "grad_norm": 8.727399474249944, "learning_rate": 4.130411206005043e-06, "loss": 1.0198, "step": 17855 }, { "epoch": 1.8, "grad_norm": 8.203930546045322, "learning_rate": 4.127523056293958e-06, "loss": 1.0151, "step": 17860 }, { "epoch": 1.8, "grad_norm": 6.905780390122556, "learning_rate": 4.124635206801356e-06, "loss": 0.9554, "step": 17865 }, { "epoch": 1.8, "grad_norm": 14.270094254344347, "learning_rate": 4.121747658520942e-06, "loss": 0.9901, "step": 17870 }, { "epoch": 1.8, "grad_norm": 24.142268165349467, "learning_rate": 4.1188604124463224e-06, "loss": 1.0144, "step": 17875 }, { "epoch": 1.8, "grad_norm": 6.472023048193844, "learning_rate": 4.115973469570991e-06, "loss": 0.9952, "step": 17880 }, { "epoch": 1.8, "grad_norm": 7.9995434492814255, "learning_rate": 4.113086830888346e-06, "loss": 1.0237, "step": 17885 }, { "epoch": 1.8, "grad_norm": 6.160793829862443, "learning_rate": 4.1102004973916744e-06, "loss": 0.9899, "step": 17890 }, { "epoch": 1.8, "grad_norm": 9.314565949813037, "learning_rate": 4.107314470074163e-06, "loss": 0.9722, "step": 17895 }, { "epoch": 1.8, "grad_norm": 11.753414499732502, "learning_rate": 4.104428749928887e-06, "loss": 0.9672, "step": 17900 }, { "epoch": 1.81, "grad_norm": 11.277581642321612, "learning_rate": 4.101543337948826e-06, "loss": 0.9993, "step": 17905 }, { "epoch": 1.81, "grad_norm": 9.02303347969287, "learning_rate": 4.0986582351268445e-06, "loss": 0.9497, "step": 17910 }, { "epoch": 1.81, "grad_norm": 22.410472744076394, "learning_rate": 4.0957734424557e-06, "loss": 1.0122, "step": 17915 }, { "epoch": 1.81, "grad_norm": 25.44505269635536, "learning_rate": 4.092888960928053e-06, "loss": 0.9531, "step": 17920 }, { "epoch": 1.81, "grad_norm": 6.758375815804422, "learning_rate": 4.090004791536447e-06, "loss": 1.0082, "step": 17925 }, { "epoch": 1.81, "grad_norm": 7.558547984958102, "learning_rate": 4.0871209352733244e-06, "loss": 1.0401, "step": 17930 }, { "epoch": 1.81, "grad_norm": 4.957332675445743, "learning_rate": 4.084237393131015e-06, "loss": 1.0092, "step": 17935 }, { "epoch": 1.81, "grad_norm": 5.890734746420278, "learning_rate": 4.081354166101744e-06, "loss": 0.9885, "step": 17940 }, { "epoch": 1.81, "grad_norm": 6.54145049689193, "learning_rate": 4.078471255177626e-06, "loss": 0.981, "step": 17945 }, { "epoch": 1.81, "grad_norm": 23.910132442804926, "learning_rate": 4.075588661350671e-06, "loss": 1.0282, "step": 17950 }, { "epoch": 1.81, "grad_norm": 5.24292684826029, "learning_rate": 4.072706385612773e-06, "loss": 0.9975, "step": 17955 }, { "epoch": 1.81, "grad_norm": 19.115389829935094, "learning_rate": 4.069824428955724e-06, "loss": 0.9957, "step": 17960 }, { "epoch": 1.81, "grad_norm": 4.945914484492379, "learning_rate": 4.066942792371198e-06, "loss": 0.9641, "step": 17965 }, { "epoch": 1.81, "grad_norm": 10.521339685687494, "learning_rate": 4.064061476850769e-06, "loss": 0.9963, "step": 17970 }, { "epoch": 1.81, "grad_norm": 5.894472343268108, "learning_rate": 4.0611804833858885e-06, "loss": 1.0377, "step": 17975 }, { "epoch": 1.81, "grad_norm": 13.576633426606861, "learning_rate": 4.05829981296791e-06, "loss": 0.9905, "step": 17980 }, { "epoch": 1.81, "grad_norm": 24.05532145531544, "learning_rate": 4.055419466588064e-06, "loss": 1.0365, "step": 17985 }, { "epoch": 1.81, "grad_norm": 6.015289000674614, "learning_rate": 4.05253944523748e-06, "loss": 0.9566, "step": 17990 }, { "epoch": 1.81, "grad_norm": 8.899950989787076, "learning_rate": 4.049659749907165e-06, "loss": 1.0058, "step": 17995 }, { "epoch": 1.81, "grad_norm": 7.243197684418895, "learning_rate": 4.046780381588025e-06, "loss": 1.0171, "step": 18000 }, { "epoch": 1.82, "grad_norm": 6.474829652542478, "learning_rate": 4.043901341270843e-06, "loss": 0.9419, "step": 18005 }, { "epoch": 1.82, "grad_norm": 6.102354302507712, "learning_rate": 4.041022629946298e-06, "loss": 0.9974, "step": 18010 }, { "epoch": 1.82, "grad_norm": 6.581577273091294, "learning_rate": 4.038144248604949e-06, "loss": 0.9694, "step": 18015 }, { "epoch": 1.82, "grad_norm": 5.7366265831491345, "learning_rate": 4.035266198237247e-06, "loss": 1.0029, "step": 18020 }, { "epoch": 1.82, "grad_norm": 11.450022518303586, "learning_rate": 4.032388479833522e-06, "loss": 0.9699, "step": 18025 }, { "epoch": 1.82, "grad_norm": 21.668572220867887, "learning_rate": 4.029511094384001e-06, "loss": 0.9723, "step": 18030 }, { "epoch": 1.82, "grad_norm": 9.873050740877582, "learning_rate": 4.026634042878782e-06, "loss": 0.9608, "step": 18035 }, { "epoch": 1.82, "grad_norm": 4.936992079591061, "learning_rate": 4.02375732630786e-06, "loss": 0.9841, "step": 18040 }, { "epoch": 1.82, "grad_norm": 16.356020950439156, "learning_rate": 4.020880945661111e-06, "loss": 0.9837, "step": 18045 }, { "epoch": 1.82, "grad_norm": 5.460548595726973, "learning_rate": 4.0180049019282916e-06, "loss": 0.9915, "step": 18050 }, { "epoch": 1.82, "grad_norm": 7.417525899749307, "learning_rate": 4.015129196099049e-06, "loss": 1.0166, "step": 18055 }, { "epoch": 1.82, "grad_norm": 10.20607296161159, "learning_rate": 4.012253829162908e-06, "loss": 0.9829, "step": 18060 }, { "epoch": 1.82, "grad_norm": 4.930258224866766, "learning_rate": 4.0093788021092826e-06, "loss": 0.991, "step": 18065 }, { "epoch": 1.82, "grad_norm": 7.401745585192158, "learning_rate": 4.006504115927463e-06, "loss": 1.0106, "step": 18070 }, { "epoch": 1.82, "grad_norm": 7.793097611353607, "learning_rate": 4.0036297716066295e-06, "loss": 1.0181, "step": 18075 }, { "epoch": 1.82, "grad_norm": 6.639441474692842, "learning_rate": 4.0007557701358405e-06, "loss": 0.9854, "step": 18080 }, { "epoch": 1.82, "grad_norm": 4.979689799633668, "learning_rate": 3.997882112504036e-06, "loss": 0.9966, "step": 18085 }, { "epoch": 1.82, "grad_norm": 18.30019551431489, "learning_rate": 3.995008799700039e-06, "loss": 0.9399, "step": 18090 }, { "epoch": 1.82, "grad_norm": 9.010236910045, "learning_rate": 3.992135832712555e-06, "loss": 0.973, "step": 18095 }, { "epoch": 1.82, "grad_norm": 14.357174340154753, "learning_rate": 3.9892632125301664e-06, "loss": 0.9703, "step": 18100 }, { "epoch": 1.83, "grad_norm": 6.800647275884193, "learning_rate": 3.986390940141344e-06, "loss": 1.0361, "step": 18105 }, { "epoch": 1.83, "grad_norm": 10.66733919132119, "learning_rate": 3.983519016534429e-06, "loss": 0.9551, "step": 18110 }, { "epoch": 1.83, "grad_norm": 10.08796648741792, "learning_rate": 3.980647442697651e-06, "loss": 0.9354, "step": 18115 }, { "epoch": 1.83, "grad_norm": 4.8986084699880825, "learning_rate": 3.977776219619113e-06, "loss": 0.9813, "step": 18120 }, { "epoch": 1.83, "grad_norm": 4.779198222654169, "learning_rate": 3.974905348286803e-06, "loss": 1.004, "step": 18125 }, { "epoch": 1.83, "grad_norm": 11.207525881092016, "learning_rate": 3.9720348296885805e-06, "loss": 0.9679, "step": 18130 }, { "epoch": 1.83, "grad_norm": 12.756377143981297, "learning_rate": 3.969164664812194e-06, "loss": 0.9359, "step": 18135 }, { "epoch": 1.83, "grad_norm": 7.282128100521323, "learning_rate": 3.966294854645261e-06, "loss": 0.943, "step": 18140 }, { "epoch": 1.83, "grad_norm": 8.343543523016564, "learning_rate": 3.963425400175282e-06, "loss": 0.9858, "step": 18145 }, { "epoch": 1.83, "grad_norm": 14.959195280855614, "learning_rate": 3.960556302389632e-06, "loss": 0.9732, "step": 18150 }, { "epoch": 1.83, "grad_norm": 5.4525478859237255, "learning_rate": 3.957687562275566e-06, "loss": 0.9777, "step": 18155 }, { "epoch": 1.83, "grad_norm": 6.0295155709128565, "learning_rate": 3.954819180820213e-06, "loss": 1.0042, "step": 18160 }, { "epoch": 1.83, "grad_norm": 5.130634304881189, "learning_rate": 3.951951159010583e-06, "loss": 0.9856, "step": 18165 }, { "epoch": 1.83, "grad_norm": 9.513383734699328, "learning_rate": 3.949083497833558e-06, "loss": 0.9726, "step": 18170 }, { "epoch": 1.83, "grad_norm": 5.615005606728285, "learning_rate": 3.946216198275897e-06, "loss": 0.9881, "step": 18175 }, { "epoch": 1.83, "grad_norm": 5.967220890591053, "learning_rate": 3.943349261324237e-06, "loss": 0.9645, "step": 18180 }, { "epoch": 1.83, "grad_norm": 8.03922077754858, "learning_rate": 3.940482687965085e-06, "loss": 1.0003, "step": 18185 }, { "epoch": 1.83, "grad_norm": 9.307895107855172, "learning_rate": 3.937616479184831e-06, "loss": 0.9933, "step": 18190 }, { "epoch": 1.83, "grad_norm": 18.744700072412474, "learning_rate": 3.934750635969728e-06, "loss": 0.9654, "step": 18195 }, { "epoch": 1.83, "grad_norm": 11.952335132138158, "learning_rate": 3.931885159305916e-06, "loss": 1.0467, "step": 18200 }, { "epoch": 1.84, "grad_norm": 15.245737402574543, "learning_rate": 3.9290200501794e-06, "loss": 0.9912, "step": 18205 }, { "epoch": 1.84, "grad_norm": 8.934123527505806, "learning_rate": 3.926155309576063e-06, "loss": 0.9621, "step": 18210 }, { "epoch": 1.84, "grad_norm": 6.822756484981494, "learning_rate": 3.923290938481657e-06, "loss": 0.9913, "step": 18215 }, { "epoch": 1.84, "grad_norm": 7.3094605135069015, "learning_rate": 3.920426937881812e-06, "loss": 1.0135, "step": 18220 }, { "epoch": 1.84, "grad_norm": 8.234278005327626, "learning_rate": 3.917563308762024e-06, "loss": 1.024, "step": 18225 }, { "epoch": 1.84, "grad_norm": 14.143253937630613, "learning_rate": 3.9147000521076695e-06, "loss": 1.0049, "step": 18230 }, { "epoch": 1.84, "grad_norm": 8.190834230959963, "learning_rate": 3.9118371689039905e-06, "loss": 1.0115, "step": 18235 }, { "epoch": 1.84, "grad_norm": 7.641550092853329, "learning_rate": 3.908974660136102e-06, "loss": 0.9952, "step": 18240 }, { "epoch": 1.84, "grad_norm": 8.542498068012963, "learning_rate": 3.906112526788991e-06, "loss": 0.9804, "step": 18245 }, { "epoch": 1.84, "grad_norm": 12.868475394958528, "learning_rate": 3.903250769847515e-06, "loss": 1.0248, "step": 18250 }, { "epoch": 1.84, "grad_norm": 10.90066644420374, "learning_rate": 3.9003893902964e-06, "loss": 0.9451, "step": 18255 }, { "epoch": 1.84, "grad_norm": 27.968509274783298, "learning_rate": 3.897528389120247e-06, "loss": 0.9905, "step": 18260 }, { "epoch": 1.84, "grad_norm": 8.71773024020709, "learning_rate": 3.894667767303523e-06, "loss": 0.9664, "step": 18265 }, { "epoch": 1.84, "grad_norm": 9.158395474912115, "learning_rate": 3.891807525830564e-06, "loss": 1.0286, "step": 18270 }, { "epoch": 1.84, "grad_norm": 5.322537717175965, "learning_rate": 3.888947665685576e-06, "loss": 1.0008, "step": 18275 }, { "epoch": 1.84, "grad_norm": 5.961458444995351, "learning_rate": 3.8860881878526365e-06, "loss": 0.9912, "step": 18280 }, { "epoch": 1.84, "grad_norm": 7.145722663367105, "learning_rate": 3.883229093315686e-06, "loss": 1.0161, "step": 18285 }, { "epoch": 1.84, "grad_norm": 5.074742049472983, "learning_rate": 3.880370383058539e-06, "loss": 0.9978, "step": 18290 }, { "epoch": 1.84, "grad_norm": 4.970232085990174, "learning_rate": 3.877512058064876e-06, "loss": 0.9639, "step": 18295 }, { "epoch": 1.85, "grad_norm": 8.412604579770981, "learning_rate": 3.8746541193182375e-06, "loss": 1.0341, "step": 18300 }, { "epoch": 1.85, "grad_norm": 8.680626298857828, "learning_rate": 3.871796567802044e-06, "loss": 0.9469, "step": 18305 }, { "epoch": 1.85, "grad_norm": 4.796765209774283, "learning_rate": 3.868939404499573e-06, "loss": 0.942, "step": 18310 }, { "epoch": 1.85, "grad_norm": 17.149810215370284, "learning_rate": 3.8660826303939745e-06, "loss": 0.9614, "step": 18315 }, { "epoch": 1.85, "grad_norm": 21.602876098567382, "learning_rate": 3.863226246468256e-06, "loss": 1.0041, "step": 18320 }, { "epoch": 1.85, "grad_norm": 5.5164022506358705, "learning_rate": 3.860370253705304e-06, "loss": 0.9953, "step": 18325 }, { "epoch": 1.85, "grad_norm": 6.346002360848893, "learning_rate": 3.857514653087857e-06, "loss": 0.9913, "step": 18330 }, { "epoch": 1.85, "grad_norm": 5.696269583932353, "learning_rate": 3.854659445598529e-06, "loss": 0.9501, "step": 18335 }, { "epoch": 1.85, "grad_norm": 13.613799479606872, "learning_rate": 3.85180463221979e-06, "loss": 0.9782, "step": 18340 }, { "epoch": 1.85, "grad_norm": 5.906200366057462, "learning_rate": 3.848950213933982e-06, "loss": 0.9995, "step": 18345 }, { "epoch": 1.85, "grad_norm": 9.985103632638992, "learning_rate": 3.846096191723303e-06, "loss": 1.0518, "step": 18350 }, { "epoch": 1.85, "grad_norm": 7.450855140786176, "learning_rate": 3.843242566569826e-06, "loss": 1.0015, "step": 18355 }, { "epoch": 1.85, "grad_norm": 7.17042311212853, "learning_rate": 3.840389339455474e-06, "loss": 0.9941, "step": 18360 }, { "epoch": 1.85, "grad_norm": 6.19466857070708, "learning_rate": 3.837536511362045e-06, "loss": 0.9699, "step": 18365 }, { "epoch": 1.85, "grad_norm": 10.901155997440986, "learning_rate": 3.834684083271191e-06, "loss": 0.9548, "step": 18370 }, { "epoch": 1.85, "grad_norm": 19.352982354625738, "learning_rate": 3.831832056164431e-06, "loss": 0.9756, "step": 18375 }, { "epoch": 1.85, "grad_norm": 6.70998177042718, "learning_rate": 3.8289804310231434e-06, "loss": 1.0063, "step": 18380 }, { "epoch": 1.85, "grad_norm": 25.9351320245653, "learning_rate": 3.826129208828573e-06, "loss": 0.9915, "step": 18385 }, { "epoch": 1.85, "grad_norm": 5.987625138296798, "learning_rate": 3.82327839056182e-06, "loss": 0.9812, "step": 18390 }, { "epoch": 1.85, "grad_norm": 13.41267269943096, "learning_rate": 3.8204279772038495e-06, "loss": 0.9616, "step": 18395 }, { "epoch": 1.86, "grad_norm": 5.9943162502046015, "learning_rate": 3.817577969735485e-06, "loss": 0.9764, "step": 18400 }, { "epoch": 1.86, "grad_norm": 5.261829409492166, "learning_rate": 3.8147283691374124e-06, "loss": 0.9682, "step": 18405 }, { "epoch": 1.86, "grad_norm": 6.718804953227591, "learning_rate": 3.811879176390174e-06, "loss": 0.9882, "step": 18410 }, { "epoch": 1.86, "grad_norm": 7.7449916557750855, "learning_rate": 3.8090303924741784e-06, "loss": 1.0081, "step": 18415 }, { "epoch": 1.86, "grad_norm": 19.46955376180154, "learning_rate": 3.8061820183696857e-06, "loss": 0.9514, "step": 18420 }, { "epoch": 1.86, "grad_norm": 6.0383605638679505, "learning_rate": 3.803334055056822e-06, "loss": 0.9281, "step": 18425 }, { "epoch": 1.86, "grad_norm": 7.50864837032302, "learning_rate": 3.8004865035155662e-06, "loss": 0.9906, "step": 18430 }, { "epoch": 1.86, "grad_norm": 4.858201015924298, "learning_rate": 3.797639364725758e-06, "loss": 0.928, "step": 18435 }, { "epoch": 1.86, "grad_norm": 12.82481969041467, "learning_rate": 3.7947926396670975e-06, "loss": 0.9848, "step": 18440 }, { "epoch": 1.86, "grad_norm": 6.861048594296718, "learning_rate": 3.791946329319136e-06, "loss": 0.9813, "step": 18445 }, { "epoch": 1.86, "grad_norm": 6.975396611645755, "learning_rate": 3.7891004346612925e-06, "loss": 0.9723, "step": 18450 }, { "epoch": 1.86, "grad_norm": 20.182853926554543, "learning_rate": 3.786254956672829e-06, "loss": 0.9931, "step": 18455 }, { "epoch": 1.86, "grad_norm": 13.837429575125533, "learning_rate": 3.7834098963328784e-06, "loss": 0.9738, "step": 18460 }, { "epoch": 1.86, "grad_norm": 5.216565893064262, "learning_rate": 3.7805652546204193e-06, "loss": 0.9949, "step": 18465 }, { "epoch": 1.86, "grad_norm": 11.355112575571148, "learning_rate": 3.777721032514292e-06, "loss": 0.9918, "step": 18470 }, { "epoch": 1.86, "grad_norm": 6.094017204819921, "learning_rate": 3.7748772309931887e-06, "loss": 0.9381, "step": 18475 }, { "epoch": 1.86, "grad_norm": 12.374752647571407, "learning_rate": 3.7720338510356623e-06, "loss": 0.9747, "step": 18480 }, { "epoch": 1.86, "grad_norm": 5.5966625675434125, "learning_rate": 3.7691908936201126e-06, "loss": 0.9333, "step": 18485 }, { "epoch": 1.86, "grad_norm": 5.305843062394117, "learning_rate": 3.7663483597248023e-06, "loss": 0.9548, "step": 18490 }, { "epoch": 1.86, "grad_norm": 9.114471686460963, "learning_rate": 3.7635062503278425e-06, "loss": 1.0119, "step": 18495 }, { "epoch": 1.87, "grad_norm": 5.816051820282711, "learning_rate": 3.7606645664072016e-06, "loss": 0.9708, "step": 18500 }, { "epoch": 1.87, "grad_norm": 9.49667170674841, "learning_rate": 3.7578233089406984e-06, "loss": 0.9589, "step": 18505 }, { "epoch": 1.87, "grad_norm": 5.761558730111664, "learning_rate": 3.754982478906011e-06, "loss": 0.9756, "step": 18510 }, { "epoch": 1.87, "grad_norm": 10.967302332582332, "learning_rate": 3.75214207728066e-06, "loss": 0.9411, "step": 18515 }, { "epoch": 1.87, "grad_norm": 5.116253224622286, "learning_rate": 3.7493021050420308e-06, "loss": 0.9907, "step": 18520 }, { "epoch": 1.87, "grad_norm": 24.120440143339238, "learning_rate": 3.746462563167351e-06, "loss": 0.9318, "step": 18525 }, { "epoch": 1.87, "grad_norm": 10.41418126468614, "learning_rate": 3.7436234526337077e-06, "loss": 0.9539, "step": 18530 }, { "epoch": 1.87, "grad_norm": 6.873694965923555, "learning_rate": 3.740784774418032e-06, "loss": 1.0013, "step": 18535 }, { "epoch": 1.87, "grad_norm": 21.799567648823835, "learning_rate": 3.7379465294971164e-06, "loss": 1.0051, "step": 18540 }, { "epoch": 1.87, "grad_norm": 7.05982153807311, "learning_rate": 3.7351087188475904e-06, "loss": 0.9677, "step": 18545 }, { "epoch": 1.87, "grad_norm": 32.014472362161165, "learning_rate": 3.732271343445949e-06, "loss": 0.9886, "step": 18550 }, { "epoch": 1.87, "grad_norm": 8.146122922065649, "learning_rate": 3.729434404268527e-06, "loss": 0.9481, "step": 18555 }, { "epoch": 1.87, "grad_norm": 17.909251593077418, "learning_rate": 3.726597902291512e-06, "loss": 1.0004, "step": 18560 }, { "epoch": 1.87, "grad_norm": 6.9456126927850335, "learning_rate": 3.7237618384909446e-06, "loss": 0.9513, "step": 18565 }, { "epoch": 1.87, "grad_norm": 7.361620530332281, "learning_rate": 3.720926213842707e-06, "loss": 0.9708, "step": 18570 }, { "epoch": 1.87, "grad_norm": 7.811469339075542, "learning_rate": 3.7180910293225404e-06, "loss": 1.0052, "step": 18575 }, { "epoch": 1.87, "grad_norm": 14.513163674141524, "learning_rate": 3.715256285906023e-06, "loss": 1.0281, "step": 18580 }, { "epoch": 1.87, "grad_norm": 10.774195223815436, "learning_rate": 3.712421984568593e-06, "loss": 1.0158, "step": 18585 }, { "epoch": 1.87, "grad_norm": 4.960282979334194, "learning_rate": 3.7095881262855267e-06, "loss": 0.9662, "step": 18590 }, { "epoch": 1.87, "grad_norm": 7.978757212662668, "learning_rate": 3.7067547120319554e-06, "loss": 0.9774, "step": 18595 }, { "epoch": 1.88, "grad_norm": 6.034834780122345, "learning_rate": 3.7039217427828513e-06, "loss": 0.9752, "step": 18600 }, { "epoch": 1.88, "grad_norm": 15.004516099561922, "learning_rate": 3.701089219513038e-06, "loss": 0.9734, "step": 18605 }, { "epoch": 1.88, "grad_norm": 9.402085580760845, "learning_rate": 3.6982571431971824e-06, "loss": 1.0251, "step": 18610 }, { "epoch": 1.88, "grad_norm": 23.56500346844539, "learning_rate": 3.695425514809802e-06, "loss": 0.9798, "step": 18615 }, { "epoch": 1.88, "grad_norm": 7.346783743132763, "learning_rate": 3.692594335325255e-06, "loss": 1.0118, "step": 18620 }, { "epoch": 1.88, "grad_norm": 10.117378850707686, "learning_rate": 3.68976360571775e-06, "loss": 0.9884, "step": 18625 }, { "epoch": 1.88, "grad_norm": 8.6563959867572, "learning_rate": 3.686933326961335e-06, "loss": 0.9778, "step": 18630 }, { "epoch": 1.88, "grad_norm": 4.718362628760115, "learning_rate": 3.6841035000299104e-06, "loss": 0.9712, "step": 18635 }, { "epoch": 1.88, "grad_norm": 11.901977228691198, "learning_rate": 3.681274125897212e-06, "loss": 1.0074, "step": 18640 }, { "epoch": 1.88, "grad_norm": 5.376219367954663, "learning_rate": 3.6784452055368293e-06, "loss": 0.9927, "step": 18645 }, { "epoch": 1.88, "grad_norm": 7.261738860162723, "learning_rate": 3.6756167399221875e-06, "loss": 0.9726, "step": 18650 }, { "epoch": 1.88, "grad_norm": 11.322575516017178, "learning_rate": 3.6727887300265623e-06, "loss": 0.9967, "step": 18655 }, { "epoch": 1.88, "grad_norm": 6.396128479592039, "learning_rate": 3.669961176823065e-06, "loss": 1.0004, "step": 18660 }, { "epoch": 1.88, "grad_norm": 5.572859527055436, "learning_rate": 3.66713408128466e-06, "loss": 0.9574, "step": 18665 }, { "epoch": 1.88, "grad_norm": 5.566152355775521, "learning_rate": 3.664307444384141e-06, "loss": 0.944, "step": 18670 }, { "epoch": 1.88, "grad_norm": 6.814616898071068, "learning_rate": 3.661481267094157e-06, "loss": 0.9645, "step": 18675 }, { "epoch": 1.88, "grad_norm": 7.4491753760139545, "learning_rate": 3.658655550387191e-06, "loss": 0.971, "step": 18680 }, { "epoch": 1.88, "grad_norm": 9.650043757641736, "learning_rate": 3.655830295235567e-06, "loss": 0.994, "step": 18685 }, { "epoch": 1.88, "grad_norm": 4.950233639909765, "learning_rate": 3.6530055026114564e-06, "loss": 0.9695, "step": 18690 }, { "epoch": 1.88, "grad_norm": 10.336979569142324, "learning_rate": 3.6501811734868644e-06, "loss": 0.9697, "step": 18695 }, { "epoch": 1.89, "grad_norm": 7.485667474533593, "learning_rate": 3.6473573088336424e-06, "loss": 0.9788, "step": 18700 }, { "epoch": 1.89, "grad_norm": 11.737953968885742, "learning_rate": 3.6445339096234765e-06, "loss": 0.9528, "step": 18705 }, { "epoch": 1.89, "grad_norm": 11.420294666661196, "learning_rate": 3.6417109768279004e-06, "loss": 0.9939, "step": 18710 }, { "epoch": 1.89, "grad_norm": 18.063368906404644, "learning_rate": 3.638888511418278e-06, "loss": 0.9813, "step": 18715 }, { "epoch": 1.89, "grad_norm": 8.256498293961954, "learning_rate": 3.63606651436582e-06, "loss": 0.946, "step": 18720 }, { "epoch": 1.89, "grad_norm": 11.775343917226046, "learning_rate": 3.633244986641571e-06, "loss": 1.0092, "step": 18725 }, { "epoch": 1.89, "grad_norm": 9.327990203243036, "learning_rate": 3.630423929216417e-06, "loss": 0.9956, "step": 18730 }, { "epoch": 1.89, "grad_norm": 10.684176316275288, "learning_rate": 3.6276033430610793e-06, "loss": 0.9586, "step": 18735 }, { "epoch": 1.89, "grad_norm": 8.370734052190713, "learning_rate": 3.6247832291461226e-06, "loss": 0.9927, "step": 18740 }, { "epoch": 1.89, "grad_norm": 8.603847329973096, "learning_rate": 3.621963588441942e-06, "loss": 0.9735, "step": 18745 }, { "epoch": 1.89, "grad_norm": 14.70117140483938, "learning_rate": 3.6191444219187753e-06, "loss": 0.913, "step": 18750 }, { "epoch": 1.89, "grad_norm": 7.259241140398004, "learning_rate": 3.6163257305466937e-06, "loss": 1.0054, "step": 18755 }, { "epoch": 1.89, "grad_norm": 5.831064986749914, "learning_rate": 3.6135075152956074e-06, "loss": 0.9717, "step": 18760 }, { "epoch": 1.89, "grad_norm": 5.268942377717585, "learning_rate": 3.610689777135259e-06, "loss": 0.9348, "step": 18765 }, { "epoch": 1.89, "grad_norm": 6.077651255287886, "learning_rate": 3.6078725170352323e-06, "loss": 0.9825, "step": 18770 }, { "epoch": 1.89, "grad_norm": 6.9833842942122235, "learning_rate": 3.605055735964943e-06, "loss": 0.9822, "step": 18775 }, { "epoch": 1.89, "grad_norm": 4.798592166369944, "learning_rate": 3.6022394348936436e-06, "loss": 0.9778, "step": 18780 }, { "epoch": 1.89, "grad_norm": 5.34851456599785, "learning_rate": 3.5994236147904182e-06, "loss": 0.9623, "step": 18785 }, { "epoch": 1.89, "grad_norm": 6.684841424069073, "learning_rate": 3.5966082766241904e-06, "loss": 0.9787, "step": 18790 }, { "epoch": 1.89, "grad_norm": 11.479047892827804, "learning_rate": 3.593793421363712e-06, "loss": 0.9816, "step": 18795 }, { "epoch": 1.9, "grad_norm": 7.040538898804096, "learning_rate": 3.5909790499775765e-06, "loss": 0.9905, "step": 18800 }, { "epoch": 1.9, "grad_norm": 5.958641544227882, "learning_rate": 3.588165163434203e-06, "loss": 0.9728, "step": 18805 }, { "epoch": 1.9, "grad_norm": 8.054065495967254, "learning_rate": 3.5853517627018495e-06, "loss": 0.9511, "step": 18810 }, { "epoch": 1.9, "grad_norm": 5.890771687952744, "learning_rate": 3.5825388487486034e-06, "loss": 0.995, "step": 18815 }, { "epoch": 1.9, "grad_norm": 5.0730729877051965, "learning_rate": 3.5797264225423846e-06, "loss": 0.9923, "step": 18820 }, { "epoch": 1.9, "grad_norm": 6.636257745017197, "learning_rate": 3.576914485050948e-06, "loss": 0.9761, "step": 18825 }, { "epoch": 1.9, "grad_norm": 6.803076414174225, "learning_rate": 3.5741030372418763e-06, "loss": 0.9786, "step": 18830 }, { "epoch": 1.9, "grad_norm": 12.692665844872918, "learning_rate": 3.571292080082589e-06, "loss": 0.9742, "step": 18835 }, { "epoch": 1.9, "grad_norm": 6.049139761426119, "learning_rate": 3.568481614540332e-06, "loss": 0.961, "step": 18840 }, { "epoch": 1.9, "grad_norm": 7.999274994098982, "learning_rate": 3.5656716415821848e-06, "loss": 0.9545, "step": 18845 }, { "epoch": 1.9, "grad_norm": 7.444020322274279, "learning_rate": 3.5628621621750546e-06, "loss": 0.9868, "step": 18850 }, { "epoch": 1.9, "grad_norm": 6.935898973123527, "learning_rate": 3.560053177285683e-06, "loss": 1.0229, "step": 18855 }, { "epoch": 1.9, "grad_norm": 8.652059327456081, "learning_rate": 3.5572446878806356e-06, "loss": 0.9985, "step": 18860 }, { "epoch": 1.9, "grad_norm": 5.470800195437701, "learning_rate": 3.5544366949263154e-06, "loss": 0.974, "step": 18865 }, { "epoch": 1.9, "grad_norm": 5.540500894353744, "learning_rate": 3.5516291993889463e-06, "loss": 0.9712, "step": 18870 }, { "epoch": 1.9, "grad_norm": 4.903932248809819, "learning_rate": 3.548822202234588e-06, "loss": 1.0247, "step": 18875 }, { "epoch": 1.9, "grad_norm": 5.255685901803376, "learning_rate": 3.546015704429123e-06, "loss": 0.9893, "step": 18880 }, { "epoch": 1.9, "grad_norm": 7.7442920466087815, "learning_rate": 3.5432097069382652e-06, "loss": 0.9865, "step": 18885 }, { "epoch": 1.9, "grad_norm": 6.451898088603783, "learning_rate": 3.540404210727555e-06, "loss": 1.0302, "step": 18890 }, { "epoch": 1.91, "grad_norm": 5.078002477744629, "learning_rate": 3.5375992167623635e-06, "loss": 0.9517, "step": 18895 }, { "epoch": 1.91, "grad_norm": 6.8441422291882414, "learning_rate": 3.534794726007884e-06, "loss": 1.0022, "step": 18900 }, { "epoch": 1.91, "grad_norm": 4.820516107934559, "learning_rate": 3.531990739429141e-06, "loss": 0.9455, "step": 18905 }, { "epoch": 1.91, "grad_norm": 6.986413654613702, "learning_rate": 3.52918725799098e-06, "loss": 0.9679, "step": 18910 }, { "epoch": 1.91, "grad_norm": 5.527089802422374, "learning_rate": 3.5263842826580813e-06, "loss": 0.9618, "step": 18915 }, { "epoch": 1.91, "grad_norm": 5.444281911919659, "learning_rate": 3.523581814394941e-06, "loss": 0.993, "step": 18920 }, { "epoch": 1.91, "grad_norm": 6.782005747340329, "learning_rate": 3.52077985416589e-06, "loss": 0.9753, "step": 18925 }, { "epoch": 1.91, "grad_norm": 8.74891044127738, "learning_rate": 3.517978402935077e-06, "loss": 0.9723, "step": 18930 }, { "epoch": 1.91, "grad_norm": 9.758541435324947, "learning_rate": 3.5151774616664825e-06, "loss": 0.9603, "step": 18935 }, { "epoch": 1.91, "grad_norm": 7.337683199317084, "learning_rate": 3.5123770313239035e-06, "loss": 0.9969, "step": 18940 }, { "epoch": 1.91, "grad_norm": 6.297019442567877, "learning_rate": 3.5095771128709678e-06, "loss": 0.9789, "step": 18945 }, { "epoch": 1.91, "grad_norm": 4.913586941747046, "learning_rate": 3.506777707271124e-06, "loss": 0.9492, "step": 18950 }, { "epoch": 1.91, "grad_norm": 15.048817929045729, "learning_rate": 3.503978815487643e-06, "loss": 1.0014, "step": 18955 }, { "epoch": 1.91, "grad_norm": 9.630358819181739, "learning_rate": 3.501180438483626e-06, "loss": 0.9702, "step": 18960 }, { "epoch": 1.91, "grad_norm": 7.888623974867344, "learning_rate": 3.4983825772219858e-06, "loss": 0.9936, "step": 18965 }, { "epoch": 1.91, "grad_norm": 4.889481060326786, "learning_rate": 3.4955852326654683e-06, "loss": 0.987, "step": 18970 }, { "epoch": 1.91, "grad_norm": 7.851092223043249, "learning_rate": 3.4927884057766344e-06, "loss": 0.9532, "step": 18975 }, { "epoch": 1.91, "grad_norm": 21.96452203997625, "learning_rate": 3.4899920975178714e-06, "loss": 0.9801, "step": 18980 }, { "epoch": 1.91, "grad_norm": 16.662043974615656, "learning_rate": 3.4871963088513833e-06, "loss": 0.9786, "step": 18985 }, { "epoch": 1.91, "grad_norm": 5.3324544231113515, "learning_rate": 3.4844010407392035e-06, "loss": 0.9684, "step": 18990 }, { "epoch": 1.92, "grad_norm": 8.410229456737007, "learning_rate": 3.481606294143175e-06, "loss": 0.9266, "step": 18995 }, { "epoch": 1.92, "grad_norm": 7.723097172662212, "learning_rate": 3.4788120700249726e-06, "loss": 0.967, "step": 19000 }, { "epoch": 1.92, "grad_norm": 6.042198027911561, "learning_rate": 3.476018369346083e-06, "loss": 1.0244, "step": 19005 }, { "epoch": 1.92, "grad_norm": 10.48724264317641, "learning_rate": 3.4732251930678184e-06, "loss": 0.9755, "step": 19010 }, { "epoch": 1.92, "grad_norm": 6.070287251272303, "learning_rate": 3.470432542151304e-06, "loss": 0.9775, "step": 19015 }, { "epoch": 1.92, "grad_norm": 7.767740288674974, "learning_rate": 3.467640417557493e-06, "loss": 0.983, "step": 19020 }, { "epoch": 1.92, "grad_norm": 6.745129466956522, "learning_rate": 3.4648488202471497e-06, "loss": 0.9818, "step": 19025 }, { "epoch": 1.92, "grad_norm": 12.653727961877179, "learning_rate": 3.4620577511808623e-06, "loss": 0.9817, "step": 19030 }, { "epoch": 1.92, "grad_norm": 9.797091615802747, "learning_rate": 3.459267211319033e-06, "loss": 1.0586, "step": 19035 }, { "epoch": 1.92, "grad_norm": 11.293391404977797, "learning_rate": 3.456477201621885e-06, "loss": 0.9574, "step": 19040 }, { "epoch": 1.92, "grad_norm": 5.905429069319764, "learning_rate": 3.4536877230494562e-06, "loss": 0.9757, "step": 19045 }, { "epoch": 1.92, "grad_norm": 5.478601961947565, "learning_rate": 3.4508987765616075e-06, "loss": 0.9561, "step": 19050 }, { "epoch": 1.92, "grad_norm": 12.63894396760112, "learning_rate": 3.4481103631180087e-06, "loss": 0.9583, "step": 19055 }, { "epoch": 1.92, "grad_norm": 6.789057388553128, "learning_rate": 3.4453224836781526e-06, "loss": 1.0379, "step": 19060 }, { "epoch": 1.92, "grad_norm": 7.728543474370344, "learning_rate": 3.442535139201345e-06, "loss": 0.9697, "step": 19065 }, { "epoch": 1.92, "grad_norm": 8.968113376227555, "learning_rate": 3.439748330646707e-06, "loss": 0.9938, "step": 19070 }, { "epoch": 1.92, "grad_norm": 11.356592255128312, "learning_rate": 3.43696205897318e-06, "loss": 0.9788, "step": 19075 }, { "epoch": 1.92, "grad_norm": 15.07420793847312, "learning_rate": 3.434176325139513e-06, "loss": 0.9548, "step": 19080 }, { "epoch": 1.92, "grad_norm": 12.565583992430048, "learning_rate": 3.4313911301042813e-06, "loss": 1.0289, "step": 19085 }, { "epoch": 1.92, "grad_norm": 12.439259933471051, "learning_rate": 3.4286064748258597e-06, "loss": 0.9554, "step": 19090 }, { "epoch": 1.93, "grad_norm": 12.951354380025263, "learning_rate": 3.4258223602624507e-06, "loss": 1.0033, "step": 19095 }, { "epoch": 1.93, "grad_norm": 7.20930073969771, "learning_rate": 3.4230387873720637e-06, "loss": 0.9792, "step": 19100 }, { "epoch": 1.93, "grad_norm": 7.283083435209268, "learning_rate": 3.4202557571125245e-06, "loss": 0.9718, "step": 19105 }, { "epoch": 1.93, "grad_norm": 5.211683982481918, "learning_rate": 3.417473270441468e-06, "loss": 0.9909, "step": 19110 }, { "epoch": 1.93, "grad_norm": 12.417928327129392, "learning_rate": 3.414691328316352e-06, "loss": 0.9361, "step": 19115 }, { "epoch": 1.93, "grad_norm": 5.890075038626392, "learning_rate": 3.4119099316944315e-06, "loss": 0.9219, "step": 19120 }, { "epoch": 1.93, "grad_norm": 15.139042437693465, "learning_rate": 3.4091290815327893e-06, "loss": 0.9656, "step": 19125 }, { "epoch": 1.93, "grad_norm": 6.149139570733197, "learning_rate": 3.40634877878831e-06, "loss": 0.9365, "step": 19130 }, { "epoch": 1.93, "grad_norm": 14.084743495998689, "learning_rate": 3.4035690244176946e-06, "loss": 0.9949, "step": 19135 }, { "epoch": 1.93, "grad_norm": 6.122993624208971, "learning_rate": 3.4007898193774503e-06, "loss": 0.9489, "step": 19140 }, { "epoch": 1.93, "grad_norm": 6.45783174606669, "learning_rate": 3.3980111646239056e-06, "loss": 0.9574, "step": 19145 }, { "epoch": 1.93, "grad_norm": 17.3945960690069, "learning_rate": 3.3952330611131857e-06, "loss": 1.0276, "step": 19150 }, { "epoch": 1.93, "grad_norm": 9.510608770178116, "learning_rate": 3.392455509801239e-06, "loss": 0.9959, "step": 19155 }, { "epoch": 1.93, "grad_norm": 5.704575515763283, "learning_rate": 3.3896785116438145e-06, "loss": 0.9894, "step": 19160 }, { "epoch": 1.93, "grad_norm": 10.801312030785498, "learning_rate": 3.3869020675964777e-06, "loss": 1.0108, "step": 19165 }, { "epoch": 1.93, "grad_norm": 8.595579495569428, "learning_rate": 3.3841261786145963e-06, "loss": 0.9596, "step": 19170 }, { "epoch": 1.93, "grad_norm": 5.823529261050941, "learning_rate": 3.3813508456533566e-06, "loss": 1.0178, "step": 19175 }, { "epoch": 1.93, "grad_norm": 11.252521117776325, "learning_rate": 3.3785760696677424e-06, "loss": 0.966, "step": 19180 }, { "epoch": 1.93, "grad_norm": 8.811292947117277, "learning_rate": 3.3758018516125556e-06, "loss": 0.9748, "step": 19185 }, { "epoch": 1.93, "grad_norm": 4.916508964969845, "learning_rate": 3.3730281924423993e-06, "loss": 0.9212, "step": 19190 }, { "epoch": 1.94, "grad_norm": 26.873127984778893, "learning_rate": 3.370255093111689e-06, "loss": 0.9369, "step": 19195 }, { "epoch": 1.94, "grad_norm": 14.070726015621453, "learning_rate": 3.367482554574645e-06, "loss": 1.0289, "step": 19200 }, { "epoch": 1.94, "grad_norm": 5.370934105921357, "learning_rate": 3.364710577785293e-06, "loss": 0.9708, "step": 19205 }, { "epoch": 1.94, "grad_norm": 4.821227512347984, "learning_rate": 3.361939163697471e-06, "loss": 0.9436, "step": 19210 }, { "epoch": 1.94, "grad_norm": 6.814182159827276, "learning_rate": 3.3591683132648156e-06, "loss": 0.9094, "step": 19215 }, { "epoch": 1.94, "grad_norm": 7.957443256059856, "learning_rate": 3.3563980274407793e-06, "loss": 0.9984, "step": 19220 }, { "epoch": 1.94, "grad_norm": 6.935732607255355, "learning_rate": 3.3536283071786103e-06, "loss": 1.0013, "step": 19225 }, { "epoch": 1.94, "grad_norm": 11.855885109582275, "learning_rate": 3.3508591534313697e-06, "loss": 0.9671, "step": 19230 }, { "epoch": 1.94, "grad_norm": 5.1394970315511355, "learning_rate": 3.3480905671519183e-06, "loss": 0.9849, "step": 19235 }, { "epoch": 1.94, "grad_norm": 5.813034310776297, "learning_rate": 3.3453225492929255e-06, "loss": 1.0069, "step": 19240 }, { "epoch": 1.94, "grad_norm": 6.7170498460960255, "learning_rate": 3.3425551008068613e-06, "loss": 0.9643, "step": 19245 }, { "epoch": 1.94, "grad_norm": 6.363155157270702, "learning_rate": 3.339788222646006e-06, "loss": 0.9958, "step": 19250 }, { "epoch": 1.94, "grad_norm": 11.452011498593757, "learning_rate": 3.3370219157624363e-06, "loss": 1.0531, "step": 19255 }, { "epoch": 1.94, "grad_norm": 5.577825743787345, "learning_rate": 3.334256181108039e-06, "loss": 0.9736, "step": 19260 }, { "epoch": 1.94, "grad_norm": 16.352973183288714, "learning_rate": 3.331491019634496e-06, "loss": 0.987, "step": 19265 }, { "epoch": 1.94, "grad_norm": 8.162757418382164, "learning_rate": 3.3287264322933023e-06, "loss": 0.9249, "step": 19270 }, { "epoch": 1.94, "grad_norm": 20.131892472404356, "learning_rate": 3.3259624200357444e-06, "loss": 0.979, "step": 19275 }, { "epoch": 1.94, "grad_norm": 5.198175991509779, "learning_rate": 3.323198983812921e-06, "loss": 0.953, "step": 19280 }, { "epoch": 1.94, "grad_norm": 5.437106390168473, "learning_rate": 3.3204361245757242e-06, "loss": 0.9871, "step": 19285 }, { "epoch": 1.94, "grad_norm": 7.5062723968359535, "learning_rate": 3.3176738432748547e-06, "loss": 1.0045, "step": 19290 }, { "epoch": 1.95, "grad_norm": 5.170068060454293, "learning_rate": 3.3149121408608063e-06, "loss": 0.9387, "step": 19295 }, { "epoch": 1.95, "grad_norm": 13.939999849704586, "learning_rate": 3.3121510182838833e-06, "loss": 0.9707, "step": 19300 }, { "epoch": 1.95, "grad_norm": 13.588659684745384, "learning_rate": 3.3093904764941804e-06, "loss": 1.0645, "step": 19305 }, { "epoch": 1.95, "grad_norm": 5.260854066268349, "learning_rate": 3.306630516441602e-06, "loss": 1.0016, "step": 19310 }, { "epoch": 1.95, "grad_norm": 5.626575610265271, "learning_rate": 3.3038711390758436e-06, "loss": 0.9482, "step": 19315 }, { "epoch": 1.95, "grad_norm": 6.659383220388749, "learning_rate": 3.3011123453464088e-06, "loss": 0.9535, "step": 19320 }, { "epoch": 1.95, "grad_norm": 17.15180703898682, "learning_rate": 3.298354136202592e-06, "loss": 0.9649, "step": 19325 }, { "epoch": 1.95, "grad_norm": 5.692113458158882, "learning_rate": 3.2955965125934913e-06, "loss": 0.9702, "step": 19330 }, { "epoch": 1.95, "grad_norm": 5.830959718212891, "learning_rate": 3.2928394754680037e-06, "loss": 0.9656, "step": 19335 }, { "epoch": 1.95, "grad_norm": 6.298851888358607, "learning_rate": 3.29008302577482e-06, "loss": 1.0396, "step": 19340 }, { "epoch": 1.95, "grad_norm": 10.562326510048589, "learning_rate": 3.287327164462436e-06, "loss": 0.9582, "step": 19345 }, { "epoch": 1.95, "grad_norm": 7.80680385409349, "learning_rate": 3.284571892479138e-06, "loss": 0.9311, "step": 19350 }, { "epoch": 1.95, "grad_norm": 7.586249629628685, "learning_rate": 3.281817210773014e-06, "loss": 0.9851, "step": 19355 }, { "epoch": 1.95, "grad_norm": 7.72997718645396, "learning_rate": 3.279063120291946e-06, "loss": 0.9776, "step": 19360 }, { "epoch": 1.95, "grad_norm": 5.134953220297068, "learning_rate": 3.2763096219836153e-06, "loss": 1.0368, "step": 19365 }, { "epoch": 1.95, "grad_norm": 20.821681569213887, "learning_rate": 3.273556716795496e-06, "loss": 0.9414, "step": 19370 }, { "epoch": 1.95, "grad_norm": 7.563927082040128, "learning_rate": 3.270804405674863e-06, "loss": 0.968, "step": 19375 }, { "epoch": 1.95, "grad_norm": 15.408947715661137, "learning_rate": 3.2680526895687813e-06, "loss": 0.9527, "step": 19380 }, { "epoch": 1.95, "grad_norm": 16.582628546020413, "learning_rate": 3.2653015694241166e-06, "loss": 0.9667, "step": 19385 }, { "epoch": 1.95, "grad_norm": 6.582224579638049, "learning_rate": 3.2625510461875238e-06, "loss": 0.9966, "step": 19390 }, { "epoch": 1.96, "grad_norm": 5.985900200531787, "learning_rate": 3.259801120805457e-06, "loss": 0.9541, "step": 19395 }, { "epoch": 1.96, "grad_norm": 6.757010012138148, "learning_rate": 3.257051794224161e-06, "loss": 0.9379, "step": 19400 }, { "epoch": 1.96, "grad_norm": 5.71433219862553, "learning_rate": 3.25430306738968e-06, "loss": 0.944, "step": 19405 }, { "epoch": 1.96, "grad_norm": 6.147343628896916, "learning_rate": 3.2515549412478446e-06, "loss": 0.9509, "step": 19410 }, { "epoch": 1.96, "grad_norm": 6.409899027959961, "learning_rate": 3.2488074167442852e-06, "loss": 0.9639, "step": 19415 }, { "epoch": 1.96, "grad_norm": 6.426059907250027, "learning_rate": 3.24606049482442e-06, "loss": 0.9638, "step": 19420 }, { "epoch": 1.96, "grad_norm": 6.580312941706315, "learning_rate": 3.2433141764334654e-06, "loss": 0.914, "step": 19425 }, { "epoch": 1.96, "grad_norm": 20.71500348245436, "learning_rate": 3.2405684625164213e-06, "loss": 0.9696, "step": 19430 }, { "epoch": 1.96, "grad_norm": 9.001431989529735, "learning_rate": 3.2378233540180913e-06, "loss": 0.938, "step": 19435 }, { "epoch": 1.96, "grad_norm": 25.185513924432797, "learning_rate": 3.2350788518830617e-06, "loss": 0.9514, "step": 19440 }, { "epoch": 1.96, "grad_norm": 7.790210054110683, "learning_rate": 3.232334957055714e-06, "loss": 0.9617, "step": 19445 }, { "epoch": 1.96, "grad_norm": 8.369432007064662, "learning_rate": 3.2295916704802194e-06, "loss": 0.9363, "step": 19450 }, { "epoch": 1.96, "grad_norm": 5.8460514741934, "learning_rate": 3.2268489931005385e-06, "loss": 0.9848, "step": 19455 }, { "epoch": 1.96, "grad_norm": 14.77954889982426, "learning_rate": 3.2241069258604273e-06, "loss": 0.9394, "step": 19460 }, { "epoch": 1.96, "grad_norm": 4.971049752470999, "learning_rate": 3.2213654697034246e-06, "loss": 0.9953, "step": 19465 }, { "epoch": 1.96, "grad_norm": 9.583984995729754, "learning_rate": 3.218624625572867e-06, "loss": 0.9636, "step": 19470 }, { "epoch": 1.96, "grad_norm": 12.056659589103473, "learning_rate": 3.2158843944118735e-06, "loss": 1.0098, "step": 19475 }, { "epoch": 1.96, "grad_norm": 5.1372502108362665, "learning_rate": 3.213144777163356e-06, "loss": 0.9576, "step": 19480 }, { "epoch": 1.96, "grad_norm": 8.742973523888802, "learning_rate": 3.2104057747700133e-06, "loss": 1.012, "step": 19485 }, { "epoch": 1.97, "grad_norm": 4.8678377346229755, "learning_rate": 3.207667388174336e-06, "loss": 0.9343, "step": 19490 }, { "epoch": 1.97, "grad_norm": 4.993960911914766, "learning_rate": 3.204929618318596e-06, "loss": 0.9503, "step": 19495 }, { "epoch": 1.97, "grad_norm": 7.897187382469572, "learning_rate": 3.2021924661448617e-06, "loss": 0.9685, "step": 19500 }, { "epoch": 1.97, "grad_norm": 8.06865849915388, "learning_rate": 3.199455932594982e-06, "loss": 0.9807, "step": 19505 }, { "epoch": 1.97, "grad_norm": 5.445746655511793, "learning_rate": 3.1967200186105975e-06, "loss": 0.9827, "step": 19510 }, { "epoch": 1.97, "grad_norm": 8.303285449271074, "learning_rate": 3.19398472513313e-06, "loss": 0.9557, "step": 19515 }, { "epoch": 1.97, "grad_norm": 5.096354209566563, "learning_rate": 3.191250053103796e-06, "loss": 1.0006, "step": 19520 }, { "epoch": 1.97, "grad_norm": 9.044059243281657, "learning_rate": 3.1885160034635885e-06, "loss": 0.9825, "step": 19525 }, { "epoch": 1.97, "grad_norm": 8.238962956982464, "learning_rate": 3.185782577153296e-06, "loss": 0.986, "step": 19530 }, { "epoch": 1.97, "grad_norm": 8.294757392118468, "learning_rate": 3.183049775113485e-06, "loss": 0.947, "step": 19535 }, { "epoch": 1.97, "grad_norm": 10.90709327082817, "learning_rate": 3.1803175982845124e-06, "loss": 0.9887, "step": 19540 }, { "epoch": 1.97, "grad_norm": 7.1987411375622825, "learning_rate": 3.1775860476065147e-06, "loss": 0.9221, "step": 19545 }, { "epoch": 1.97, "grad_norm": 5.084108859024125, "learning_rate": 3.1748551240194192e-06, "loss": 0.992, "step": 19550 }, { "epoch": 1.97, "grad_norm": 4.930419426018079, "learning_rate": 3.1721248284629293e-06, "loss": 0.9964, "step": 19555 }, { "epoch": 1.97, "grad_norm": 5.018806857675019, "learning_rate": 3.169395161876543e-06, "loss": 0.9697, "step": 19560 }, { "epoch": 1.97, "grad_norm": 9.210110614847695, "learning_rate": 3.1666661251995313e-06, "loss": 0.9453, "step": 19565 }, { "epoch": 1.97, "grad_norm": 5.061761204356091, "learning_rate": 3.1639377193709564e-06, "loss": 0.9771, "step": 19570 }, { "epoch": 1.97, "grad_norm": 5.907670275453538, "learning_rate": 3.1612099453296575e-06, "loss": 0.9937, "step": 19575 }, { "epoch": 1.97, "grad_norm": 6.268881229619832, "learning_rate": 3.158482804014261e-06, "loss": 0.9415, "step": 19580 }, { "epoch": 1.97, "grad_norm": 6.410057104629809, "learning_rate": 3.1557562963631728e-06, "loss": 0.9235, "step": 19585 }, { "epoch": 1.98, "grad_norm": 6.225154512292546, "learning_rate": 3.153030423314579e-06, "loss": 0.9581, "step": 19590 }, { "epoch": 1.98, "grad_norm": 7.5527394175980005, "learning_rate": 3.150305185806457e-06, "loss": 0.9861, "step": 19595 }, { "epoch": 1.98, "grad_norm": 5.332123472990733, "learning_rate": 3.1475805847765497e-06, "loss": 0.993, "step": 19600 }, { "epoch": 1.98, "grad_norm": 10.367055598439357, "learning_rate": 3.1448566211623964e-06, "loss": 1.009, "step": 19605 }, { "epoch": 1.98, "grad_norm": 6.04981083164468, "learning_rate": 3.1421332959013063e-06, "loss": 0.9901, "step": 19610 }, { "epoch": 1.98, "grad_norm": 7.850619560943523, "learning_rate": 3.1394106099303766e-06, "loss": 0.9711, "step": 19615 }, { "epoch": 1.98, "grad_norm": 5.190983677356039, "learning_rate": 3.1366885641864764e-06, "loss": 0.974, "step": 19620 }, { "epoch": 1.98, "grad_norm": 12.919928057436149, "learning_rate": 3.1339671596062642e-06, "loss": 0.9464, "step": 19625 }, { "epoch": 1.98, "grad_norm": 11.208963956216042, "learning_rate": 3.1312463971261675e-06, "loss": 0.9159, "step": 19630 }, { "epoch": 1.98, "grad_norm": 10.998212896605247, "learning_rate": 3.1285262776824033e-06, "loss": 0.918, "step": 19635 }, { "epoch": 1.98, "grad_norm": 6.484371252595751, "learning_rate": 3.125806802210958e-06, "loss": 0.9785, "step": 19640 }, { "epoch": 1.98, "grad_norm": 10.07006168798367, "learning_rate": 3.1230879716476027e-06, "loss": 0.9834, "step": 19645 }, { "epoch": 1.98, "grad_norm": 8.719973146813349, "learning_rate": 3.1203697869278815e-06, "loss": 0.9671, "step": 19650 }, { "epoch": 1.98, "grad_norm": 9.906113261291717, "learning_rate": 3.117652248987125e-06, "loss": 0.9734, "step": 19655 }, { "epoch": 1.98, "grad_norm": 6.461870871497509, "learning_rate": 3.1149353587604282e-06, "loss": 0.9461, "step": 19660 }, { "epoch": 1.98, "grad_norm": 5.962793748592961, "learning_rate": 3.1122191171826765e-06, "loss": 0.974, "step": 19665 }, { "epoch": 1.98, "grad_norm": 4.555865802855921, "learning_rate": 3.109503525188522e-06, "loss": 0.9898, "step": 19670 }, { "epoch": 1.98, "grad_norm": 8.682060107258698, "learning_rate": 3.1067885837124003e-06, "loss": 0.9276, "step": 19675 }, { "epoch": 1.98, "grad_norm": 8.964687635868797, "learning_rate": 3.104074293688517e-06, "loss": 0.9366, "step": 19680 }, { "epoch": 1.98, "grad_norm": 6.943606394724827, "learning_rate": 3.1013606560508606e-06, "loss": 0.9712, "step": 19685 }, { "epoch": 1.99, "grad_norm": 15.623945247762496, "learning_rate": 3.098647671733187e-06, "loss": 0.9687, "step": 19690 }, { "epoch": 1.99, "grad_norm": 5.5336412205470475, "learning_rate": 3.0959353416690354e-06, "loss": 0.9732, "step": 19695 }, { "epoch": 1.99, "grad_norm": 21.16146004449491, "learning_rate": 3.0932236667917128e-06, "loss": 0.989, "step": 19700 }, { "epoch": 1.99, "grad_norm": 8.607128787122527, "learning_rate": 3.0905126480343057e-06, "loss": 0.9715, "step": 19705 }, { "epoch": 1.99, "grad_norm": 5.665532346332486, "learning_rate": 3.087802286329673e-06, "loss": 0.9226, "step": 19710 }, { "epoch": 1.99, "grad_norm": 11.992099425488396, "learning_rate": 3.085092582610446e-06, "loss": 0.9462, "step": 19715 }, { "epoch": 1.99, "grad_norm": 5.808277797298123, "learning_rate": 3.0823835378090344e-06, "loss": 1.006, "step": 19720 }, { "epoch": 1.99, "grad_norm": 5.481945211686297, "learning_rate": 3.0796751528576137e-06, "loss": 0.9797, "step": 19725 }, { "epoch": 1.99, "grad_norm": 5.420100717721782, "learning_rate": 3.07696742868814e-06, "loss": 0.9554, "step": 19730 }, { "epoch": 1.99, "grad_norm": 6.261498471883031, "learning_rate": 3.074260366232337e-06, "loss": 0.9386, "step": 19735 }, { "epoch": 1.99, "grad_norm": 7.262869495772408, "learning_rate": 3.071553966421703e-06, "loss": 0.9791, "step": 19740 }, { "epoch": 1.99, "grad_norm": 8.01776762367467, "learning_rate": 3.068848230187506e-06, "loss": 0.9706, "step": 19745 }, { "epoch": 1.99, "grad_norm": 16.350092017822924, "learning_rate": 3.0661431584607917e-06, "loss": 0.9205, "step": 19750 }, { "epoch": 1.99, "grad_norm": 9.02862210737073, "learning_rate": 3.063438752172366e-06, "loss": 0.9602, "step": 19755 }, { "epoch": 1.99, "grad_norm": 9.624701360428293, "learning_rate": 3.060735012252818e-06, "loss": 0.9675, "step": 19760 }, { "epoch": 1.99, "grad_norm": 10.022757663300407, "learning_rate": 3.058031939632499e-06, "loss": 0.9546, "step": 19765 }, { "epoch": 1.99, "grad_norm": 11.299124192747938, "learning_rate": 3.0553295352415356e-06, "loss": 0.9571, "step": 19770 }, { "epoch": 1.99, "grad_norm": 6.8246198096307005, "learning_rate": 3.0526278000098208e-06, "loss": 0.9435, "step": 19775 }, { "epoch": 1.99, "grad_norm": 18.1215474019368, "learning_rate": 3.0499267348670225e-06, "loss": 0.9766, "step": 19780 }, { "epoch": 1.99, "grad_norm": 7.868272802586499, "learning_rate": 3.047226340742569e-06, "loss": 0.9472, "step": 19785 }, { "epoch": 2.0, "grad_norm": 5.174261883529199, "learning_rate": 3.0445266185656685e-06, "loss": 0.9222, "step": 19790 }, { "epoch": 2.0, "grad_norm": 5.064743033532646, "learning_rate": 3.04182756926529e-06, "loss": 0.986, "step": 19795 }, { "epoch": 2.0, "grad_norm": 5.872229895463509, "learning_rate": 3.0391291937701755e-06, "loss": 0.9669, "step": 19800 }, { "epoch": 2.0, "grad_norm": 5.4018475622049, "learning_rate": 3.0364314930088312e-06, "loss": 0.9524, "step": 19805 }, { "epoch": 2.0, "grad_norm": 25.0373690123754, "learning_rate": 3.0337344679095373e-06, "loss": 0.9465, "step": 19810 }, { "epoch": 2.0, "grad_norm": 10.512944296977459, "learning_rate": 3.0310381194003312e-06, "loss": 1.0136, "step": 19815 }, { "epoch": 2.0, "grad_norm": 26.48421656057948, "learning_rate": 3.0283424484090306e-06, "loss": 0.9404, "step": 19820 }, { "epoch": 2.0, "grad_norm": 11.807803981551405, "learning_rate": 3.0256474558632093e-06, "loss": 0.9365, "step": 19825 }, { "epoch": 2.0, "grad_norm": 4.631833071132508, "learning_rate": 3.022953142690214e-06, "loss": 0.9636, "step": 19830 }, { "epoch": 2.0, "grad_norm": 5.990611013751563, "learning_rate": 3.0202595098171527e-06, "loss": 0.9332, "step": 19835 }, { "epoch": 2.0, "eval_loss": 1.0710164308547974, "eval_runtime": 25.431, "eval_samples_per_second": 31.694, "eval_steps_per_second": 3.972, "step": 19837 }, { "epoch": 2.0, "grad_norm": 10.07847082531632, "learning_rate": 3.0175665581709035e-06, "loss": 0.8202, "step": 19840 }, { "epoch": 2.0, "grad_norm": 8.554418428839272, "learning_rate": 3.014874288678109e-06, "loss": 0.708, "step": 19845 }, { "epoch": 2.0, "grad_norm": 5.866761074974754, "learning_rate": 3.012182702265173e-06, "loss": 0.6866, "step": 19850 }, { "epoch": 2.0, "grad_norm": 5.599634041133625, "learning_rate": 3.0094917998582737e-06, "loss": 0.693, "step": 19855 }, { "epoch": 2.0, "grad_norm": 8.495053387713696, "learning_rate": 3.006801582383343e-06, "loss": 0.6998, "step": 19860 }, { "epoch": 2.0, "grad_norm": 17.031744168199783, "learning_rate": 3.0041120507660854e-06, "loss": 0.6845, "step": 19865 }, { "epoch": 2.0, "grad_norm": 5.8690008816324, "learning_rate": 3.0014232059319636e-06, "loss": 0.6585, "step": 19870 }, { "epoch": 2.0, "grad_norm": 5.786581846929866, "learning_rate": 2.9987350488062088e-06, "loss": 0.7137, "step": 19875 }, { "epoch": 2.0, "grad_norm": 4.322402483787055, "learning_rate": 2.9960475803138105e-06, "loss": 0.6975, "step": 19880 }, { "epoch": 2.0, "grad_norm": 4.447412321402005, "learning_rate": 2.993360801379528e-06, "loss": 0.6839, "step": 19885 }, { "epoch": 2.01, "grad_norm": 6.652872882405284, "learning_rate": 2.9906747129278756e-06, "loss": 0.6754, "step": 19890 }, { "epoch": 2.01, "grad_norm": 5.959284438123414, "learning_rate": 2.9879893158831368e-06, "loss": 0.7099, "step": 19895 }, { "epoch": 2.01, "grad_norm": 11.276126342509745, "learning_rate": 2.985304611169352e-06, "loss": 0.7156, "step": 19900 }, { "epoch": 2.01, "grad_norm": 6.270239118964066, "learning_rate": 2.9826205997103264e-06, "loss": 0.7028, "step": 19905 }, { "epoch": 2.01, "grad_norm": 5.870637246845116, "learning_rate": 2.979937282429625e-06, "loss": 0.6825, "step": 19910 }, { "epoch": 2.01, "grad_norm": 6.8819565680372135, "learning_rate": 2.9772546602505758e-06, "loss": 0.6809, "step": 19915 }, { "epoch": 2.01, "grad_norm": 5.735775614992868, "learning_rate": 2.974572734096266e-06, "loss": 0.6958, "step": 19920 }, { "epoch": 2.01, "grad_norm": 5.125437627956788, "learning_rate": 2.9718915048895442e-06, "loss": 0.7067, "step": 19925 }, { "epoch": 2.01, "grad_norm": 5.669127427169447, "learning_rate": 2.9692109735530183e-06, "loss": 0.6444, "step": 19930 }, { "epoch": 2.01, "grad_norm": 12.186094264136873, "learning_rate": 2.966531141009057e-06, "loss": 0.6882, "step": 19935 }, { "epoch": 2.01, "grad_norm": 16.473372562403895, "learning_rate": 2.9638520081797862e-06, "loss": 0.6951, "step": 19940 }, { "epoch": 2.01, "grad_norm": 5.946284342434288, "learning_rate": 2.9611735759870963e-06, "loss": 0.7039, "step": 19945 }, { "epoch": 2.01, "grad_norm": 14.49126548825676, "learning_rate": 2.9584958453526315e-06, "loss": 0.7073, "step": 19950 }, { "epoch": 2.01, "grad_norm": 21.897553417108384, "learning_rate": 2.955818817197797e-06, "loss": 0.6855, "step": 19955 }, { "epoch": 2.01, "grad_norm": 5.509726325927025, "learning_rate": 2.9531424924437535e-06, "loss": 0.6387, "step": 19960 }, { "epoch": 2.01, "grad_norm": 14.647114585979262, "learning_rate": 2.950466872011425e-06, "loss": 0.7173, "step": 19965 }, { "epoch": 2.01, "grad_norm": 19.553340817495304, "learning_rate": 2.9477919568214886e-06, "loss": 0.7056, "step": 19970 }, { "epoch": 2.01, "grad_norm": 7.480061284531499, "learning_rate": 2.9451177477943772e-06, "loss": 0.6694, "step": 19975 }, { "epoch": 2.01, "grad_norm": 4.73237332843965, "learning_rate": 2.9424442458502884e-06, "loss": 0.6902, "step": 19980 }, { "epoch": 2.01, "grad_norm": 4.257596743266556, "learning_rate": 2.939771451909168e-06, "loss": 0.6621, "step": 19985 }, { "epoch": 2.02, "grad_norm": 7.007980217929251, "learning_rate": 2.937099366890725e-06, "loss": 0.6742, "step": 19990 }, { "epoch": 2.02, "grad_norm": 6.684722771405095, "learning_rate": 2.934427991714418e-06, "loss": 0.6924, "step": 19995 }, { "epoch": 2.02, "grad_norm": 6.459688562079147, "learning_rate": 2.931757327299467e-06, "loss": 0.6764, "step": 20000 }, { "epoch": 2.02, "grad_norm": 7.166925967390257, "learning_rate": 2.929087374564843e-06, "loss": 0.7016, "step": 20005 }, { "epoch": 2.02, "grad_norm": 5.216661904637062, "learning_rate": 2.9264181344292775e-06, "loss": 0.688, "step": 20010 }, { "epoch": 2.02, "grad_norm": 4.799432873080406, "learning_rate": 2.9237496078112493e-06, "loss": 0.6843, "step": 20015 }, { "epoch": 2.02, "grad_norm": 4.919357986122606, "learning_rate": 2.921081795629e-06, "loss": 0.6726, "step": 20020 }, { "epoch": 2.02, "grad_norm": 8.222286865949679, "learning_rate": 2.918414698800517e-06, "loss": 0.6846, "step": 20025 }, { "epoch": 2.02, "grad_norm": 4.497129639848889, "learning_rate": 2.91574831824355e-06, "loss": 0.6942, "step": 20030 }, { "epoch": 2.02, "grad_norm": 4.741374508164342, "learning_rate": 2.913082654875596e-06, "loss": 0.722, "step": 20035 }, { "epoch": 2.02, "grad_norm": 4.86965920427053, "learning_rate": 2.9104177096139073e-06, "loss": 0.6915, "step": 20040 }, { "epoch": 2.02, "grad_norm": 9.343518561020797, "learning_rate": 2.9077534833754866e-06, "loss": 0.685, "step": 20045 }, { "epoch": 2.02, "grad_norm": 5.7595663507858905, "learning_rate": 2.9050899770770964e-06, "loss": 0.6682, "step": 20050 }, { "epoch": 2.02, "grad_norm": 5.569107304442746, "learning_rate": 2.902427191635242e-06, "loss": 0.6701, "step": 20055 }, { "epoch": 2.02, "grad_norm": 4.941058043606878, "learning_rate": 2.8997651279661893e-06, "loss": 0.6646, "step": 20060 }, { "epoch": 2.02, "grad_norm": 5.2021071826259995, "learning_rate": 2.8971037869859503e-06, "loss": 0.6754, "step": 20065 }, { "epoch": 2.02, "grad_norm": 23.181178572182155, "learning_rate": 2.8944431696102898e-06, "loss": 0.6937, "step": 20070 }, { "epoch": 2.02, "grad_norm": 8.262592504800057, "learning_rate": 2.8917832767547215e-06, "loss": 0.6674, "step": 20075 }, { "epoch": 2.02, "grad_norm": 8.445147355846368, "learning_rate": 2.8891241093345163e-06, "loss": 0.7094, "step": 20080 }, { "epoch": 2.03, "grad_norm": 5.449205286580153, "learning_rate": 2.886465668264686e-06, "loss": 0.6716, "step": 20085 }, { "epoch": 2.03, "grad_norm": 15.904881029885123, "learning_rate": 2.8838079544600038e-06, "loss": 0.6855, "step": 20090 }, { "epoch": 2.03, "grad_norm": 7.317897665887902, "learning_rate": 2.881150968834983e-06, "loss": 0.6476, "step": 20095 }, { "epoch": 2.03, "grad_norm": 22.960176156048963, "learning_rate": 2.87849471230389e-06, "loss": 0.6739, "step": 20100 }, { "epoch": 2.03, "grad_norm": 12.81723029382713, "learning_rate": 2.87583918578074e-06, "loss": 0.7035, "step": 20105 }, { "epoch": 2.03, "grad_norm": 6.429570917860615, "learning_rate": 2.8731843901792955e-06, "loss": 0.6859, "step": 20110 }, { "epoch": 2.03, "grad_norm": 16.864528516068667, "learning_rate": 2.8705303264130734e-06, "loss": 0.6718, "step": 20115 }, { "epoch": 2.03, "grad_norm": 5.239644650544476, "learning_rate": 2.86787699539533e-06, "loss": 0.655, "step": 20120 }, { "epoch": 2.03, "grad_norm": 5.1466601613579686, "learning_rate": 2.8652243980390778e-06, "loss": 0.7119, "step": 20125 }, { "epoch": 2.03, "grad_norm": 6.394354689435088, "learning_rate": 2.862572535257071e-06, "loss": 0.7041, "step": 20130 }, { "epoch": 2.03, "grad_norm": 19.978558529974322, "learning_rate": 2.8599214079618137e-06, "loss": 0.6777, "step": 20135 }, { "epoch": 2.03, "grad_norm": 5.013234273687217, "learning_rate": 2.857271017065554e-06, "loss": 0.6787, "step": 20140 }, { "epoch": 2.03, "grad_norm": 7.637030733686688, "learning_rate": 2.854621363480292e-06, "loss": 0.7093, "step": 20145 }, { "epoch": 2.03, "grad_norm": 6.422025071823492, "learning_rate": 2.851972448117769e-06, "loss": 0.7019, "step": 20150 }, { "epoch": 2.03, "grad_norm": 9.61149127644432, "learning_rate": 2.8493242718894765e-06, "loss": 0.6949, "step": 20155 }, { "epoch": 2.03, "grad_norm": 6.036412049392237, "learning_rate": 2.8466768357066475e-06, "loss": 0.6843, "step": 20160 }, { "epoch": 2.03, "grad_norm": 12.36780256872287, "learning_rate": 2.844030140480263e-06, "loss": 0.7137, "step": 20165 }, { "epoch": 2.03, "grad_norm": 5.8956494643916875, "learning_rate": 2.8413841871210456e-06, "loss": 0.6885, "step": 20170 }, { "epoch": 2.03, "grad_norm": 5.501107518944112, "learning_rate": 2.83873897653947e-06, "loss": 0.6672, "step": 20175 }, { "epoch": 2.03, "grad_norm": 5.34334702345557, "learning_rate": 2.836094509645747e-06, "loss": 0.6899, "step": 20180 }, { "epoch": 2.04, "grad_norm": 7.976060684480714, "learning_rate": 2.8334507873498373e-06, "loss": 0.6979, "step": 20185 }, { "epoch": 2.04, "grad_norm": 4.765949760846961, "learning_rate": 2.8308078105614433e-06, "loss": 0.6678, "step": 20190 }, { "epoch": 2.04, "grad_norm": 5.483903474155636, "learning_rate": 2.8281655801900098e-06, "loss": 0.6894, "step": 20195 }, { "epoch": 2.04, "grad_norm": 5.11059505030005, "learning_rate": 2.8255240971447227e-06, "loss": 0.6772, "step": 20200 }, { "epoch": 2.04, "grad_norm": 4.625996552622085, "learning_rate": 2.8228833623345205e-06, "loss": 0.71, "step": 20205 }, { "epoch": 2.04, "grad_norm": 5.572562001223029, "learning_rate": 2.820243376668071e-06, "loss": 0.6595, "step": 20210 }, { "epoch": 2.04, "grad_norm": 5.759923303950589, "learning_rate": 2.817604141053796e-06, "loss": 0.7204, "step": 20215 }, { "epoch": 2.04, "grad_norm": 6.061007775325533, "learning_rate": 2.8149656563998513e-06, "loss": 0.6661, "step": 20220 }, { "epoch": 2.04, "grad_norm": 4.473550927111044, "learning_rate": 2.8123279236141378e-06, "loss": 0.6577, "step": 20225 }, { "epoch": 2.04, "grad_norm": 6.747404862096733, "learning_rate": 2.809690943604295e-06, "loss": 0.6944, "step": 20230 }, { "epoch": 2.04, "grad_norm": 12.424278453040909, "learning_rate": 2.8070547172777063e-06, "loss": 0.6621, "step": 20235 }, { "epoch": 2.04, "grad_norm": 5.392454233816305, "learning_rate": 2.8044192455414955e-06, "loss": 0.675, "step": 20240 }, { "epoch": 2.04, "grad_norm": 4.785097695138289, "learning_rate": 2.8017845293025243e-06, "loss": 0.6736, "step": 20245 }, { "epoch": 2.04, "grad_norm": 6.860239130549238, "learning_rate": 2.7991505694673983e-06, "loss": 0.7173, "step": 20250 }, { "epoch": 2.04, "grad_norm": 5.029730559126252, "learning_rate": 2.7965173669424596e-06, "loss": 0.6879, "step": 20255 }, { "epoch": 2.04, "grad_norm": 9.141573323808583, "learning_rate": 2.7938849226337905e-06, "loss": 0.6831, "step": 20260 }, { "epoch": 2.04, "grad_norm": 4.803515943071768, "learning_rate": 2.79125323744721e-06, "loss": 0.6667, "step": 20265 }, { "epoch": 2.04, "grad_norm": 4.768831960299563, "learning_rate": 2.7886223122882833e-06, "loss": 0.6589, "step": 20270 }, { "epoch": 2.04, "grad_norm": 5.363161119443387, "learning_rate": 2.7859921480623048e-06, "loss": 0.6545, "step": 20275 }, { "epoch": 2.04, "grad_norm": 6.738430484972305, "learning_rate": 2.783362745674315e-06, "loss": 0.6556, "step": 20280 }, { "epoch": 2.05, "grad_norm": 5.157022650903519, "learning_rate": 2.780734106029088e-06, "loss": 0.6538, "step": 20285 }, { "epoch": 2.05, "grad_norm": 8.027913613784868, "learning_rate": 2.7781062300311357e-06, "loss": 0.6428, "step": 20290 }, { "epoch": 2.05, "grad_norm": 7.702997038291723, "learning_rate": 2.7754791185847045e-06, "loss": 0.6968, "step": 20295 }, { "epoch": 2.05, "grad_norm": 4.703632225890155, "learning_rate": 2.7728527725937866e-06, "loss": 0.7008, "step": 20300 }, { "epoch": 2.05, "grad_norm": 4.326646266181257, "learning_rate": 2.7702271929621004e-06, "loss": 0.6359, "step": 20305 }, { "epoch": 2.05, "grad_norm": 7.596957107489121, "learning_rate": 2.7676023805931096e-06, "loss": 0.6543, "step": 20310 }, { "epoch": 2.05, "grad_norm": 4.4689293897811275, "learning_rate": 2.764978336390007e-06, "loss": 0.6543, "step": 20315 }, { "epoch": 2.05, "grad_norm": 4.592063799128114, "learning_rate": 2.762355061255724e-06, "loss": 0.6456, "step": 20320 }, { "epoch": 2.05, "grad_norm": 8.724734726353038, "learning_rate": 2.7597325560929254e-06, "loss": 0.6652, "step": 20325 }, { "epoch": 2.05, "grad_norm": 4.5520198384323365, "learning_rate": 2.7571108218040172e-06, "loss": 0.6667, "step": 20330 }, { "epoch": 2.05, "grad_norm": 4.7006276737748305, "learning_rate": 2.7544898592911305e-06, "loss": 0.6714, "step": 20335 }, { "epoch": 2.05, "grad_norm": 7.035710997650199, "learning_rate": 2.7518696694561402e-06, "loss": 0.663, "step": 20340 }, { "epoch": 2.05, "grad_norm": 5.984785241111086, "learning_rate": 2.7492502532006498e-06, "loss": 0.6972, "step": 20345 }, { "epoch": 2.05, "grad_norm": 12.48809346091373, "learning_rate": 2.746631611425997e-06, "loss": 0.6877, "step": 20350 }, { "epoch": 2.05, "grad_norm": 4.822505788690949, "learning_rate": 2.744013745033256e-06, "loss": 0.6788, "step": 20355 }, { "epoch": 2.05, "grad_norm": 8.309126432519314, "learning_rate": 2.7413966549232274e-06, "loss": 0.6215, "step": 20360 }, { "epoch": 2.05, "grad_norm": 9.376856836130756, "learning_rate": 2.7387803419964547e-06, "loss": 0.6749, "step": 20365 }, { "epoch": 2.05, "grad_norm": 4.955550699410014, "learning_rate": 2.736164807153205e-06, "loss": 0.6753, "step": 20370 }, { "epoch": 2.05, "grad_norm": 12.158760477300898, "learning_rate": 2.7335500512934844e-06, "loss": 0.657, "step": 20375 }, { "epoch": 2.05, "grad_norm": 7.839000626662179, "learning_rate": 2.7309360753170267e-06, "loss": 0.698, "step": 20380 }, { "epoch": 2.06, "grad_norm": 4.465886089969563, "learning_rate": 2.7283228801232985e-06, "loss": 0.6795, "step": 20385 }, { "epoch": 2.06, "grad_norm": 11.638003524690347, "learning_rate": 2.725710466611495e-06, "loss": 0.6578, "step": 20390 }, { "epoch": 2.06, "grad_norm": 15.555120482903076, "learning_rate": 2.7230988356805502e-06, "loss": 0.6611, "step": 20395 }, { "epoch": 2.06, "grad_norm": 6.007908263827729, "learning_rate": 2.7204879882291195e-06, "loss": 0.711, "step": 20400 }, { "epoch": 2.06, "grad_norm": 9.52058789317427, "learning_rate": 2.7178779251555963e-06, "loss": 0.686, "step": 20405 }, { "epoch": 2.06, "grad_norm": 8.290434833099377, "learning_rate": 2.7152686473581e-06, "loss": 0.6597, "step": 20410 }, { "epoch": 2.06, "grad_norm": 6.865850991334291, "learning_rate": 2.71266015573448e-06, "loss": 0.6921, "step": 20415 }, { "epoch": 2.06, "grad_norm": 7.093957021899156, "learning_rate": 2.7100524511823134e-06, "loss": 0.6589, "step": 20420 }, { "epoch": 2.06, "grad_norm": 12.990284071684462, "learning_rate": 2.7074455345989128e-06, "loss": 0.6583, "step": 20425 }, { "epoch": 2.06, "grad_norm": 8.045778809474987, "learning_rate": 2.7048394068813127e-06, "loss": 0.6724, "step": 20430 }, { "epoch": 2.06, "grad_norm": 5.497086333725814, "learning_rate": 2.7022340689262827e-06, "loss": 0.6372, "step": 20435 }, { "epoch": 2.06, "grad_norm": 12.37157407740874, "learning_rate": 2.6996295216303147e-06, "loss": 0.6974, "step": 20440 }, { "epoch": 2.06, "grad_norm": 8.893442888000223, "learning_rate": 2.697025765889631e-06, "loss": 0.6669, "step": 20445 }, { "epoch": 2.06, "grad_norm": 5.408198005557214, "learning_rate": 2.6944228026001795e-06, "loss": 0.6511, "step": 20450 }, { "epoch": 2.06, "grad_norm": 5.167101911384638, "learning_rate": 2.6918206326576424e-06, "loss": 0.6442, "step": 20455 }, { "epoch": 2.06, "grad_norm": 4.727500287228669, "learning_rate": 2.6892192569574183e-06, "loss": 0.6843, "step": 20460 }, { "epoch": 2.06, "grad_norm": 4.898389179651797, "learning_rate": 2.6866186763946426e-06, "loss": 0.6685, "step": 20465 }, { "epoch": 2.06, "grad_norm": 5.218251143268944, "learning_rate": 2.684018891864172e-06, "loss": 0.6747, "step": 20470 }, { "epoch": 2.06, "grad_norm": 4.851991764395169, "learning_rate": 2.681419904260587e-06, "loss": 0.6963, "step": 20475 }, { "epoch": 2.06, "grad_norm": 6.618853238599201, "learning_rate": 2.6788217144781995e-06, "loss": 0.6651, "step": 20480 }, { "epoch": 2.07, "grad_norm": 4.94027051266893, "learning_rate": 2.6762243234110415e-06, "loss": 0.688, "step": 20485 }, { "epoch": 2.07, "grad_norm": 8.675509761541878, "learning_rate": 2.6736277319528757e-06, "loss": 0.7261, "step": 20490 }, { "epoch": 2.07, "grad_norm": 8.93388076006707, "learning_rate": 2.6710319409971837e-06, "loss": 0.7099, "step": 20495 }, { "epoch": 2.07, "grad_norm": 5.709835978825093, "learning_rate": 2.66843695143718e-06, "loss": 0.7102, "step": 20500 }, { "epoch": 2.07, "grad_norm": 5.807707734312976, "learning_rate": 2.6658427641657903e-06, "loss": 0.6731, "step": 20505 }, { "epoch": 2.07, "grad_norm": 4.902988562763788, "learning_rate": 2.6632493800756787e-06, "loss": 0.6528, "step": 20510 }, { "epoch": 2.07, "grad_norm": 6.921829168490332, "learning_rate": 2.6606568000592214e-06, "loss": 0.697, "step": 20515 }, { "epoch": 2.07, "grad_norm": 5.402784949687757, "learning_rate": 2.658065025008526e-06, "loss": 0.6611, "step": 20520 }, { "epoch": 2.07, "grad_norm": 4.516261871930669, "learning_rate": 2.655474055815418e-06, "loss": 0.6624, "step": 20525 }, { "epoch": 2.07, "grad_norm": 5.161141461701427, "learning_rate": 2.6528838933714514e-06, "loss": 0.6762, "step": 20530 }, { "epoch": 2.07, "grad_norm": 4.941845927863708, "learning_rate": 2.6502945385678913e-06, "loss": 0.6946, "step": 20535 }, { "epoch": 2.07, "grad_norm": 5.659199780653852, "learning_rate": 2.6477059922957384e-06, "loss": 0.6783, "step": 20540 }, { "epoch": 2.07, "grad_norm": 7.124417827033684, "learning_rate": 2.645118255445705e-06, "loss": 0.6573, "step": 20545 }, { "epoch": 2.07, "grad_norm": 4.85559312595928, "learning_rate": 2.642531328908232e-06, "loss": 0.6704, "step": 20550 }, { "epoch": 2.07, "grad_norm": 4.770810220827577, "learning_rate": 2.639945213573476e-06, "loss": 0.6672, "step": 20555 }, { "epoch": 2.07, "grad_norm": 6.784653605858367, "learning_rate": 2.637359910331322e-06, "loss": 0.6597, "step": 20560 }, { "epoch": 2.07, "grad_norm": 5.517438292447058, "learning_rate": 2.6347754200713616e-06, "loss": 0.6762, "step": 20565 }, { "epoch": 2.07, "grad_norm": 7.121687089288357, "learning_rate": 2.6321917436829226e-06, "loss": 0.676, "step": 20570 }, { "epoch": 2.07, "grad_norm": 5.757356735754648, "learning_rate": 2.6296088820550402e-06, "loss": 0.6445, "step": 20575 }, { "epoch": 2.07, "grad_norm": 9.034457247639295, "learning_rate": 2.6270268360764807e-06, "loss": 0.689, "step": 20580 }, { "epoch": 2.08, "grad_norm": 5.2361163711605565, "learning_rate": 2.6244456066357176e-06, "loss": 0.6888, "step": 20585 }, { "epoch": 2.08, "grad_norm": 4.735153898853402, "learning_rate": 2.6218651946209543e-06, "loss": 0.7077, "step": 20590 }, { "epoch": 2.08, "grad_norm": 6.132323732813553, "learning_rate": 2.619285600920107e-06, "loss": 0.6814, "step": 20595 }, { "epoch": 2.08, "grad_norm": 4.56491215584217, "learning_rate": 2.6167068264208096e-06, "loss": 0.6309, "step": 20600 }, { "epoch": 2.08, "grad_norm": 4.420679368081689, "learning_rate": 2.6141288720104153e-06, "loss": 0.6567, "step": 20605 }, { "epoch": 2.08, "grad_norm": 4.678522405810549, "learning_rate": 2.611551738576e-06, "loss": 0.6773, "step": 20610 }, { "epoch": 2.08, "grad_norm": 4.886865696835705, "learning_rate": 2.6089754270043493e-06, "loss": 0.6665, "step": 20615 }, { "epoch": 2.08, "grad_norm": 5.956127785995881, "learning_rate": 2.6063999381819686e-06, "loss": 0.6539, "step": 20620 }, { "epoch": 2.08, "grad_norm": 13.519226867216945, "learning_rate": 2.6038252729950877e-06, "loss": 0.695, "step": 20625 }, { "epoch": 2.08, "grad_norm": 4.7267256993890925, "learning_rate": 2.601251432329637e-06, "loss": 0.6967, "step": 20630 }, { "epoch": 2.08, "grad_norm": 4.823013090894927, "learning_rate": 2.5986784170712797e-06, "loss": 0.7371, "step": 20635 }, { "epoch": 2.08, "grad_norm": 4.526737098118042, "learning_rate": 2.5961062281053838e-06, "loss": 0.6591, "step": 20640 }, { "epoch": 2.08, "grad_norm": 9.339338277834594, "learning_rate": 2.59353486631704e-06, "loss": 0.6972, "step": 20645 }, { "epoch": 2.08, "grad_norm": 7.65317210480881, "learning_rate": 2.5909643325910495e-06, "loss": 0.6889, "step": 20650 }, { "epoch": 2.08, "grad_norm": 5.956832370518871, "learning_rate": 2.588394627811934e-06, "loss": 0.709, "step": 20655 }, { "epoch": 2.08, "grad_norm": 5.067865571460534, "learning_rate": 2.5858257528639208e-06, "loss": 0.713, "step": 20660 }, { "epoch": 2.08, "grad_norm": 7.12917377194101, "learning_rate": 2.583257708630962e-06, "loss": 0.626, "step": 20665 }, { "epoch": 2.08, "grad_norm": 5.907861023980704, "learning_rate": 2.580690495996715e-06, "loss": 0.6633, "step": 20670 }, { "epoch": 2.08, "grad_norm": 7.7192338329918355, "learning_rate": 2.578124115844559e-06, "loss": 0.6593, "step": 20675 }, { "epoch": 2.08, "grad_norm": 10.403310083930933, "learning_rate": 2.5755585690575808e-06, "loss": 0.6623, "step": 20680 }, { "epoch": 2.09, "grad_norm": 5.040667531817449, "learning_rate": 2.572993856518587e-06, "loss": 0.6759, "step": 20685 }, { "epoch": 2.09, "grad_norm": 5.139888145277165, "learning_rate": 2.570429979110085e-06, "loss": 0.6965, "step": 20690 }, { "epoch": 2.09, "grad_norm": 12.342407045299352, "learning_rate": 2.5678669377143082e-06, "loss": 0.6686, "step": 20695 }, { "epoch": 2.09, "grad_norm": 8.900244543868103, "learning_rate": 2.5653047332131943e-06, "loss": 0.6473, "step": 20700 }, { "epoch": 2.09, "grad_norm": 5.137426285227035, "learning_rate": 2.562743366488397e-06, "loss": 0.6422, "step": 20705 }, { "epoch": 2.09, "grad_norm": 6.895202332079677, "learning_rate": 2.560182838421279e-06, "loss": 0.633, "step": 20710 }, { "epoch": 2.09, "grad_norm": 4.653129772734897, "learning_rate": 2.5576231498929187e-06, "loss": 0.6608, "step": 20715 }, { "epoch": 2.09, "grad_norm": 6.0133160033021715, "learning_rate": 2.5550643017840966e-06, "loss": 0.6672, "step": 20720 }, { "epoch": 2.09, "grad_norm": 5.671373077072103, "learning_rate": 2.5525062949753143e-06, "loss": 0.6669, "step": 20725 }, { "epoch": 2.09, "grad_norm": 5.510727721272181, "learning_rate": 2.549949130346777e-06, "loss": 0.6768, "step": 20730 }, { "epoch": 2.09, "grad_norm": 4.834607513541353, "learning_rate": 2.547392808778405e-06, "loss": 0.7069, "step": 20735 }, { "epoch": 2.09, "grad_norm": 4.60320911064297, "learning_rate": 2.544837331149826e-06, "loss": 0.6845, "step": 20740 }, { "epoch": 2.09, "grad_norm": 4.333675816136024, "learning_rate": 2.542282698340376e-06, "loss": 0.6757, "step": 20745 }, { "epoch": 2.09, "grad_norm": 4.605720986577917, "learning_rate": 2.5397289112291016e-06, "loss": 0.6295, "step": 20750 }, { "epoch": 2.09, "grad_norm": 9.79014322646118, "learning_rate": 2.5371759706947575e-06, "loss": 0.6647, "step": 20755 }, { "epoch": 2.09, "grad_norm": 8.1075613008192, "learning_rate": 2.5346238776158115e-06, "loss": 0.6826, "step": 20760 }, { "epoch": 2.09, "grad_norm": 6.475051681081209, "learning_rate": 2.532072632870433e-06, "loss": 0.6881, "step": 20765 }, { "epoch": 2.09, "grad_norm": 4.665453634646515, "learning_rate": 2.5295222373365056e-06, "loss": 0.6741, "step": 20770 }, { "epoch": 2.09, "grad_norm": 5.6573931101364465, "learning_rate": 2.526972691891617e-06, "loss": 0.661, "step": 20775 }, { "epoch": 2.1, "grad_norm": 8.297920740915522, "learning_rate": 2.5244239974130637e-06, "loss": 0.6929, "step": 20780 }, { "epoch": 2.1, "grad_norm": 6.549276471188224, "learning_rate": 2.5218761547778457e-06, "loss": 0.6911, "step": 20785 }, { "epoch": 2.1, "grad_norm": 5.854245160085682, "learning_rate": 2.519329164862678e-06, "loss": 0.6927, "step": 20790 }, { "epoch": 2.1, "grad_norm": 4.596145059140424, "learning_rate": 2.5167830285439743e-06, "loss": 0.6333, "step": 20795 }, { "epoch": 2.1, "grad_norm": 6.27937273157563, "learning_rate": 2.5142377466978598e-06, "loss": 0.688, "step": 20800 }, { "epoch": 2.1, "grad_norm": 8.834287310597626, "learning_rate": 2.5116933202001627e-06, "loss": 0.683, "step": 20805 }, { "epoch": 2.1, "grad_norm": 6.1546276439154175, "learning_rate": 2.509149749926417e-06, "loss": 0.6488, "step": 20810 }, { "epoch": 2.1, "grad_norm": 4.963786781997544, "learning_rate": 2.5066070367518624e-06, "loss": 0.6578, "step": 20815 }, { "epoch": 2.1, "grad_norm": 4.901247568524524, "learning_rate": 2.5040651815514465e-06, "loss": 0.6575, "step": 20820 }, { "epoch": 2.1, "grad_norm": 4.8133936919905835, "learning_rate": 2.5015241851998156e-06, "loss": 0.6654, "step": 20825 }, { "epoch": 2.1, "grad_norm": 9.8028683121827, "learning_rate": 2.4989840485713287e-06, "loss": 0.6362, "step": 20830 }, { "epoch": 2.1, "grad_norm": 11.757108818862323, "learning_rate": 2.4964447725400417e-06, "loss": 0.6458, "step": 20835 }, { "epoch": 2.1, "grad_norm": 5.1336993367309995, "learning_rate": 2.493906357979717e-06, "loss": 0.6796, "step": 20840 }, { "epoch": 2.1, "grad_norm": 4.6868173155668975, "learning_rate": 2.491368805763819e-06, "loss": 0.6749, "step": 20845 }, { "epoch": 2.1, "grad_norm": 6.181757307196955, "learning_rate": 2.488832116765521e-06, "loss": 0.6935, "step": 20850 }, { "epoch": 2.1, "grad_norm": 4.879231867941652, "learning_rate": 2.486296291857691e-06, "loss": 0.6466, "step": 20855 }, { "epoch": 2.1, "grad_norm": 5.210616346246946, "learning_rate": 2.4837613319129082e-06, "loss": 0.6493, "step": 20860 }, { "epoch": 2.1, "grad_norm": 5.092954661412917, "learning_rate": 2.481227237803448e-06, "loss": 0.686, "step": 20865 }, { "epoch": 2.1, "grad_norm": 5.902697895006476, "learning_rate": 2.4786940104012897e-06, "loss": 0.6926, "step": 20870 }, { "epoch": 2.1, "grad_norm": 7.0086173784932, "learning_rate": 2.4761616505781137e-06, "loss": 0.6361, "step": 20875 }, { "epoch": 2.11, "grad_norm": 5.193635820587068, "learning_rate": 2.473630159205302e-06, "loss": 0.6792, "step": 20880 }, { "epoch": 2.11, "grad_norm": 6.165631840813355, "learning_rate": 2.471099537153941e-06, "loss": 0.654, "step": 20885 }, { "epoch": 2.11, "grad_norm": 4.861817478106466, "learning_rate": 2.468569785294812e-06, "loss": 0.6763, "step": 20890 }, { "epoch": 2.11, "grad_norm": 5.372317983836856, "learning_rate": 2.466040904498404e-06, "loss": 0.6674, "step": 20895 }, { "epoch": 2.11, "grad_norm": 4.755747744451209, "learning_rate": 2.463512895634901e-06, "loss": 0.6508, "step": 20900 }, { "epoch": 2.11, "grad_norm": 5.040387676166835, "learning_rate": 2.460985759574187e-06, "loss": 0.6563, "step": 20905 }, { "epoch": 2.11, "grad_norm": 4.968898382731428, "learning_rate": 2.4584594971858473e-06, "loss": 0.6549, "step": 20910 }, { "epoch": 2.11, "grad_norm": 8.786369127215906, "learning_rate": 2.455934109339168e-06, "loss": 0.6701, "step": 20915 }, { "epoch": 2.11, "grad_norm": 5.362652153951624, "learning_rate": 2.453409596903131e-06, "loss": 0.6565, "step": 20920 }, { "epoch": 2.11, "grad_norm": 5.037224859768098, "learning_rate": 2.4508859607464203e-06, "loss": 0.69, "step": 20925 }, { "epoch": 2.11, "grad_norm": 6.183266255986752, "learning_rate": 2.4483632017374167e-06, "loss": 0.6915, "step": 20930 }, { "epoch": 2.11, "grad_norm": 4.745109900608311, "learning_rate": 2.445841320744198e-06, "loss": 0.705, "step": 20935 }, { "epoch": 2.11, "grad_norm": 10.187762151918701, "learning_rate": 2.443320318634539e-06, "loss": 0.6788, "step": 20940 }, { "epoch": 2.11, "grad_norm": 5.243981756066541, "learning_rate": 2.4408001962759187e-06, "loss": 0.6579, "step": 20945 }, { "epoch": 2.11, "grad_norm": 5.436368028087012, "learning_rate": 2.4382809545355046e-06, "loss": 0.6857, "step": 20950 }, { "epoch": 2.11, "grad_norm": 11.060669721055994, "learning_rate": 2.4357625942801694e-06, "loss": 0.658, "step": 20955 }, { "epoch": 2.11, "grad_norm": 7.9540665812641596, "learning_rate": 2.4332451163764765e-06, "loss": 0.6839, "step": 20960 }, { "epoch": 2.11, "grad_norm": 4.879592711398314, "learning_rate": 2.4307285216906866e-06, "loss": 0.6852, "step": 20965 }, { "epoch": 2.11, "grad_norm": 9.395266201019412, "learning_rate": 2.4282128110887575e-06, "loss": 0.6396, "step": 20970 }, { "epoch": 2.11, "grad_norm": 10.715951603539619, "learning_rate": 2.4256979854363453e-06, "loss": 0.6795, "step": 20975 }, { "epoch": 2.12, "grad_norm": 5.113152703376329, "learning_rate": 2.423184045598796e-06, "loss": 0.6761, "step": 20980 }, { "epoch": 2.12, "grad_norm": 8.185557904992484, "learning_rate": 2.420670992441157e-06, "loss": 0.7158, "step": 20985 }, { "epoch": 2.12, "grad_norm": 5.8309932391804375, "learning_rate": 2.4181588268281656e-06, "loss": 0.6775, "step": 20990 }, { "epoch": 2.12, "grad_norm": 5.352726397807516, "learning_rate": 2.4156475496242563e-06, "loss": 0.6462, "step": 20995 }, { "epoch": 2.12, "grad_norm": 5.074028530793119, "learning_rate": 2.413137161693557e-06, "loss": 0.6394, "step": 21000 }, { "epoch": 2.12, "grad_norm": 4.806118859847987, "learning_rate": 2.410627663899888e-06, "loss": 0.6965, "step": 21005 }, { "epoch": 2.12, "grad_norm": 7.303205623748911, "learning_rate": 2.4081190571067688e-06, "loss": 0.6612, "step": 21010 }, { "epoch": 2.12, "grad_norm": 6.7635101057624665, "learning_rate": 2.4056113421774036e-06, "loss": 0.6534, "step": 21015 }, { "epoch": 2.12, "grad_norm": 6.0749282619508, "learning_rate": 2.4031045199746998e-06, "loss": 0.7072, "step": 21020 }, { "epoch": 2.12, "grad_norm": 4.909661154884081, "learning_rate": 2.4005985913612507e-06, "loss": 0.6823, "step": 21025 }, { "epoch": 2.12, "grad_norm": 13.926500818645366, "learning_rate": 2.3980935571993435e-06, "loss": 0.6779, "step": 21030 }, { "epoch": 2.12, "grad_norm": 9.879990313993776, "learning_rate": 2.3955894183509566e-06, "loss": 0.685, "step": 21035 }, { "epoch": 2.12, "grad_norm": 7.49919898506855, "learning_rate": 2.3930861756777648e-06, "loss": 0.6762, "step": 21040 }, { "epoch": 2.12, "grad_norm": 4.825971877972342, "learning_rate": 2.390583830041128e-06, "loss": 0.6662, "step": 21045 }, { "epoch": 2.12, "grad_norm": 4.836307842150723, "learning_rate": 2.3880823823021056e-06, "loss": 0.7139, "step": 21050 }, { "epoch": 2.12, "grad_norm": 4.8872685890080385, "learning_rate": 2.38558183332144e-06, "loss": 0.6456, "step": 21055 }, { "epoch": 2.12, "grad_norm": 4.874725834505177, "learning_rate": 2.3830821839595684e-06, "loss": 0.7174, "step": 21060 }, { "epoch": 2.12, "grad_norm": 5.197819594155209, "learning_rate": 2.3805834350766165e-06, "loss": 0.6593, "step": 21065 }, { "epoch": 2.12, "grad_norm": 5.793047824829741, "learning_rate": 2.3780855875324047e-06, "loss": 0.6978, "step": 21070 }, { "epoch": 2.12, "grad_norm": 4.829755969981174, "learning_rate": 2.375588642186436e-06, "loss": 0.663, "step": 21075 }, { "epoch": 2.13, "grad_norm": 8.64441800256596, "learning_rate": 2.373092599897911e-06, "loss": 0.6674, "step": 21080 }, { "epoch": 2.13, "grad_norm": 10.047361648257937, "learning_rate": 2.3705974615257133e-06, "loss": 0.7065, "step": 21085 }, { "epoch": 2.13, "grad_norm": 9.95270047732342, "learning_rate": 2.3681032279284173e-06, "loss": 0.7028, "step": 21090 }, { "epoch": 2.13, "grad_norm": 5.204323988308421, "learning_rate": 2.365609899964285e-06, "loss": 0.6651, "step": 21095 }, { "epoch": 2.13, "grad_norm": 5.271678402978469, "learning_rate": 2.3631174784912723e-06, "loss": 0.6666, "step": 21100 }, { "epoch": 2.13, "grad_norm": 5.063459933192673, "learning_rate": 2.3606259643670148e-06, "loss": 0.6482, "step": 21105 }, { "epoch": 2.13, "grad_norm": 5.437559796028627, "learning_rate": 2.3581353584488437e-06, "loss": 0.6586, "step": 21110 }, { "epoch": 2.13, "grad_norm": 8.825669261536088, "learning_rate": 2.355645661593773e-06, "loss": 0.641, "step": 21115 }, { "epoch": 2.13, "grad_norm": 4.667516137702388, "learning_rate": 2.3531568746585036e-06, "loss": 0.6698, "step": 21120 }, { "epoch": 2.13, "grad_norm": 5.988785674417255, "learning_rate": 2.3506689984994265e-06, "loss": 0.6739, "step": 21125 }, { "epoch": 2.13, "grad_norm": 4.727327892790064, "learning_rate": 2.3481820339726142e-06, "loss": 0.6624, "step": 21130 }, { "epoch": 2.13, "grad_norm": 4.8673197563247435, "learning_rate": 2.345695981933833e-06, "loss": 0.6868, "step": 21135 }, { "epoch": 2.13, "grad_norm": 4.490877814256852, "learning_rate": 2.3432108432385274e-06, "loss": 0.6523, "step": 21140 }, { "epoch": 2.13, "grad_norm": 9.706242854874962, "learning_rate": 2.3407266187418354e-06, "loss": 0.6702, "step": 21145 }, { "epoch": 2.13, "grad_norm": 4.630820647418488, "learning_rate": 2.338243309298574e-06, "loss": 0.6594, "step": 21150 }, { "epoch": 2.13, "grad_norm": 5.784604042705804, "learning_rate": 2.3357609157632473e-06, "loss": 0.734, "step": 21155 }, { "epoch": 2.13, "grad_norm": 5.414196654666086, "learning_rate": 2.3332794389900434e-06, "loss": 0.6885, "step": 21160 }, { "epoch": 2.13, "grad_norm": 7.1018738978504, "learning_rate": 2.3307988798328384e-06, "loss": 0.7051, "step": 21165 }, { "epoch": 2.13, "grad_norm": 4.906674381033124, "learning_rate": 2.3283192391451885e-06, "loss": 0.6619, "step": 21170 }, { "epoch": 2.13, "grad_norm": 4.469245284173604, "learning_rate": 2.3258405177803386e-06, "loss": 0.6902, "step": 21175 }, { "epoch": 2.14, "grad_norm": 6.014548678716852, "learning_rate": 2.323362716591212e-06, "loss": 0.6439, "step": 21180 }, { "epoch": 2.14, "grad_norm": 4.395091828408313, "learning_rate": 2.320885836430418e-06, "loss": 0.6858, "step": 21185 }, { "epoch": 2.14, "grad_norm": 4.338385671125915, "learning_rate": 2.3184098781502473e-06, "loss": 0.6797, "step": 21190 }, { "epoch": 2.14, "grad_norm": 4.440486546092224, "learning_rate": 2.3159348426026784e-06, "loss": 0.6535, "step": 21195 }, { "epoch": 2.14, "grad_norm": 5.014703336012933, "learning_rate": 2.313460730639364e-06, "loss": 0.6694, "step": 21200 }, { "epoch": 2.14, "grad_norm": 6.0268000255109815, "learning_rate": 2.3109875431116485e-06, "loss": 0.6431, "step": 21205 }, { "epoch": 2.14, "grad_norm": 7.294111410480581, "learning_rate": 2.308515280870551e-06, "loss": 0.6721, "step": 21210 }, { "epoch": 2.14, "grad_norm": 4.596658628899066, "learning_rate": 2.3060439447667755e-06, "loss": 0.64, "step": 21215 }, { "epoch": 2.14, "grad_norm": 5.359281340269409, "learning_rate": 2.3035735356507027e-06, "loss": 0.6634, "step": 21220 }, { "epoch": 2.14, "grad_norm": 8.878999044428964, "learning_rate": 2.3011040543724035e-06, "loss": 0.6545, "step": 21225 }, { "epoch": 2.14, "grad_norm": 5.7306168814791745, "learning_rate": 2.2986355017816194e-06, "loss": 0.7132, "step": 21230 }, { "epoch": 2.14, "grad_norm": 6.440527418729273, "learning_rate": 2.2961678787277803e-06, "loss": 0.6431, "step": 21235 }, { "epoch": 2.14, "grad_norm": 6.095974682993984, "learning_rate": 2.2937011860599924e-06, "loss": 0.6931, "step": 21240 }, { "epoch": 2.14, "grad_norm": 7.959130597361517, "learning_rate": 2.2912354246270406e-06, "loss": 0.6827, "step": 21245 }, { "epoch": 2.14, "grad_norm": 4.820875700283996, "learning_rate": 2.2887705952773933e-06, "loss": 0.6873, "step": 21250 }, { "epoch": 2.14, "grad_norm": 5.121696731093606, "learning_rate": 2.286306698859192e-06, "loss": 0.6799, "step": 21255 }, { "epoch": 2.14, "grad_norm": 10.373115426094676, "learning_rate": 2.2838437362202653e-06, "loss": 0.7123, "step": 21260 }, { "epoch": 2.14, "grad_norm": 11.701640475740954, "learning_rate": 2.2813817082081135e-06, "loss": 0.6797, "step": 21265 }, { "epoch": 2.14, "grad_norm": 6.262566730988645, "learning_rate": 2.278920615669921e-06, "loss": 0.7041, "step": 21270 }, { "epoch": 2.14, "grad_norm": 8.108524486232833, "learning_rate": 2.276460459452546e-06, "loss": 0.6351, "step": 21275 }, { "epoch": 2.15, "grad_norm": 9.204313406839686, "learning_rate": 2.2740012404025253e-06, "loss": 0.6517, "step": 21280 }, { "epoch": 2.15, "grad_norm": 4.876521529639379, "learning_rate": 2.2715429593660722e-06, "loss": 0.6841, "step": 21285 }, { "epoch": 2.15, "grad_norm": 6.1636493192147785, "learning_rate": 2.269085617189083e-06, "loss": 0.6706, "step": 21290 }, { "epoch": 2.15, "grad_norm": 7.354043706285142, "learning_rate": 2.266629214717122e-06, "loss": 0.6583, "step": 21295 }, { "epoch": 2.15, "grad_norm": 4.656344874179418, "learning_rate": 2.2641737527954405e-06, "loss": 0.6794, "step": 21300 }, { "epoch": 2.15, "grad_norm": 15.927842070258235, "learning_rate": 2.261719232268957e-06, "loss": 0.6469, "step": 21305 }, { "epoch": 2.15, "grad_norm": 11.574361597789768, "learning_rate": 2.2592656539822706e-06, "loss": 0.67, "step": 21310 }, { "epoch": 2.15, "grad_norm": 4.458625032162326, "learning_rate": 2.256813018779653e-06, "loss": 0.656, "step": 21315 }, { "epoch": 2.15, "grad_norm": 15.504041246695413, "learning_rate": 2.254361327505057e-06, "loss": 0.6734, "step": 21320 }, { "epoch": 2.15, "grad_norm": 4.920432068704808, "learning_rate": 2.251910581002104e-06, "loss": 0.6801, "step": 21325 }, { "epoch": 2.15, "grad_norm": 4.977985796519088, "learning_rate": 2.2494607801140977e-06, "loss": 0.6701, "step": 21330 }, { "epoch": 2.15, "grad_norm": 5.521974127049712, "learning_rate": 2.2470119256840102e-06, "loss": 0.6597, "step": 21335 }, { "epoch": 2.15, "grad_norm": 6.2108291136682245, "learning_rate": 2.2445640185544887e-06, "loss": 0.6738, "step": 21340 }, { "epoch": 2.15, "grad_norm": 5.578370170317782, "learning_rate": 2.2421170595678554e-06, "loss": 0.6956, "step": 21345 }, { "epoch": 2.15, "grad_norm": 5.455209357833909, "learning_rate": 2.2396710495661096e-06, "loss": 0.6872, "step": 21350 }, { "epoch": 2.15, "grad_norm": 4.932387669873154, "learning_rate": 2.2372259893909177e-06, "loss": 0.6931, "step": 21355 }, { "epoch": 2.15, "grad_norm": 7.383305059308782, "learning_rate": 2.234781879883625e-06, "loss": 0.6765, "step": 21360 }, { "epoch": 2.15, "grad_norm": 4.588357606015773, "learning_rate": 2.2323387218852472e-06, "loss": 0.6468, "step": 21365 }, { "epoch": 2.15, "grad_norm": 6.9486101441179855, "learning_rate": 2.229896516236472e-06, "loss": 0.7126, "step": 21370 }, { "epoch": 2.16, "grad_norm": 6.831380879476231, "learning_rate": 2.227455263777657e-06, "loss": 0.6925, "step": 21375 }, { "epoch": 2.16, "grad_norm": 14.772545065182918, "learning_rate": 2.22501496534884e-06, "loss": 0.6487, "step": 21380 }, { "epoch": 2.16, "grad_norm": 6.355784462134895, "learning_rate": 2.2225756217897232e-06, "loss": 0.6881, "step": 21385 }, { "epoch": 2.16, "grad_norm": 10.180633022588685, "learning_rate": 2.2201372339396797e-06, "loss": 0.7179, "step": 21390 }, { "epoch": 2.16, "grad_norm": 4.906657785907644, "learning_rate": 2.2176998026377615e-06, "loss": 0.6773, "step": 21395 }, { "epoch": 2.16, "grad_norm": 5.973334214591822, "learning_rate": 2.2152633287226836e-06, "loss": 0.6862, "step": 21400 }, { "epoch": 2.16, "grad_norm": 5.857519742476949, "learning_rate": 2.2128278130328346e-06, "loss": 0.6675, "step": 21405 }, { "epoch": 2.16, "grad_norm": 5.331979105826995, "learning_rate": 2.2103932564062706e-06, "loss": 0.6346, "step": 21410 }, { "epoch": 2.16, "grad_norm": 6.850241441793481, "learning_rate": 2.207959659680726e-06, "loss": 0.6459, "step": 21415 }, { "epoch": 2.16, "grad_norm": 4.663222394532015, "learning_rate": 2.2055270236935927e-06, "loss": 0.6659, "step": 21420 }, { "epoch": 2.16, "grad_norm": 6.714394073555861, "learning_rate": 2.2030953492819435e-06, "loss": 0.6453, "step": 21425 }, { "epoch": 2.16, "grad_norm": 6.421109528249489, "learning_rate": 2.2006646372825138e-06, "loss": 0.6648, "step": 21430 }, { "epoch": 2.16, "grad_norm": 6.463981661729482, "learning_rate": 2.198234888531708e-06, "loss": 0.6567, "step": 21435 }, { "epoch": 2.16, "grad_norm": 5.074988279798165, "learning_rate": 2.1958061038656e-06, "loss": 0.6448, "step": 21440 }, { "epoch": 2.16, "grad_norm": 4.73004122724363, "learning_rate": 2.193378284119934e-06, "loss": 0.6768, "step": 21445 }, { "epoch": 2.16, "grad_norm": 5.659328962468312, "learning_rate": 2.190951430130119e-06, "loss": 0.6546, "step": 21450 }, { "epoch": 2.16, "grad_norm": 4.614004198385022, "learning_rate": 2.188525542731236e-06, "loss": 0.6502, "step": 21455 }, { "epoch": 2.16, "grad_norm": 6.497775382681463, "learning_rate": 2.1861006227580276e-06, "loss": 0.7165, "step": 21460 }, { "epoch": 2.16, "grad_norm": 4.542848170679907, "learning_rate": 2.1836766710449077e-06, "loss": 0.6685, "step": 21465 }, { "epoch": 2.16, "grad_norm": 4.649025832208719, "learning_rate": 2.1812536884259537e-06, "loss": 0.6761, "step": 21470 }, { "epoch": 2.17, "grad_norm": 4.801979439674982, "learning_rate": 2.178831675734915e-06, "loss": 0.6833, "step": 21475 }, { "epoch": 2.17, "grad_norm": 4.821275013329016, "learning_rate": 2.1764106338052005e-06, "loss": 0.6756, "step": 21480 }, { "epoch": 2.17, "grad_norm": 4.596268528847123, "learning_rate": 2.1739905634698916e-06, "loss": 0.6768, "step": 21485 }, { "epoch": 2.17, "grad_norm": 8.495715820222872, "learning_rate": 2.171571465561731e-06, "loss": 0.6667, "step": 21490 }, { "epoch": 2.17, "grad_norm": 4.776997921508625, "learning_rate": 2.169153340913127e-06, "loss": 0.6944, "step": 21495 }, { "epoch": 2.17, "grad_norm": 5.506795003368372, "learning_rate": 2.1667361903561534e-06, "loss": 0.6646, "step": 21500 }, { "epoch": 2.17, "grad_norm": 7.392404856257032, "learning_rate": 2.1643200147225523e-06, "loss": 0.6782, "step": 21505 }, { "epoch": 2.17, "grad_norm": 5.477503623359234, "learning_rate": 2.1619048148437256e-06, "loss": 0.6601, "step": 21510 }, { "epoch": 2.17, "grad_norm": 4.524025133682762, "learning_rate": 2.1594905915507397e-06, "loss": 0.6839, "step": 21515 }, { "epoch": 2.17, "grad_norm": 4.615708947012337, "learning_rate": 2.1570773456743305e-06, "loss": 0.6494, "step": 21520 }, { "epoch": 2.17, "grad_norm": 5.095074229625856, "learning_rate": 2.1546650780448907e-06, "loss": 0.6696, "step": 21525 }, { "epoch": 2.17, "grad_norm": 6.528698210233045, "learning_rate": 2.15225378949248e-06, "loss": 0.6469, "step": 21530 }, { "epoch": 2.17, "grad_norm": 7.061796993105184, "learning_rate": 2.149843480846819e-06, "loss": 0.6828, "step": 21535 }, { "epoch": 2.17, "grad_norm": 4.608756274550869, "learning_rate": 2.1474341529372955e-06, "loss": 0.6519, "step": 21540 }, { "epoch": 2.17, "grad_norm": 7.6559465404075455, "learning_rate": 2.1450258065929536e-06, "loss": 0.6905, "step": 21545 }, { "epoch": 2.17, "grad_norm": 4.611780806892584, "learning_rate": 2.1426184426425073e-06, "loss": 0.6165, "step": 21550 }, { "epoch": 2.17, "grad_norm": 4.775707939635718, "learning_rate": 2.1402120619143254e-06, "loss": 0.6955, "step": 21555 }, { "epoch": 2.17, "grad_norm": 4.858396353644519, "learning_rate": 2.137806665236441e-06, "loss": 0.6687, "step": 21560 }, { "epoch": 2.17, "grad_norm": 4.800894336334279, "learning_rate": 2.135402253436548e-06, "loss": 0.6648, "step": 21565 }, { "epoch": 2.17, "grad_norm": 4.6098765956852015, "learning_rate": 2.1329988273420055e-06, "loss": 0.6861, "step": 21570 }, { "epoch": 2.18, "grad_norm": 8.6380129196337, "learning_rate": 2.1305963877798265e-06, "loss": 0.6491, "step": 21575 }, { "epoch": 2.18, "grad_norm": 5.510593341834106, "learning_rate": 2.128194935576692e-06, "loss": 0.6633, "step": 21580 }, { "epoch": 2.18, "grad_norm": 5.012673378533721, "learning_rate": 2.1257944715589363e-06, "loss": 0.6853, "step": 21585 }, { "epoch": 2.18, "grad_norm": 5.940600401787411, "learning_rate": 2.1233949965525585e-06, "loss": 0.6819, "step": 21590 }, { "epoch": 2.18, "grad_norm": 8.435816299728263, "learning_rate": 2.120996511383213e-06, "loss": 0.6768, "step": 21595 }, { "epoch": 2.18, "grad_norm": 7.472662445866399, "learning_rate": 2.1185990168762193e-06, "loss": 0.6378, "step": 21600 }, { "epoch": 2.18, "grad_norm": 5.412920367145333, "learning_rate": 2.1162025138565505e-06, "loss": 0.691, "step": 21605 }, { "epoch": 2.18, "grad_norm": 4.563470399698418, "learning_rate": 2.1138070031488445e-06, "loss": 0.7107, "step": 21610 }, { "epoch": 2.18, "grad_norm": 5.746862880742315, "learning_rate": 2.1114124855773915e-06, "loss": 0.6786, "step": 21615 }, { "epoch": 2.18, "grad_norm": 5.143965721903495, "learning_rate": 2.1090189619661437e-06, "loss": 0.651, "step": 21620 }, { "epoch": 2.18, "grad_norm": 5.909432206635309, "learning_rate": 2.1066264331387084e-06, "loss": 0.6533, "step": 21625 }, { "epoch": 2.18, "grad_norm": 5.953769822474178, "learning_rate": 2.104234899918355e-06, "loss": 0.6591, "step": 21630 }, { "epoch": 2.18, "grad_norm": 4.648977379946307, "learning_rate": 2.101844363128007e-06, "loss": 0.6816, "step": 21635 }, { "epoch": 2.18, "grad_norm": 7.201087140446208, "learning_rate": 2.099454823590244e-06, "loss": 0.6705, "step": 21640 }, { "epoch": 2.18, "grad_norm": 4.959974141273814, "learning_rate": 2.0970662821273074e-06, "loss": 0.6469, "step": 21645 }, { "epoch": 2.18, "grad_norm": 4.664286681471639, "learning_rate": 2.094678739561091e-06, "loss": 0.6646, "step": 21650 }, { "epoch": 2.18, "grad_norm": 6.485570321475418, "learning_rate": 2.092292196713145e-06, "loss": 0.6523, "step": 21655 }, { "epoch": 2.18, "grad_norm": 5.368909470789177, "learning_rate": 2.0899066544046754e-06, "loss": 0.6451, "step": 21660 }, { "epoch": 2.18, "grad_norm": 4.97877059094573, "learning_rate": 2.087522113456548e-06, "loss": 0.674, "step": 21665 }, { "epoch": 2.18, "grad_norm": 6.508107160264964, "learning_rate": 2.085138574689278e-06, "loss": 0.6405, "step": 21670 }, { "epoch": 2.19, "grad_norm": 5.801473722359958, "learning_rate": 2.082756038923042e-06, "loss": 0.6738, "step": 21675 }, { "epoch": 2.19, "grad_norm": 4.701163787304521, "learning_rate": 2.080374506977667e-06, "loss": 0.6915, "step": 21680 }, { "epoch": 2.19, "grad_norm": 5.0583630847683345, "learning_rate": 2.0779939796726358e-06, "loss": 0.6789, "step": 21685 }, { "epoch": 2.19, "grad_norm": 4.989863272716518, "learning_rate": 2.075614457827083e-06, "loss": 0.6593, "step": 21690 }, { "epoch": 2.19, "grad_norm": 5.457297521696792, "learning_rate": 2.073235942259804e-06, "loss": 0.6472, "step": 21695 }, { "epoch": 2.19, "grad_norm": 12.769915672849963, "learning_rate": 2.0708584337892406e-06, "loss": 0.6985, "step": 21700 }, { "epoch": 2.19, "grad_norm": 6.0036448831642755, "learning_rate": 2.0684819332334937e-06, "loss": 0.6477, "step": 21705 }, { "epoch": 2.19, "grad_norm": 5.353427945270811, "learning_rate": 2.066106441410314e-06, "loss": 0.716, "step": 21710 }, { "epoch": 2.19, "grad_norm": 5.403640101289655, "learning_rate": 2.0637319591371057e-06, "loss": 0.6456, "step": 21715 }, { "epoch": 2.19, "grad_norm": 4.799200550599031, "learning_rate": 2.0613584872309238e-06, "loss": 0.6445, "step": 21720 }, { "epoch": 2.19, "grad_norm": 10.13065659228395, "learning_rate": 2.058986026508482e-06, "loss": 0.6908, "step": 21725 }, { "epoch": 2.19, "grad_norm": 6.577943061340765, "learning_rate": 2.0566145777861374e-06, "loss": 0.6751, "step": 21730 }, { "epoch": 2.19, "grad_norm": 6.263454894906639, "learning_rate": 2.054244141879907e-06, "loss": 0.6474, "step": 21735 }, { "epoch": 2.19, "grad_norm": 4.782587443226552, "learning_rate": 2.0518747196054533e-06, "loss": 0.633, "step": 21740 }, { "epoch": 2.19, "grad_norm": 4.484463749033461, "learning_rate": 2.049506311778094e-06, "loss": 0.6482, "step": 21745 }, { "epoch": 2.19, "grad_norm": 5.316010453674646, "learning_rate": 2.047138919212792e-06, "loss": 0.6819, "step": 21750 }, { "epoch": 2.19, "grad_norm": 6.308040713505686, "learning_rate": 2.04477254272417e-06, "loss": 0.6636, "step": 21755 }, { "epoch": 2.19, "grad_norm": 4.735685322131239, "learning_rate": 2.0424071831264913e-06, "loss": 0.6882, "step": 21760 }, { "epoch": 2.19, "grad_norm": 8.13529321248825, "learning_rate": 2.0400428412336776e-06, "loss": 0.6645, "step": 21765 }, { "epoch": 2.19, "grad_norm": 9.906802535189406, "learning_rate": 2.0376795178592973e-06, "loss": 0.66, "step": 21770 }, { "epoch": 2.2, "grad_norm": 6.087737709787373, "learning_rate": 2.035317213816562e-06, "loss": 0.6657, "step": 21775 }, { "epoch": 2.2, "grad_norm": 4.619965083742663, "learning_rate": 2.0329559299183438e-06, "loss": 0.6584, "step": 21780 }, { "epoch": 2.2, "grad_norm": 7.693608908068327, "learning_rate": 2.0305956669771544e-06, "loss": 0.692, "step": 21785 }, { "epoch": 2.2, "grad_norm": 7.339632482464978, "learning_rate": 2.0282364258051614e-06, "loss": 0.6453, "step": 21790 }, { "epoch": 2.2, "grad_norm": 4.938669128359225, "learning_rate": 2.025878207214174e-06, "loss": 0.6991, "step": 21795 }, { "epoch": 2.2, "grad_norm": 5.791176392690036, "learning_rate": 2.023521012015659e-06, "loss": 0.6592, "step": 21800 }, { "epoch": 2.2, "grad_norm": 5.366344972490452, "learning_rate": 2.021164841020717e-06, "loss": 0.6674, "step": 21805 }, { "epoch": 2.2, "grad_norm": 5.120904543191494, "learning_rate": 2.0188096950401097e-06, "loss": 0.6761, "step": 21810 }, { "epoch": 2.2, "grad_norm": 8.949775455094654, "learning_rate": 2.016455574884238e-06, "loss": 0.668, "step": 21815 }, { "epoch": 2.2, "grad_norm": 6.4831923815287515, "learning_rate": 2.0141024813631543e-06, "loss": 0.6894, "step": 21820 }, { "epoch": 2.2, "grad_norm": 8.6838611621769, "learning_rate": 2.0117504152865535e-06, "loss": 0.677, "step": 21825 }, { "epoch": 2.2, "grad_norm": 11.12976987528081, "learning_rate": 2.0093993774637844e-06, "loss": 0.6721, "step": 21830 }, { "epoch": 2.2, "grad_norm": 7.923204024712102, "learning_rate": 2.007049368703829e-06, "loss": 0.6547, "step": 21835 }, { "epoch": 2.2, "grad_norm": 15.444648074512381, "learning_rate": 2.0047003898153294e-06, "loss": 0.7173, "step": 21840 }, { "epoch": 2.2, "grad_norm": 4.512895550568181, "learning_rate": 2.002352441606563e-06, "loss": 0.6774, "step": 21845 }, { "epoch": 2.2, "grad_norm": 5.120862716747655, "learning_rate": 2.00000552488546e-06, "loss": 0.6994, "step": 21850 }, { "epoch": 2.2, "grad_norm": 6.1405050814060465, "learning_rate": 1.9976596404595896e-06, "loss": 0.6761, "step": 21855 }, { "epoch": 2.2, "grad_norm": 6.729094027264364, "learning_rate": 1.995314789136172e-06, "loss": 0.6588, "step": 21860 }, { "epoch": 2.2, "grad_norm": 4.843415886395163, "learning_rate": 1.9929709717220618e-06, "loss": 0.6514, "step": 21865 }, { "epoch": 2.2, "grad_norm": 6.202932791193621, "learning_rate": 1.9906281890237713e-06, "loss": 0.6734, "step": 21870 }, { "epoch": 2.21, "grad_norm": 4.752555156991824, "learning_rate": 1.9882864418474433e-06, "loss": 0.6801, "step": 21875 }, { "epoch": 2.21, "grad_norm": 4.546389634068141, "learning_rate": 1.985945730998877e-06, "loss": 0.712, "step": 21880 }, { "epoch": 2.21, "grad_norm": 4.450342124329078, "learning_rate": 1.9836060572835043e-06, "loss": 0.6513, "step": 21885 }, { "epoch": 2.21, "grad_norm": 5.011460680897503, "learning_rate": 1.981267421506409e-06, "loss": 0.7039, "step": 21890 }, { "epoch": 2.21, "grad_norm": 4.361626448788882, "learning_rate": 1.9789298244723094e-06, "loss": 0.647, "step": 21895 }, { "epoch": 2.21, "grad_norm": 5.543949883889084, "learning_rate": 1.9765932669855696e-06, "loss": 0.7214, "step": 21900 }, { "epoch": 2.21, "grad_norm": 4.629136822219546, "learning_rate": 1.9742577498502015e-06, "loss": 0.6905, "step": 21905 }, { "epoch": 2.21, "grad_norm": 8.094072104483322, "learning_rate": 1.9719232738698496e-06, "loss": 0.7063, "step": 21910 }, { "epoch": 2.21, "grad_norm": 6.180828173160776, "learning_rate": 1.9695898398478087e-06, "loss": 0.6723, "step": 21915 }, { "epoch": 2.21, "grad_norm": 11.900414331532556, "learning_rate": 1.9672574485870095e-06, "loss": 0.6339, "step": 21920 }, { "epoch": 2.21, "grad_norm": 13.887237517635425, "learning_rate": 1.9649261008900256e-06, "loss": 0.6656, "step": 21925 }, { "epoch": 2.21, "grad_norm": 5.3677286120369345, "learning_rate": 1.9625957975590697e-06, "loss": 0.6835, "step": 21930 }, { "epoch": 2.21, "grad_norm": 4.997891580541595, "learning_rate": 1.9602665393960006e-06, "loss": 0.6776, "step": 21935 }, { "epoch": 2.21, "grad_norm": 13.034257581641024, "learning_rate": 1.9579383272023106e-06, "loss": 0.6598, "step": 21940 }, { "epoch": 2.21, "grad_norm": 5.095295571247911, "learning_rate": 1.9556111617791383e-06, "loss": 0.6423, "step": 21945 }, { "epoch": 2.21, "grad_norm": 4.779739155406581, "learning_rate": 1.9532850439272576e-06, "loss": 0.659, "step": 21950 }, { "epoch": 2.21, "grad_norm": 17.447892921304916, "learning_rate": 1.950959974447083e-06, "loss": 0.6411, "step": 21955 }, { "epoch": 2.21, "grad_norm": 5.38037099062299, "learning_rate": 1.948635954138668e-06, "loss": 0.6598, "step": 21960 }, { "epoch": 2.21, "grad_norm": 6.985338264358882, "learning_rate": 1.946312983801708e-06, "loss": 0.6597, "step": 21965 }, { "epoch": 2.22, "grad_norm": 5.06480244294857, "learning_rate": 1.943991064235532e-06, "loss": 0.6527, "step": 21970 }, { "epoch": 2.22, "grad_norm": 6.539366017792932, "learning_rate": 1.941670196239113e-06, "loss": 0.6831, "step": 21975 }, { "epoch": 2.22, "grad_norm": 4.784330135145143, "learning_rate": 1.939350380611058e-06, "loss": 0.6861, "step": 21980 }, { "epoch": 2.22, "grad_norm": 7.570023660771087, "learning_rate": 1.937031618149616e-06, "loss": 0.6576, "step": 21985 }, { "epoch": 2.22, "grad_norm": 4.7423422973082845, "learning_rate": 1.9347139096526662e-06, "loss": 0.6765, "step": 21990 }, { "epoch": 2.22, "grad_norm": 6.434972787435978, "learning_rate": 1.932397255917734e-06, "loss": 0.6976, "step": 21995 }, { "epoch": 2.22, "grad_norm": 5.393669477772615, "learning_rate": 1.930081657741974e-06, "loss": 0.6386, "step": 22000 }, { "epoch": 2.22, "grad_norm": 5.324763834075317, "learning_rate": 1.9277671159221858e-06, "loss": 0.6698, "step": 22005 }, { "epoch": 2.22, "grad_norm": 5.172778216238288, "learning_rate": 1.925453631254796e-06, "loss": 0.668, "step": 22010 }, { "epoch": 2.22, "grad_norm": 10.775476470719338, "learning_rate": 1.9231412045358794e-06, "loss": 0.7001, "step": 22015 }, { "epoch": 2.22, "grad_norm": 5.772612247891434, "learning_rate": 1.920829836561134e-06, "loss": 0.6343, "step": 22020 }, { "epoch": 2.22, "grad_norm": 5.638808772116786, "learning_rate": 1.9185195281258984e-06, "loss": 0.6662, "step": 22025 }, { "epoch": 2.22, "grad_norm": 8.477423201454245, "learning_rate": 1.9162102800251526e-06, "loss": 0.691, "step": 22030 }, { "epoch": 2.22, "grad_norm": 6.001265825880683, "learning_rate": 1.913902093053502e-06, "loss": 0.7369, "step": 22035 }, { "epoch": 2.22, "grad_norm": 5.695603228654765, "learning_rate": 1.911594968005195e-06, "loss": 0.6901, "step": 22040 }, { "epoch": 2.22, "grad_norm": 4.699503683926651, "learning_rate": 1.90928890567411e-06, "loss": 0.6603, "step": 22045 }, { "epoch": 2.22, "grad_norm": 5.246464717025585, "learning_rate": 1.9069839068537605e-06, "loss": 0.6683, "step": 22050 }, { "epoch": 2.22, "grad_norm": 7.441835122688492, "learning_rate": 1.9046799723372927e-06, "loss": 0.6552, "step": 22055 }, { "epoch": 2.22, "grad_norm": 6.807563202328714, "learning_rate": 1.902377102917492e-06, "loss": 0.6635, "step": 22060 }, { "epoch": 2.22, "grad_norm": 4.53254206934949, "learning_rate": 1.90007529938677e-06, "loss": 0.6673, "step": 22065 }, { "epoch": 2.23, "grad_norm": 6.36613438278603, "learning_rate": 1.897774562537178e-06, "loss": 0.6644, "step": 22070 }, { "epoch": 2.23, "grad_norm": 8.838309800932239, "learning_rate": 1.895474893160396e-06, "loss": 0.7222, "step": 22075 }, { "epoch": 2.23, "grad_norm": 7.298006664524484, "learning_rate": 1.8931762920477387e-06, "loss": 0.6583, "step": 22080 }, { "epoch": 2.23, "grad_norm": 6.656911860726481, "learning_rate": 1.89087875999015e-06, "loss": 0.6445, "step": 22085 }, { "epoch": 2.23, "grad_norm": 4.649023226724349, "learning_rate": 1.888582297778212e-06, "loss": 0.6448, "step": 22090 }, { "epoch": 2.23, "grad_norm": 5.356781900163968, "learning_rate": 1.8862869062021317e-06, "loss": 0.6458, "step": 22095 }, { "epoch": 2.23, "grad_norm": 5.5443744334239895, "learning_rate": 1.8839925860517549e-06, "loss": 0.6573, "step": 22100 }, { "epoch": 2.23, "grad_norm": 4.735248749146872, "learning_rate": 1.8816993381165533e-06, "loss": 0.6369, "step": 22105 }, { "epoch": 2.23, "grad_norm": 6.68724647456985, "learning_rate": 1.8794071631856315e-06, "loss": 0.6606, "step": 22110 }, { "epoch": 2.23, "grad_norm": 4.519650423597918, "learning_rate": 1.8771160620477219e-06, "loss": 0.6397, "step": 22115 }, { "epoch": 2.23, "grad_norm": 7.213776592218962, "learning_rate": 1.8748260354911945e-06, "loss": 0.6731, "step": 22120 }, { "epoch": 2.23, "grad_norm": 8.309313240765013, "learning_rate": 1.872537084304042e-06, "loss": 0.6633, "step": 22125 }, { "epoch": 2.23, "grad_norm": 5.250550954950561, "learning_rate": 1.8702492092738934e-06, "loss": 0.6962, "step": 22130 }, { "epoch": 2.23, "grad_norm": 4.756675355745477, "learning_rate": 1.8679624111880024e-06, "loss": 0.6402, "step": 22135 }, { "epoch": 2.23, "grad_norm": 6.987116484431764, "learning_rate": 1.8656766908332542e-06, "loss": 0.669, "step": 22140 }, { "epoch": 2.23, "grad_norm": 5.662568308039815, "learning_rate": 1.8633920489961615e-06, "loss": 0.6795, "step": 22145 }, { "epoch": 2.23, "grad_norm": 4.9315334222705465, "learning_rate": 1.8611084864628708e-06, "loss": 0.63, "step": 22150 }, { "epoch": 2.23, "grad_norm": 7.919007732778563, "learning_rate": 1.8588260040191518e-06, "loss": 0.6346, "step": 22155 }, { "epoch": 2.23, "grad_norm": 5.209612486019074, "learning_rate": 1.856544602450403e-06, "loss": 0.6401, "step": 22160 }, { "epoch": 2.23, "grad_norm": 4.573632350427878, "learning_rate": 1.8542642825416558e-06, "loss": 0.6574, "step": 22165 }, { "epoch": 2.24, "grad_norm": 5.75801656606795, "learning_rate": 1.8519850450775646e-06, "loss": 0.681, "step": 22170 }, { "epoch": 2.24, "grad_norm": 4.473139584757566, "learning_rate": 1.849706890842412e-06, "loss": 0.64, "step": 22175 }, { "epoch": 2.24, "grad_norm": 5.356833240856356, "learning_rate": 1.8474298206201086e-06, "loss": 0.6876, "step": 22180 }, { "epoch": 2.24, "grad_norm": 5.005661901628179, "learning_rate": 1.8451538351941938e-06, "loss": 0.6857, "step": 22185 }, { "epoch": 2.24, "grad_norm": 5.55621385776115, "learning_rate": 1.84287893534783e-06, "loss": 0.6653, "step": 22190 }, { "epoch": 2.24, "grad_norm": 5.004544719422335, "learning_rate": 1.8406051218638104e-06, "loss": 0.6388, "step": 22195 }, { "epoch": 2.24, "grad_norm": 5.650527884490754, "learning_rate": 1.8383323955245513e-06, "loss": 0.666, "step": 22200 }, { "epoch": 2.24, "grad_norm": 5.510569408301339, "learning_rate": 1.8360607571120948e-06, "loss": 0.6684, "step": 22205 }, { "epoch": 2.24, "grad_norm": 5.61974386639768, "learning_rate": 1.8337902074081082e-06, "loss": 0.669, "step": 22210 }, { "epoch": 2.24, "grad_norm": 5.199501659759417, "learning_rate": 1.831520747193889e-06, "loss": 0.6624, "step": 22215 }, { "epoch": 2.24, "grad_norm": 5.019792770858063, "learning_rate": 1.8292523772503524e-06, "loss": 0.6581, "step": 22220 }, { "epoch": 2.24, "grad_norm": 8.958303016321787, "learning_rate": 1.826985098358046e-06, "loss": 0.6779, "step": 22225 }, { "epoch": 2.24, "grad_norm": 7.503162405669565, "learning_rate": 1.8247189112971374e-06, "loss": 0.6477, "step": 22230 }, { "epoch": 2.24, "grad_norm": 6.53591139782825, "learning_rate": 1.8224538168474182e-06, "loss": 0.686, "step": 22235 }, { "epoch": 2.24, "grad_norm": 5.93514558069354, "learning_rate": 1.820189815788304e-06, "loss": 0.6986, "step": 22240 }, { "epoch": 2.24, "grad_norm": 5.085521784062833, "learning_rate": 1.8179269088988387e-06, "loss": 0.6652, "step": 22245 }, { "epoch": 2.24, "grad_norm": 4.759343061989954, "learning_rate": 1.8156650969576832e-06, "loss": 0.6996, "step": 22250 }, { "epoch": 2.24, "grad_norm": 6.081130624986382, "learning_rate": 1.8134043807431283e-06, "loss": 0.6706, "step": 22255 }, { "epoch": 2.24, "grad_norm": 4.646210752535273, "learning_rate": 1.811144761033083e-06, "loss": 0.6515, "step": 22260 }, { "epoch": 2.24, "grad_norm": 5.462019258658007, "learning_rate": 1.8088862386050786e-06, "loss": 0.6332, "step": 22265 }, { "epoch": 2.25, "grad_norm": 4.5455720420032675, "learning_rate": 1.8066288142362704e-06, "loss": 0.6632, "step": 22270 }, { "epoch": 2.25, "grad_norm": 5.0054769315556555, "learning_rate": 1.8043724887034392e-06, "loss": 0.6543, "step": 22275 }, { "epoch": 2.25, "grad_norm": 6.001193032887485, "learning_rate": 1.802117262782982e-06, "loss": 0.6798, "step": 22280 }, { "epoch": 2.25, "grad_norm": 5.377042635357598, "learning_rate": 1.799863137250919e-06, "loss": 0.628, "step": 22285 }, { "epoch": 2.25, "grad_norm": 6.247020646583163, "learning_rate": 1.7976101128828955e-06, "loss": 0.6841, "step": 22290 }, { "epoch": 2.25, "grad_norm": 5.228469202329293, "learning_rate": 1.7953581904541733e-06, "loss": 0.6506, "step": 22295 }, { "epoch": 2.25, "grad_norm": 6.332671657525808, "learning_rate": 1.7931073707396373e-06, "loss": 0.6919, "step": 22300 }, { "epoch": 2.25, "grad_norm": 8.67677861390262, "learning_rate": 1.7908576545137907e-06, "loss": 0.6763, "step": 22305 }, { "epoch": 2.25, "grad_norm": 5.955850137989377, "learning_rate": 1.7886090425507612e-06, "loss": 0.6497, "step": 22310 }, { "epoch": 2.25, "grad_norm": 5.066982632939822, "learning_rate": 1.7863615356242913e-06, "loss": 0.6893, "step": 22315 }, { "epoch": 2.25, "grad_norm": 4.719905492192519, "learning_rate": 1.7841151345077495e-06, "loss": 0.6588, "step": 22320 }, { "epoch": 2.25, "grad_norm": 6.610428719794262, "learning_rate": 1.7818698399741186e-06, "loss": 0.6422, "step": 22325 }, { "epoch": 2.25, "grad_norm": 8.213106162955256, "learning_rate": 1.7796256527960021e-06, "loss": 0.6617, "step": 22330 }, { "epoch": 2.25, "grad_norm": 7.229355768945967, "learning_rate": 1.7773825737456208e-06, "loss": 0.6505, "step": 22335 }, { "epoch": 2.25, "grad_norm": 5.012731623542265, "learning_rate": 1.7751406035948193e-06, "loss": 0.6619, "step": 22340 }, { "epoch": 2.25, "grad_norm": 5.413531472969937, "learning_rate": 1.7728997431150546e-06, "loss": 0.6601, "step": 22345 }, { "epoch": 2.25, "grad_norm": 5.726110813450331, "learning_rate": 1.7706599930774077e-06, "loss": 0.6832, "step": 22350 }, { "epoch": 2.25, "grad_norm": 4.9672144231735045, "learning_rate": 1.768421354252573e-06, "loss": 0.654, "step": 22355 }, { "epoch": 2.25, "grad_norm": 7.407604286978984, "learning_rate": 1.7661838274108633e-06, "loss": 0.7097, "step": 22360 }, { "epoch": 2.25, "grad_norm": 5.588861929961504, "learning_rate": 1.7639474133222085e-06, "loss": 0.6319, "step": 22365 }, { "epoch": 2.26, "grad_norm": 5.068515096985442, "learning_rate": 1.7617121127561598e-06, "loss": 0.6928, "step": 22370 }, { "epoch": 2.26, "grad_norm": 4.565497603312442, "learning_rate": 1.7594779264818783e-06, "loss": 0.6595, "step": 22375 }, { "epoch": 2.26, "grad_norm": 4.856303691873805, "learning_rate": 1.7572448552681493e-06, "loss": 0.6576, "step": 22380 }, { "epoch": 2.26, "grad_norm": 5.032417280865937, "learning_rate": 1.7550128998833681e-06, "loss": 0.6665, "step": 22385 }, { "epoch": 2.26, "grad_norm": 11.027154143621729, "learning_rate": 1.7527820610955487e-06, "loss": 0.6895, "step": 22390 }, { "epoch": 2.26, "grad_norm": 5.220039667640416, "learning_rate": 1.7505523396723189e-06, "loss": 0.6352, "step": 22395 }, { "epoch": 2.26, "grad_norm": 9.297318647388199, "learning_rate": 1.7483237363809274e-06, "loss": 0.6873, "step": 22400 }, { "epoch": 2.26, "grad_norm": 4.900496581353949, "learning_rate": 1.7460962519882324e-06, "loss": 0.6739, "step": 22405 }, { "epoch": 2.26, "grad_norm": 4.656133617830663, "learning_rate": 1.7438698872607073e-06, "loss": 0.6538, "step": 22410 }, { "epoch": 2.26, "grad_norm": 5.577365486763903, "learning_rate": 1.7416446429644462e-06, "loss": 0.6357, "step": 22415 }, { "epoch": 2.26, "grad_norm": 5.4964458786704755, "learning_rate": 1.739420519865151e-06, "loss": 0.6212, "step": 22420 }, { "epoch": 2.26, "grad_norm": 5.718619521205935, "learning_rate": 1.7371975187281408e-06, "loss": 0.6586, "step": 22425 }, { "epoch": 2.26, "grad_norm": 5.828469851925843, "learning_rate": 1.7349756403183466e-06, "loss": 0.6864, "step": 22430 }, { "epoch": 2.26, "grad_norm": 4.822043565193145, "learning_rate": 1.7327548854003174e-06, "loss": 0.6553, "step": 22435 }, { "epoch": 2.26, "grad_norm": 4.742210440858266, "learning_rate": 1.7305352547382104e-06, "loss": 0.6594, "step": 22440 }, { "epoch": 2.26, "grad_norm": 5.268040093322078, "learning_rate": 1.7283167490958008e-06, "loss": 0.704, "step": 22445 }, { "epoch": 2.26, "grad_norm": 5.3592022150205665, "learning_rate": 1.726099369236473e-06, "loss": 0.655, "step": 22450 }, { "epoch": 2.26, "grad_norm": 5.995899113494076, "learning_rate": 1.7238831159232257e-06, "loss": 0.6753, "step": 22455 }, { "epoch": 2.26, "grad_norm": 4.883862383907867, "learning_rate": 1.7216679899186672e-06, "loss": 0.6574, "step": 22460 }, { "epoch": 2.26, "grad_norm": 4.639544085369703, "learning_rate": 1.719453991985024e-06, "loss": 0.6652, "step": 22465 }, { "epoch": 2.27, "grad_norm": 5.225580088013155, "learning_rate": 1.7172411228841268e-06, "loss": 0.6618, "step": 22470 }, { "epoch": 2.27, "grad_norm": 5.1915848336128985, "learning_rate": 1.7150293833774257e-06, "loss": 0.6785, "step": 22475 }, { "epoch": 2.27, "grad_norm": 7.4886162606891284, "learning_rate": 1.7128187742259766e-06, "loss": 0.6523, "step": 22480 }, { "epoch": 2.27, "grad_norm": 5.071153259933611, "learning_rate": 1.7106092961904476e-06, "loss": 0.6642, "step": 22485 }, { "epoch": 2.27, "grad_norm": 5.821964514546382, "learning_rate": 1.708400950031116e-06, "loss": 0.6571, "step": 22490 }, { "epoch": 2.27, "grad_norm": 6.567740592588277, "learning_rate": 1.706193736507875e-06, "loss": 0.6624, "step": 22495 }, { "epoch": 2.27, "grad_norm": 5.930879935653045, "learning_rate": 1.7039876563802227e-06, "loss": 0.6651, "step": 22500 }, { "epoch": 2.27, "grad_norm": 6.0767573323260935, "learning_rate": 1.701782710407271e-06, "loss": 0.7218, "step": 22505 }, { "epoch": 2.27, "grad_norm": 4.6670914829207115, "learning_rate": 1.699578899347738e-06, "loss": 0.6669, "step": 22510 }, { "epoch": 2.27, "grad_norm": 5.849989536360458, "learning_rate": 1.697376223959954e-06, "loss": 0.6287, "step": 22515 }, { "epoch": 2.27, "grad_norm": 8.789381725465098, "learning_rate": 1.6951746850018553e-06, "loss": 0.6648, "step": 22520 }, { "epoch": 2.27, "grad_norm": 7.480342356637824, "learning_rate": 1.6929742832309926e-06, "loss": 0.6871, "step": 22525 }, { "epoch": 2.27, "grad_norm": 8.677024202708074, "learning_rate": 1.6907750194045187e-06, "loss": 0.672, "step": 22530 }, { "epoch": 2.27, "grad_norm": 4.2619899057185044, "learning_rate": 1.6885768942792018e-06, "loss": 0.6492, "step": 22535 }, { "epoch": 2.27, "grad_norm": 4.64342507169857, "learning_rate": 1.6863799086114123e-06, "loss": 0.6228, "step": 22540 }, { "epoch": 2.27, "grad_norm": 4.816052603229862, "learning_rate": 1.684184063157132e-06, "loss": 0.6745, "step": 22545 }, { "epoch": 2.27, "grad_norm": 5.13577322884629, "learning_rate": 1.6819893586719482e-06, "loss": 0.6422, "step": 22550 }, { "epoch": 2.27, "grad_norm": 5.261141020345327, "learning_rate": 1.6797957959110556e-06, "loss": 0.6444, "step": 22555 }, { "epoch": 2.27, "grad_norm": 13.338569930570511, "learning_rate": 1.6776033756292602e-06, "loss": 0.6916, "step": 22560 }, { "epoch": 2.28, "grad_norm": 8.419978426971964, "learning_rate": 1.675412098580968e-06, "loss": 0.6775, "step": 22565 }, { "epoch": 2.28, "grad_norm": 5.16182641300474, "learning_rate": 1.6732219655201992e-06, "loss": 0.6604, "step": 22570 }, { "epoch": 2.28, "grad_norm": 5.597935147747176, "learning_rate": 1.6710329772005745e-06, "loss": 0.6568, "step": 22575 }, { "epoch": 2.28, "grad_norm": 5.515372501062043, "learning_rate": 1.668845134375323e-06, "loss": 0.6796, "step": 22580 }, { "epoch": 2.28, "grad_norm": 6.9916103673781, "learning_rate": 1.6666584377972778e-06, "loss": 0.6468, "step": 22585 }, { "epoch": 2.28, "grad_norm": 5.799949235202191, "learning_rate": 1.6644728882188816e-06, "loss": 0.6541, "step": 22590 }, { "epoch": 2.28, "grad_norm": 7.06369827344823, "learning_rate": 1.6622884863921774e-06, "loss": 0.645, "step": 22595 }, { "epoch": 2.28, "grad_norm": 4.824620430723345, "learning_rate": 1.6601052330688183e-06, "loss": 0.6786, "step": 22600 }, { "epoch": 2.28, "grad_norm": 5.431448980072174, "learning_rate": 1.657923129000059e-06, "loss": 0.6947, "step": 22605 }, { "epoch": 2.28, "grad_norm": 5.597594890904755, "learning_rate": 1.6557421749367587e-06, "loss": 0.7003, "step": 22610 }, { "epoch": 2.28, "grad_norm": 4.8169484999390715, "learning_rate": 1.6535623716293803e-06, "loss": 0.6554, "step": 22615 }, { "epoch": 2.28, "grad_norm": 6.649438638850983, "learning_rate": 1.6513837198279954e-06, "loss": 0.6925, "step": 22620 }, { "epoch": 2.28, "grad_norm": 8.363151737668417, "learning_rate": 1.6492062202822724e-06, "loss": 0.675, "step": 22625 }, { "epoch": 2.28, "grad_norm": 6.638961681603949, "learning_rate": 1.6470298737414902e-06, "loss": 0.6805, "step": 22630 }, { "epoch": 2.28, "grad_norm": 5.301544447481115, "learning_rate": 1.6448546809545263e-06, "loss": 0.6685, "step": 22635 }, { "epoch": 2.28, "grad_norm": 4.82592619935771, "learning_rate": 1.6426806426698616e-06, "loss": 0.6311, "step": 22640 }, { "epoch": 2.28, "grad_norm": 4.879463534993098, "learning_rate": 1.6405077596355802e-06, "loss": 0.676, "step": 22645 }, { "epoch": 2.28, "grad_norm": 4.285753076248275, "learning_rate": 1.6383360325993714e-06, "loss": 0.6604, "step": 22650 }, { "epoch": 2.28, "grad_norm": 6.806887277989003, "learning_rate": 1.636165462308521e-06, "loss": 0.7141, "step": 22655 }, { "epoch": 2.28, "grad_norm": 4.653980698418252, "learning_rate": 1.6339960495099243e-06, "loss": 0.6709, "step": 22660 }, { "epoch": 2.29, "grad_norm": 5.908926619764527, "learning_rate": 1.6318277949500722e-06, "loss": 0.6616, "step": 22665 }, { "epoch": 2.29, "grad_norm": 6.916726063911858, "learning_rate": 1.6296606993750585e-06, "loss": 0.6744, "step": 22670 }, { "epoch": 2.29, "grad_norm": 8.085829241125342, "learning_rate": 1.6274947635305787e-06, "loss": 0.6666, "step": 22675 }, { "epoch": 2.29, "grad_norm": 5.298049943306973, "learning_rate": 1.6253299881619288e-06, "loss": 0.6646, "step": 22680 }, { "epoch": 2.29, "grad_norm": 4.5751950869327045, "learning_rate": 1.623166374014008e-06, "loss": 0.6832, "step": 22685 }, { "epoch": 2.29, "grad_norm": 5.355871168560011, "learning_rate": 1.6210039218313111e-06, "loss": 0.6712, "step": 22690 }, { "epoch": 2.29, "grad_norm": 5.288403929339322, "learning_rate": 1.6188426323579393e-06, "loss": 0.6812, "step": 22695 }, { "epoch": 2.29, "grad_norm": 4.5701276572794125, "learning_rate": 1.616682506337588e-06, "loss": 0.6712, "step": 22700 }, { "epoch": 2.29, "grad_norm": 5.251012685634926, "learning_rate": 1.6145235445135548e-06, "loss": 0.7031, "step": 22705 }, { "epoch": 2.29, "grad_norm": 5.5135520460923795, "learning_rate": 1.6123657476287351e-06, "loss": 0.659, "step": 22710 }, { "epoch": 2.29, "grad_norm": 4.66950416575332, "learning_rate": 1.6102091164256274e-06, "loss": 0.6622, "step": 22715 }, { "epoch": 2.29, "grad_norm": 5.036823131575185, "learning_rate": 1.6080536516463236e-06, "loss": 0.6851, "step": 22720 }, { "epoch": 2.29, "grad_norm": 5.4755189801415325, "learning_rate": 1.60589935403252e-06, "loss": 0.6726, "step": 22725 }, { "epoch": 2.29, "grad_norm": 6.375815713966879, "learning_rate": 1.6037462243255075e-06, "loss": 0.6622, "step": 22730 }, { "epoch": 2.29, "grad_norm": 5.399869288391677, "learning_rate": 1.6015942632661751e-06, "loss": 0.6638, "step": 22735 }, { "epoch": 2.29, "grad_norm": 5.892863779789559, "learning_rate": 1.5994434715950092e-06, "loss": 0.6664, "step": 22740 }, { "epoch": 2.29, "grad_norm": 10.217792975059123, "learning_rate": 1.5972938500520985e-06, "loss": 0.6393, "step": 22745 }, { "epoch": 2.29, "grad_norm": 5.600183963459423, "learning_rate": 1.5951453993771226e-06, "loss": 0.6497, "step": 22750 }, { "epoch": 2.29, "grad_norm": 9.397161807213989, "learning_rate": 1.5929981203093642e-06, "loss": 0.6868, "step": 22755 }, { "epoch": 2.29, "grad_norm": 6.338230143136408, "learning_rate": 1.5908520135876981e-06, "loss": 0.631, "step": 22760 }, { "epoch": 2.3, "grad_norm": 9.48096451098916, "learning_rate": 1.588707079950599e-06, "loss": 0.6505, "step": 22765 }, { "epoch": 2.3, "grad_norm": 4.999293793939398, "learning_rate": 1.586563320136133e-06, "loss": 0.637, "step": 22770 }, { "epoch": 2.3, "grad_norm": 5.274154137351849, "learning_rate": 1.5844207348819702e-06, "loss": 0.6752, "step": 22775 }, { "epoch": 2.3, "grad_norm": 6.788575808926672, "learning_rate": 1.5822793249253687e-06, "loss": 0.6621, "step": 22780 }, { "epoch": 2.3, "grad_norm": 4.857226848457538, "learning_rate": 1.5801390910031888e-06, "loss": 0.6832, "step": 22785 }, { "epoch": 2.3, "grad_norm": 7.658009144634473, "learning_rate": 1.5780000338518813e-06, "loss": 0.6528, "step": 22790 }, { "epoch": 2.3, "grad_norm": 16.1108955367705, "learning_rate": 1.5758621542074942e-06, "loss": 0.6745, "step": 22795 }, { "epoch": 2.3, "grad_norm": 13.716842477092792, "learning_rate": 1.573725452805669e-06, "loss": 0.6798, "step": 22800 }, { "epoch": 2.3, "grad_norm": 12.84118164022468, "learning_rate": 1.5715899303816413e-06, "loss": 0.6628, "step": 22805 }, { "epoch": 2.3, "grad_norm": 13.752749266021626, "learning_rate": 1.569455587670246e-06, "loss": 0.689, "step": 22810 }, { "epoch": 2.3, "grad_norm": 5.108777183409205, "learning_rate": 1.5673224254059045e-06, "loss": 0.6336, "step": 22815 }, { "epoch": 2.3, "grad_norm": 5.070827902531056, "learning_rate": 1.5651904443226396e-06, "loss": 0.6531, "step": 22820 }, { "epoch": 2.3, "grad_norm": 6.386136834259939, "learning_rate": 1.563059645154062e-06, "loss": 0.6565, "step": 22825 }, { "epoch": 2.3, "grad_norm": 7.73848076802254, "learning_rate": 1.5609300286333785e-06, "loss": 0.637, "step": 22830 }, { "epoch": 2.3, "grad_norm": 4.383849619388004, "learning_rate": 1.558801595493385e-06, "loss": 0.6544, "step": 22835 }, { "epoch": 2.3, "grad_norm": 4.528671138607547, "learning_rate": 1.556674346466478e-06, "loss": 0.6391, "step": 22840 }, { "epoch": 2.3, "grad_norm": 7.863716125361647, "learning_rate": 1.5545482822846375e-06, "loss": 0.6687, "step": 22845 }, { "epoch": 2.3, "grad_norm": 10.416901948032352, "learning_rate": 1.5524234036794443e-06, "loss": 0.6686, "step": 22850 }, { "epoch": 2.3, "grad_norm": 12.080893725057612, "learning_rate": 1.5502997113820651e-06, "loss": 0.6806, "step": 22855 }, { "epoch": 2.3, "grad_norm": 12.492772940957037, "learning_rate": 1.5481772061232602e-06, "loss": 0.6668, "step": 22860 }, { "epoch": 2.31, "grad_norm": 6.6746567293411605, "learning_rate": 1.5460558886333799e-06, "loss": 0.6356, "step": 22865 }, { "epoch": 2.31, "grad_norm": 5.517073475954265, "learning_rate": 1.5439357596423715e-06, "loss": 0.6996, "step": 22870 }, { "epoch": 2.31, "grad_norm": 4.808254989184655, "learning_rate": 1.5418168198797656e-06, "loss": 0.664, "step": 22875 }, { "epoch": 2.31, "grad_norm": 4.489570597294558, "learning_rate": 1.5396990700746906e-06, "loss": 0.6744, "step": 22880 }, { "epoch": 2.31, "grad_norm": 5.180070947431376, "learning_rate": 1.5375825109558606e-06, "loss": 0.6738, "step": 22885 }, { "epoch": 2.31, "grad_norm": 4.799182335651492, "learning_rate": 1.5354671432515816e-06, "loss": 0.6175, "step": 22890 }, { "epoch": 2.31, "grad_norm": 4.500868500253037, "learning_rate": 1.533352967689748e-06, "loss": 0.6295, "step": 22895 }, { "epoch": 2.31, "grad_norm": 4.943229695166293, "learning_rate": 1.5312399849978483e-06, "loss": 0.6333, "step": 22900 }, { "epoch": 2.31, "grad_norm": 7.070154327116882, "learning_rate": 1.5291281959029558e-06, "loss": 0.6425, "step": 22905 }, { "epoch": 2.31, "grad_norm": 4.857751537719575, "learning_rate": 1.5270176011317373e-06, "loss": 0.6621, "step": 22910 }, { "epoch": 2.31, "grad_norm": 4.647239303953281, "learning_rate": 1.5249082014104455e-06, "loss": 0.646, "step": 22915 }, { "epoch": 2.31, "grad_norm": 4.693460297092233, "learning_rate": 1.5227999974649221e-06, "loss": 0.6826, "step": 22920 }, { "epoch": 2.31, "grad_norm": 4.809710792652713, "learning_rate": 1.5206929900205987e-06, "loss": 0.637, "step": 22925 }, { "epoch": 2.31, "grad_norm": 6.580585052715756, "learning_rate": 1.5185871798024925e-06, "loss": 0.6762, "step": 22930 }, { "epoch": 2.31, "grad_norm": 4.500498281272657, "learning_rate": 1.516482567535214e-06, "loss": 0.6469, "step": 22935 }, { "epoch": 2.31, "grad_norm": 4.953886299364294, "learning_rate": 1.514379153942956e-06, "loss": 0.6251, "step": 22940 }, { "epoch": 2.31, "grad_norm": 4.8641256553373, "learning_rate": 1.5122769397495047e-06, "loss": 0.6595, "step": 22945 }, { "epoch": 2.31, "grad_norm": 4.563681811327853, "learning_rate": 1.510175925678224e-06, "loss": 0.6188, "step": 22950 }, { "epoch": 2.31, "grad_norm": 5.174693521988725, "learning_rate": 1.508076112452076e-06, "loss": 0.6701, "step": 22955 }, { "epoch": 2.31, "grad_norm": 4.501701102219561, "learning_rate": 1.5059775007936006e-06, "loss": 0.682, "step": 22960 }, { "epoch": 2.32, "grad_norm": 5.772847198348246, "learning_rate": 1.5038800914249319e-06, "loss": 0.6474, "step": 22965 }, { "epoch": 2.32, "grad_norm": 4.9431492869509865, "learning_rate": 1.501783885067783e-06, "loss": 0.6448, "step": 22970 }, { "epoch": 2.32, "grad_norm": 5.472881197234141, "learning_rate": 1.4996888824434613e-06, "loss": 0.6552, "step": 22975 }, { "epoch": 2.32, "grad_norm": 5.316621141713757, "learning_rate": 1.4975950842728482e-06, "loss": 0.6278, "step": 22980 }, { "epoch": 2.32, "grad_norm": 4.813533380241714, "learning_rate": 1.4955024912764239e-06, "loss": 0.6656, "step": 22985 }, { "epoch": 2.32, "grad_norm": 4.427506467377018, "learning_rate": 1.4934111041742427e-06, "loss": 0.629, "step": 22990 }, { "epoch": 2.32, "grad_norm": 4.177308682078762, "learning_rate": 1.4913209236859533e-06, "loss": 0.6516, "step": 22995 }, { "epoch": 2.32, "grad_norm": 4.602772286736399, "learning_rate": 1.4892319505307817e-06, "loss": 0.6848, "step": 23000 }, { "epoch": 2.32, "grad_norm": 5.021566911989718, "learning_rate": 1.4871441854275454e-06, "loss": 0.6633, "step": 23005 }, { "epoch": 2.32, "grad_norm": 5.1205395441480155, "learning_rate": 1.4850576290946368e-06, "loss": 0.6336, "step": 23010 }, { "epoch": 2.32, "grad_norm": 5.050517972420797, "learning_rate": 1.4829722822500425e-06, "loss": 0.6554, "step": 23015 }, { "epoch": 2.32, "grad_norm": 5.489818487186654, "learning_rate": 1.480888145611325e-06, "loss": 0.6831, "step": 23020 }, { "epoch": 2.32, "grad_norm": 4.368923565018206, "learning_rate": 1.478805219895637e-06, "loss": 0.6764, "step": 23025 }, { "epoch": 2.32, "grad_norm": 5.585422569645293, "learning_rate": 1.4767235058197083e-06, "loss": 0.6559, "step": 23030 }, { "epoch": 2.32, "grad_norm": 7.324264673646722, "learning_rate": 1.474643004099857e-06, "loss": 0.6808, "step": 23035 }, { "epoch": 2.32, "grad_norm": 5.070237790985046, "learning_rate": 1.4725637154519817e-06, "loss": 0.6641, "step": 23040 }, { "epoch": 2.32, "grad_norm": 5.047696957897613, "learning_rate": 1.4704856405915629e-06, "loss": 0.6386, "step": 23045 }, { "epoch": 2.32, "grad_norm": 4.5509977864925, "learning_rate": 1.4684087802336639e-06, "loss": 0.6647, "step": 23050 }, { "epoch": 2.32, "grad_norm": 5.120281916761617, "learning_rate": 1.466333135092929e-06, "loss": 0.6543, "step": 23055 }, { "epoch": 2.32, "grad_norm": 4.583061331252208, "learning_rate": 1.4642587058835883e-06, "loss": 0.6635, "step": 23060 }, { "epoch": 2.33, "grad_norm": 7.757596000491188, "learning_rate": 1.4621854933194491e-06, "loss": 0.6824, "step": 23065 }, { "epoch": 2.33, "grad_norm": 4.383801982904171, "learning_rate": 1.4601134981139054e-06, "loss": 0.6578, "step": 23070 }, { "epoch": 2.33, "grad_norm": 5.3470348884541306, "learning_rate": 1.4580427209799231e-06, "loss": 0.6928, "step": 23075 }, { "epoch": 2.33, "grad_norm": 5.149969443706589, "learning_rate": 1.4559731626300589e-06, "loss": 0.6617, "step": 23080 }, { "epoch": 2.33, "grad_norm": 5.284858096905827, "learning_rate": 1.4539048237764431e-06, "loss": 0.6275, "step": 23085 }, { "epoch": 2.33, "grad_norm": 4.693178152027507, "learning_rate": 1.451837705130792e-06, "loss": 0.6713, "step": 23090 }, { "epoch": 2.33, "grad_norm": 5.203824484556633, "learning_rate": 1.449771807404396e-06, "loss": 0.6607, "step": 23095 }, { "epoch": 2.33, "grad_norm": 5.086803924453811, "learning_rate": 1.447707131308133e-06, "loss": 0.7089, "step": 23100 }, { "epoch": 2.33, "grad_norm": 5.049690406164953, "learning_rate": 1.4456436775524507e-06, "loss": 0.7033, "step": 23105 }, { "epoch": 2.33, "grad_norm": 4.761020015525577, "learning_rate": 1.4435814468473858e-06, "loss": 0.6751, "step": 23110 }, { "epoch": 2.33, "grad_norm": 9.406705510139943, "learning_rate": 1.4415204399025462e-06, "loss": 0.631, "step": 23115 }, { "epoch": 2.33, "grad_norm": 5.127818797007829, "learning_rate": 1.4394606574271264e-06, "loss": 0.6809, "step": 23120 }, { "epoch": 2.33, "grad_norm": 7.045815118104132, "learning_rate": 1.437402100129892e-06, "loss": 0.6879, "step": 23125 }, { "epoch": 2.33, "grad_norm": 5.939354889411304, "learning_rate": 1.4353447687191958e-06, "loss": 0.6896, "step": 23130 }, { "epoch": 2.33, "grad_norm": 5.292790763207718, "learning_rate": 1.4332886639029564e-06, "loss": 0.7029, "step": 23135 }, { "epoch": 2.33, "grad_norm": 5.154024926520374, "learning_rate": 1.4312337863886832e-06, "loss": 0.6413, "step": 23140 }, { "epoch": 2.33, "grad_norm": 4.647242944147119, "learning_rate": 1.4291801368834534e-06, "loss": 0.6296, "step": 23145 }, { "epoch": 2.33, "grad_norm": 6.41781643535116, "learning_rate": 1.42712771609393e-06, "loss": 0.6493, "step": 23150 }, { "epoch": 2.33, "grad_norm": 6.03442067665093, "learning_rate": 1.4250765247263455e-06, "loss": 0.6604, "step": 23155 }, { "epoch": 2.34, "grad_norm": 4.550340133803211, "learning_rate": 1.423026563486517e-06, "loss": 0.6379, "step": 23160 }, { "epoch": 2.34, "grad_norm": 5.03642193203217, "learning_rate": 1.420977833079829e-06, "loss": 0.6574, "step": 23165 }, { "epoch": 2.34, "grad_norm": 5.5802584153495705, "learning_rate": 1.4189303342112515e-06, "loss": 0.6574, "step": 23170 }, { "epoch": 2.34, "grad_norm": 4.571903557702888, "learning_rate": 1.416884067585324e-06, "loss": 0.6682, "step": 23175 }, { "epoch": 2.34, "grad_norm": 4.723195708364407, "learning_rate": 1.4148390339061686e-06, "loss": 0.6845, "step": 23180 }, { "epoch": 2.34, "grad_norm": 5.344770915417063, "learning_rate": 1.412795233877477e-06, "loss": 0.6471, "step": 23185 }, { "epoch": 2.34, "grad_norm": 5.694346065527504, "learning_rate": 1.4107526682025196e-06, "loss": 0.6362, "step": 23190 }, { "epoch": 2.34, "grad_norm": 4.6656465393345625, "learning_rate": 1.4087113375841405e-06, "loss": 0.626, "step": 23195 }, { "epoch": 2.34, "grad_norm": 7.125473741431489, "learning_rate": 1.4066712427247585e-06, "loss": 0.65, "step": 23200 }, { "epoch": 2.34, "grad_norm": 5.056846590080103, "learning_rate": 1.404632384326372e-06, "loss": 0.6943, "step": 23205 }, { "epoch": 2.34, "grad_norm": 4.925503705896461, "learning_rate": 1.4025947630905462e-06, "loss": 0.6557, "step": 23210 }, { "epoch": 2.34, "grad_norm": 6.065099671118211, "learning_rate": 1.400558379718428e-06, "loss": 0.7129, "step": 23215 }, { "epoch": 2.34, "grad_norm": 5.1811074220151765, "learning_rate": 1.3985232349107337e-06, "loss": 0.6796, "step": 23220 }, { "epoch": 2.34, "grad_norm": 4.636562868475947, "learning_rate": 1.3964893293677544e-06, "loss": 0.6606, "step": 23225 }, { "epoch": 2.34, "grad_norm": 5.413028304997516, "learning_rate": 1.3944566637893532e-06, "loss": 0.667, "step": 23230 }, { "epoch": 2.34, "grad_norm": 5.872599984362106, "learning_rate": 1.392425238874972e-06, "loss": 0.6637, "step": 23235 }, { "epoch": 2.34, "grad_norm": 6.7251617213033485, "learning_rate": 1.3903950553236195e-06, "loss": 0.6672, "step": 23240 }, { "epoch": 2.34, "grad_norm": 6.3003296095887995, "learning_rate": 1.3883661138338816e-06, "loss": 0.6466, "step": 23245 }, { "epoch": 2.34, "grad_norm": 5.494566292087462, "learning_rate": 1.386338415103915e-06, "loss": 0.6855, "step": 23250 }, { "epoch": 2.34, "grad_norm": 5.15293869738178, "learning_rate": 1.384311959831448e-06, "loss": 0.6595, "step": 23255 }, { "epoch": 2.35, "grad_norm": 4.260877143165857, "learning_rate": 1.3822867487137808e-06, "loss": 0.6825, "step": 23260 }, { "epoch": 2.35, "grad_norm": 4.584211338372017, "learning_rate": 1.38026278244779e-06, "loss": 0.6439, "step": 23265 }, { "epoch": 2.35, "grad_norm": 7.742745689669459, "learning_rate": 1.3782400617299164e-06, "loss": 0.6723, "step": 23270 }, { "epoch": 2.35, "grad_norm": 4.726444616139492, "learning_rate": 1.37621858725618e-06, "loss": 0.6351, "step": 23275 }, { "epoch": 2.35, "grad_norm": 4.850540870101814, "learning_rate": 1.374198359722167e-06, "loss": 0.6377, "step": 23280 }, { "epoch": 2.35, "grad_norm": 4.95117451315653, "learning_rate": 1.3721793798230348e-06, "loss": 0.6468, "step": 23285 }, { "epoch": 2.35, "grad_norm": 5.481108016903852, "learning_rate": 1.370161648253512e-06, "loss": 0.6457, "step": 23290 }, { "epoch": 2.35, "grad_norm": 4.867437599763508, "learning_rate": 1.3681451657079003e-06, "loss": 0.7072, "step": 23295 }, { "epoch": 2.35, "grad_norm": 5.348183075500717, "learning_rate": 1.3661299328800671e-06, "loss": 0.6474, "step": 23300 }, { "epoch": 2.35, "grad_norm": 4.566750840451449, "learning_rate": 1.3641159504634543e-06, "loss": 0.6517, "step": 23305 }, { "epoch": 2.35, "grad_norm": 4.349563812986563, "learning_rate": 1.3621032191510708e-06, "loss": 0.6582, "step": 23310 }, { "epoch": 2.35, "grad_norm": 5.230110035485306, "learning_rate": 1.3600917396354946e-06, "loss": 0.6464, "step": 23315 }, { "epoch": 2.35, "grad_norm": 10.85878257847084, "learning_rate": 1.3580815126088742e-06, "loss": 0.6586, "step": 23320 }, { "epoch": 2.35, "grad_norm": 11.345914802277472, "learning_rate": 1.356072538762926e-06, "loss": 0.643, "step": 23325 }, { "epoch": 2.35, "grad_norm": 4.8404459088455924, "learning_rate": 1.3540648187889377e-06, "loss": 0.6631, "step": 23330 }, { "epoch": 2.35, "grad_norm": 4.5794802817734865, "learning_rate": 1.352058353377762e-06, "loss": 0.6473, "step": 23335 }, { "epoch": 2.35, "grad_norm": 5.9389158480385, "learning_rate": 1.3500531432198239e-06, "loss": 0.6806, "step": 23340 }, { "epoch": 2.35, "grad_norm": 6.3870434258097735, "learning_rate": 1.3480491890051128e-06, "loss": 0.6266, "step": 23345 }, { "epoch": 2.35, "grad_norm": 4.826035233529999, "learning_rate": 1.3460464914231886e-06, "loss": 0.6739, "step": 23350 }, { "epoch": 2.35, "grad_norm": 4.647906550361867, "learning_rate": 1.3440450511631748e-06, "loss": 0.669, "step": 23355 }, { "epoch": 2.36, "grad_norm": 4.637883983239464, "learning_rate": 1.3420448689137683e-06, "loss": 0.6564, "step": 23360 }, { "epoch": 2.36, "grad_norm": 5.5577202461210335, "learning_rate": 1.3400459453632274e-06, "loss": 0.6824, "step": 23365 }, { "epoch": 2.36, "grad_norm": 4.9500063154271485, "learning_rate": 1.3380482811993827e-06, "loss": 0.6521, "step": 23370 }, { "epoch": 2.36, "grad_norm": 5.150985099315612, "learning_rate": 1.3360518771096264e-06, "loss": 0.6485, "step": 23375 }, { "epoch": 2.36, "grad_norm": 5.006946347892576, "learning_rate": 1.3340567337809203e-06, "loss": 0.6921, "step": 23380 }, { "epoch": 2.36, "grad_norm": 5.375026650050526, "learning_rate": 1.332062851899789e-06, "loss": 0.6219, "step": 23385 }, { "epoch": 2.36, "grad_norm": 4.841278654003525, "learning_rate": 1.3300702321523285e-06, "loss": 0.6747, "step": 23390 }, { "epoch": 2.36, "grad_norm": 4.856888018203015, "learning_rate": 1.3280788752241945e-06, "loss": 0.6781, "step": 23395 }, { "epoch": 2.36, "grad_norm": 5.087734511284156, "learning_rate": 1.326088781800614e-06, "loss": 0.7146, "step": 23400 }, { "epoch": 2.36, "grad_norm": 4.674249998453914, "learning_rate": 1.3240999525663745e-06, "loss": 0.6532, "step": 23405 }, { "epoch": 2.36, "grad_norm": 7.303622485507553, "learning_rate": 1.3221123882058308e-06, "loss": 0.6753, "step": 23410 }, { "epoch": 2.36, "grad_norm": 6.834287356147514, "learning_rate": 1.3201260894028994e-06, "loss": 0.6871, "step": 23415 }, { "epoch": 2.36, "grad_norm": 4.456355924902777, "learning_rate": 1.318141056841068e-06, "loss": 0.6214, "step": 23420 }, { "epoch": 2.36, "grad_norm": 5.193507840480104, "learning_rate": 1.3161572912033804e-06, "loss": 0.6692, "step": 23425 }, { "epoch": 2.36, "grad_norm": 5.216069177319419, "learning_rate": 1.3141747931724525e-06, "loss": 0.6571, "step": 23430 }, { "epoch": 2.36, "grad_norm": 5.731333771254167, "learning_rate": 1.3121935634304579e-06, "loss": 0.6906, "step": 23435 }, { "epoch": 2.36, "grad_norm": 4.787135408765722, "learning_rate": 1.310213602659135e-06, "loss": 0.6366, "step": 23440 }, { "epoch": 2.36, "grad_norm": 5.078790304953358, "learning_rate": 1.308234911539788e-06, "loss": 0.6694, "step": 23445 }, { "epoch": 2.36, "grad_norm": 4.6198193220233525, "learning_rate": 1.3062574907532804e-06, "loss": 0.6594, "step": 23450 }, { "epoch": 2.36, "grad_norm": 5.8832634578953735, "learning_rate": 1.3042813409800436e-06, "loss": 0.6642, "step": 23455 }, { "epoch": 2.37, "grad_norm": 4.567256783115041, "learning_rate": 1.3023064629000658e-06, "loss": 0.6852, "step": 23460 }, { "epoch": 2.37, "grad_norm": 6.4284491630095015, "learning_rate": 1.3003328571929047e-06, "loss": 0.6544, "step": 23465 }, { "epoch": 2.37, "grad_norm": 4.960176754434972, "learning_rate": 1.2983605245376729e-06, "loss": 0.6995, "step": 23470 }, { "epoch": 2.37, "grad_norm": 5.14495847600321, "learning_rate": 1.2963894656130498e-06, "loss": 0.6466, "step": 23475 }, { "epoch": 2.37, "grad_norm": 5.411771484130297, "learning_rate": 1.2944196810972726e-06, "loss": 0.6451, "step": 23480 }, { "epoch": 2.37, "grad_norm": 6.403061557609689, "learning_rate": 1.2924511716681454e-06, "loss": 0.6707, "step": 23485 }, { "epoch": 2.37, "grad_norm": 9.665497217526969, "learning_rate": 1.290483938003027e-06, "loss": 0.6474, "step": 23490 }, { "epoch": 2.37, "grad_norm": 6.353025034488214, "learning_rate": 1.2885179807788445e-06, "loss": 0.6332, "step": 23495 }, { "epoch": 2.37, "grad_norm": 4.23222585755923, "learning_rate": 1.2865533006720799e-06, "loss": 0.6703, "step": 23500 }, { "epoch": 2.37, "grad_norm": 10.189597357989905, "learning_rate": 1.284589898358778e-06, "loss": 0.6521, "step": 23505 }, { "epoch": 2.37, "grad_norm": 5.614627502483765, "learning_rate": 1.2826277745145416e-06, "loss": 0.6625, "step": 23510 }, { "epoch": 2.37, "grad_norm": 6.686704893298934, "learning_rate": 1.2806669298145391e-06, "loss": 0.6506, "step": 23515 }, { "epoch": 2.37, "grad_norm": 4.7921094655898955, "learning_rate": 1.2787073649334919e-06, "loss": 0.679, "step": 23520 }, { "epoch": 2.37, "grad_norm": 6.666328142877876, "learning_rate": 1.2767490805456878e-06, "loss": 0.6433, "step": 23525 }, { "epoch": 2.37, "grad_norm": 5.584125934385775, "learning_rate": 1.2747920773249689e-06, "loss": 0.6884, "step": 23530 }, { "epoch": 2.37, "grad_norm": 7.078045665988331, "learning_rate": 1.2728363559447388e-06, "loss": 0.6599, "step": 23535 }, { "epoch": 2.37, "grad_norm": 4.828946933390636, "learning_rate": 1.2708819170779568e-06, "loss": 0.6351, "step": 23540 }, { "epoch": 2.37, "grad_norm": 4.93210162599946, "learning_rate": 1.2689287613971467e-06, "loss": 0.6515, "step": 23545 }, { "epoch": 2.37, "grad_norm": 6.907369932396423, "learning_rate": 1.266976889574385e-06, "loss": 0.6519, "step": 23550 }, { "epoch": 2.37, "grad_norm": 4.496370017842469, "learning_rate": 1.265026302281312e-06, "loss": 0.6496, "step": 23555 }, { "epoch": 2.38, "grad_norm": 4.370476330349814, "learning_rate": 1.2630770001891207e-06, "loss": 0.6559, "step": 23560 }, { "epoch": 2.38, "grad_norm": 4.738527745570117, "learning_rate": 1.2611289839685643e-06, "loss": 0.652, "step": 23565 }, { "epoch": 2.38, "grad_norm": 4.5919750707047156, "learning_rate": 1.2591822542899535e-06, "loss": 0.6782, "step": 23570 }, { "epoch": 2.38, "grad_norm": 6.649660615172721, "learning_rate": 1.257236811823155e-06, "loss": 0.6538, "step": 23575 }, { "epoch": 2.38, "grad_norm": 4.520625159182393, "learning_rate": 1.2552926572375957e-06, "loss": 0.6689, "step": 23580 }, { "epoch": 2.38, "grad_norm": 6.9033900446678516, "learning_rate": 1.2533497912022558e-06, "loss": 0.6437, "step": 23585 }, { "epoch": 2.38, "grad_norm": 9.766767179662107, "learning_rate": 1.2514082143856748e-06, "loss": 0.6579, "step": 23590 }, { "epoch": 2.38, "grad_norm": 6.433224776492155, "learning_rate": 1.2494679274559473e-06, "loss": 0.6443, "step": 23595 }, { "epoch": 2.38, "grad_norm": 9.979552939552086, "learning_rate": 1.2475289310807243e-06, "loss": 0.6721, "step": 23600 }, { "epoch": 2.38, "grad_norm": 15.67763557490779, "learning_rate": 1.2455912259272101e-06, "loss": 0.6534, "step": 23605 }, { "epoch": 2.38, "grad_norm": 6.332743212638864, "learning_rate": 1.2436548126621706e-06, "loss": 0.6616, "step": 23610 }, { "epoch": 2.38, "grad_norm": 7.280536435536539, "learning_rate": 1.241719691951921e-06, "loss": 0.6559, "step": 23615 }, { "epoch": 2.38, "grad_norm": 5.5144024446355715, "learning_rate": 1.2397858644623372e-06, "loss": 0.6806, "step": 23620 }, { "epoch": 2.38, "grad_norm": 6.825569952036882, "learning_rate": 1.2378533308588465e-06, "loss": 0.6852, "step": 23625 }, { "epoch": 2.38, "grad_norm": 5.720916702390011, "learning_rate": 1.2359220918064307e-06, "loss": 0.642, "step": 23630 }, { "epoch": 2.38, "grad_norm": 4.798257914773307, "learning_rate": 1.2339921479696272e-06, "loss": 0.6712, "step": 23635 }, { "epoch": 2.38, "grad_norm": 5.585871975942351, "learning_rate": 1.2320635000125297e-06, "loss": 0.6744, "step": 23640 }, { "epoch": 2.38, "grad_norm": 6.093392040206783, "learning_rate": 1.2301361485987817e-06, "loss": 0.6983, "step": 23645 }, { "epoch": 2.38, "grad_norm": 6.9083491018896055, "learning_rate": 1.2282100943915854e-06, "loss": 0.7137, "step": 23650 }, { "epoch": 2.38, "grad_norm": 5.209742991514805, "learning_rate": 1.2262853380536938e-06, "loss": 0.66, "step": 23655 }, { "epoch": 2.39, "grad_norm": 4.855657115362898, "learning_rate": 1.2243618802474127e-06, "loss": 0.6402, "step": 23660 }, { "epoch": 2.39, "grad_norm": 5.611373859685703, "learning_rate": 1.2224397216346012e-06, "loss": 0.6706, "step": 23665 }, { "epoch": 2.39, "grad_norm": 4.840925762847756, "learning_rate": 1.2205188628766745e-06, "loss": 0.6681, "step": 23670 }, { "epoch": 2.39, "grad_norm": 5.042033497059596, "learning_rate": 1.2185993046345961e-06, "loss": 0.6468, "step": 23675 }, { "epoch": 2.39, "grad_norm": 4.79871160771968, "learning_rate": 1.2166810475688867e-06, "loss": 0.6741, "step": 23680 }, { "epoch": 2.39, "grad_norm": 5.105323448947047, "learning_rate": 1.214764092339616e-06, "loss": 0.6735, "step": 23685 }, { "epoch": 2.39, "grad_norm": 4.891121972055124, "learning_rate": 1.212848439606405e-06, "loss": 0.6561, "step": 23690 }, { "epoch": 2.39, "grad_norm": 6.4686279997552765, "learning_rate": 1.2109340900284289e-06, "loss": 0.6541, "step": 23695 }, { "epoch": 2.39, "grad_norm": 5.01156044355106, "learning_rate": 1.2090210442644124e-06, "loss": 0.668, "step": 23700 }, { "epoch": 2.39, "grad_norm": 5.618000263976089, "learning_rate": 1.2071093029726343e-06, "loss": 0.6481, "step": 23705 }, { "epoch": 2.39, "grad_norm": 5.163482808177356, "learning_rate": 1.205198866810922e-06, "loss": 0.6409, "step": 23710 }, { "epoch": 2.39, "grad_norm": 4.790103543725066, "learning_rate": 1.2032897364366563e-06, "loss": 0.6461, "step": 23715 }, { "epoch": 2.39, "grad_norm": 5.230244969450162, "learning_rate": 1.2013819125067661e-06, "loss": 0.6942, "step": 23720 }, { "epoch": 2.39, "grad_norm": 4.56863451913176, "learning_rate": 1.199475395677731e-06, "loss": 0.6497, "step": 23725 }, { "epoch": 2.39, "grad_norm": 4.490697737307678, "learning_rate": 1.1975701866055816e-06, "loss": 0.6548, "step": 23730 }, { "epoch": 2.39, "grad_norm": 5.674980329651185, "learning_rate": 1.1956662859459006e-06, "loss": 0.6524, "step": 23735 }, { "epoch": 2.39, "grad_norm": 9.130828897789995, "learning_rate": 1.1937636943538155e-06, "loss": 0.6814, "step": 23740 }, { "epoch": 2.39, "grad_norm": 5.096759628684305, "learning_rate": 1.1918624124840094e-06, "loss": 0.6951, "step": 23745 }, { "epoch": 2.39, "grad_norm": 4.732147767802345, "learning_rate": 1.18996244099071e-06, "loss": 0.6996, "step": 23750 }, { "epoch": 2.4, "grad_norm": 4.852807030794389, "learning_rate": 1.1880637805276956e-06, "loss": 0.6602, "step": 23755 }, { "epoch": 2.4, "grad_norm": 7.124165341584474, "learning_rate": 1.1861664317482918e-06, "loss": 0.6541, "step": 23760 }, { "epoch": 2.4, "grad_norm": 6.067019003192417, "learning_rate": 1.184270395305378e-06, "loss": 0.6576, "step": 23765 }, { "epoch": 2.4, "grad_norm": 6.195958907393889, "learning_rate": 1.182375671851375e-06, "loss": 0.6619, "step": 23770 }, { "epoch": 2.4, "grad_norm": 5.072963402580256, "learning_rate": 1.1804822620382584e-06, "loss": 0.6233, "step": 23775 }, { "epoch": 2.4, "grad_norm": 5.2949230601384825, "learning_rate": 1.1785901665175474e-06, "loss": 0.6862, "step": 23780 }, { "epoch": 2.4, "grad_norm": 5.679220448866176, "learning_rate": 1.1766993859403109e-06, "loss": 0.6397, "step": 23785 }, { "epoch": 2.4, "grad_norm": 6.529126893550042, "learning_rate": 1.1748099209571618e-06, "loss": 0.6602, "step": 23790 }, { "epoch": 2.4, "grad_norm": 7.105973681550202, "learning_rate": 1.1729217722182673e-06, "loss": 0.6719, "step": 23795 }, { "epoch": 2.4, "grad_norm": 7.280664552972269, "learning_rate": 1.171034940373334e-06, "loss": 0.6734, "step": 23800 }, { "epoch": 2.4, "grad_norm": 5.513459079235484, "learning_rate": 1.1691494260716225e-06, "loss": 0.6807, "step": 23805 }, { "epoch": 2.4, "grad_norm": 7.795157957580197, "learning_rate": 1.1672652299619342e-06, "loss": 0.6232, "step": 23810 }, { "epoch": 2.4, "grad_norm": 8.094072639673021, "learning_rate": 1.1653823526926195e-06, "loss": 0.6686, "step": 23815 }, { "epoch": 2.4, "grad_norm": 5.185770027064905, "learning_rate": 1.163500794911575e-06, "loss": 0.6832, "step": 23820 }, { "epoch": 2.4, "grad_norm": 5.688434083755331, "learning_rate": 1.1616205572662415e-06, "loss": 0.6613, "step": 23825 }, { "epoch": 2.4, "grad_norm": 5.044012018706139, "learning_rate": 1.1597416404036095e-06, "loss": 0.7053, "step": 23830 }, { "epoch": 2.4, "grad_norm": 4.448281320498033, "learning_rate": 1.1578640449702095e-06, "loss": 0.6427, "step": 23835 }, { "epoch": 2.4, "grad_norm": 4.846487787867139, "learning_rate": 1.1559877716121232e-06, "loss": 0.6528, "step": 23840 }, { "epoch": 2.4, "grad_norm": 4.610950244909516, "learning_rate": 1.1541128209749735e-06, "loss": 0.6585, "step": 23845 }, { "epoch": 2.4, "grad_norm": 4.877553176280549, "learning_rate": 1.1522391937039284e-06, "loss": 0.6696, "step": 23850 }, { "epoch": 2.41, "grad_norm": 5.477651738532338, "learning_rate": 1.150366890443701e-06, "loss": 0.677, "step": 23855 }, { "epoch": 2.41, "grad_norm": 4.863572635163428, "learning_rate": 1.1484959118385507e-06, "loss": 0.6449, "step": 23860 }, { "epoch": 2.41, "grad_norm": 4.7215699626198155, "learning_rate": 1.1466262585322773e-06, "loss": 0.6645, "step": 23865 }, { "epoch": 2.41, "grad_norm": 8.375682442605772, "learning_rate": 1.1447579311682294e-06, "loss": 0.7003, "step": 23870 }, { "epoch": 2.41, "grad_norm": 5.609454163336808, "learning_rate": 1.1428909303892955e-06, "loss": 0.6503, "step": 23875 }, { "epoch": 2.41, "grad_norm": 4.961901802189119, "learning_rate": 1.1410252568379082e-06, "loss": 0.6505, "step": 23880 }, { "epoch": 2.41, "grad_norm": 5.013798412490743, "learning_rate": 1.1391609111560425e-06, "loss": 0.6742, "step": 23885 }, { "epoch": 2.41, "grad_norm": 5.366997262349579, "learning_rate": 1.137297893985222e-06, "loss": 0.6679, "step": 23890 }, { "epoch": 2.41, "grad_norm": 6.604333797071156, "learning_rate": 1.1354362059665047e-06, "loss": 0.6788, "step": 23895 }, { "epoch": 2.41, "grad_norm": 4.409950786328532, "learning_rate": 1.1335758477404995e-06, "loss": 0.6651, "step": 23900 }, { "epoch": 2.41, "grad_norm": 4.340362687340669, "learning_rate": 1.1317168199473522e-06, "loss": 0.6428, "step": 23905 }, { "epoch": 2.41, "grad_norm": 4.850769904421837, "learning_rate": 1.1298591232267524e-06, "loss": 0.6648, "step": 23910 }, { "epoch": 2.41, "grad_norm": 5.969602368637574, "learning_rate": 1.1280027582179309e-06, "loss": 0.6632, "step": 23915 }, { "epoch": 2.41, "grad_norm": 6.396984680311234, "learning_rate": 1.1261477255596632e-06, "loss": 0.659, "step": 23920 }, { "epoch": 2.41, "grad_norm": 6.951125667931925, "learning_rate": 1.124294025890262e-06, "loss": 0.6385, "step": 23925 }, { "epoch": 2.41, "grad_norm": 5.376467123518012, "learning_rate": 1.1224416598475857e-06, "loss": 0.666, "step": 23930 }, { "epoch": 2.41, "grad_norm": 5.42513092423465, "learning_rate": 1.1205906280690315e-06, "loss": 0.6845, "step": 23935 }, { "epoch": 2.41, "grad_norm": 7.187259221881291, "learning_rate": 1.1187409311915365e-06, "loss": 0.6453, "step": 23940 }, { "epoch": 2.41, "grad_norm": 4.725902531276005, "learning_rate": 1.1168925698515782e-06, "loss": 0.688, "step": 23945 }, { "epoch": 2.41, "grad_norm": 4.6032004663512955, "learning_rate": 1.11504554468518e-06, "loss": 0.6641, "step": 23950 }, { "epoch": 2.42, "grad_norm": 8.473231294951365, "learning_rate": 1.113199856327899e-06, "loss": 0.6495, "step": 23955 }, { "epoch": 2.42, "grad_norm": 5.635391217197278, "learning_rate": 1.1113555054148335e-06, "loss": 0.6375, "step": 23960 }, { "epoch": 2.42, "grad_norm": 4.718512384709406, "learning_rate": 1.1095124925806262e-06, "loss": 0.6804, "step": 23965 }, { "epoch": 2.42, "grad_norm": 4.954687232537294, "learning_rate": 1.1076708184594542e-06, "loss": 0.6424, "step": 23970 }, { "epoch": 2.42, "grad_norm": 5.237282573011134, "learning_rate": 1.1058304836850358e-06, "loss": 0.6636, "step": 23975 }, { "epoch": 2.42, "grad_norm": 4.847006852076549, "learning_rate": 1.1039914888906278e-06, "loss": 0.6557, "step": 23980 }, { "epoch": 2.42, "grad_norm": 5.255883899216501, "learning_rate": 1.1021538347090282e-06, "loss": 0.6363, "step": 23985 }, { "epoch": 2.42, "grad_norm": 4.797507804413398, "learning_rate": 1.1003175217725699e-06, "loss": 0.6141, "step": 23990 }, { "epoch": 2.42, "grad_norm": 5.190261202725136, "learning_rate": 1.098482550713129e-06, "loss": 0.6663, "step": 23995 }, { "epoch": 2.42, "grad_norm": 7.210572111259796, "learning_rate": 1.0966489221621162e-06, "loss": 0.651, "step": 24000 }, { "epoch": 2.42, "grad_norm": 5.273773088540418, "learning_rate": 1.0948166367504814e-06, "loss": 0.6401, "step": 24005 }, { "epoch": 2.42, "grad_norm": 6.767623899763042, "learning_rate": 1.0929856951087104e-06, "loss": 0.6333, "step": 24010 }, { "epoch": 2.42, "grad_norm": 6.412046448881226, "learning_rate": 1.0911560978668312e-06, "loss": 0.6715, "step": 24015 }, { "epoch": 2.42, "grad_norm": 6.078105296172169, "learning_rate": 1.0893278456544043e-06, "loss": 0.6443, "step": 24020 }, { "epoch": 2.42, "grad_norm": 5.674597763088829, "learning_rate": 1.087500939100532e-06, "loss": 0.6508, "step": 24025 }, { "epoch": 2.42, "grad_norm": 5.110649682914834, "learning_rate": 1.0856753788338498e-06, "loss": 0.6628, "step": 24030 }, { "epoch": 2.42, "grad_norm": 4.889614638207886, "learning_rate": 1.0838511654825306e-06, "loss": 0.673, "step": 24035 }, { "epoch": 2.42, "grad_norm": 11.984844405841715, "learning_rate": 1.0820282996742842e-06, "loss": 0.6379, "step": 24040 }, { "epoch": 2.42, "grad_norm": 6.518256321449587, "learning_rate": 1.0802067820363588e-06, "loss": 0.6502, "step": 24045 }, { "epoch": 2.42, "grad_norm": 4.845221841902757, "learning_rate": 1.0783866131955345e-06, "loss": 0.6563, "step": 24050 }, { "epoch": 2.43, "grad_norm": 5.6040059079289595, "learning_rate": 1.0765677937781327e-06, "loss": 0.6574, "step": 24055 }, { "epoch": 2.43, "grad_norm": 6.081550821628119, "learning_rate": 1.0747503244100055e-06, "loss": 0.6582, "step": 24060 }, { "epoch": 2.43, "grad_norm": 6.16529242847246, "learning_rate": 1.0729342057165427e-06, "loss": 0.6523, "step": 24065 }, { "epoch": 2.43, "grad_norm": 5.544136274685921, "learning_rate": 1.0711194383226675e-06, "loss": 0.6813, "step": 24070 }, { "epoch": 2.43, "grad_norm": 5.539291616022181, "learning_rate": 1.0693060228528424e-06, "loss": 0.66, "step": 24075 }, { "epoch": 2.43, "grad_norm": 4.528425012110296, "learning_rate": 1.0674939599310607e-06, "loss": 0.6513, "step": 24080 }, { "epoch": 2.43, "grad_norm": 5.497645977169182, "learning_rate": 1.06568325018085e-06, "loss": 0.6785, "step": 24085 }, { "epoch": 2.43, "grad_norm": 5.452333913167858, "learning_rate": 1.063873894225278e-06, "loss": 0.6291, "step": 24090 }, { "epoch": 2.43, "grad_norm": 7.584321380477787, "learning_rate": 1.062065892686937e-06, "loss": 0.6179, "step": 24095 }, { "epoch": 2.43, "grad_norm": 5.582992398770889, "learning_rate": 1.0602592461879623e-06, "loss": 0.6542, "step": 24100 }, { "epoch": 2.43, "grad_norm": 5.267445725604313, "learning_rate": 1.0584539553500167e-06, "loss": 0.6883, "step": 24105 }, { "epoch": 2.43, "grad_norm": 5.106951719637822, "learning_rate": 1.056650020794302e-06, "loss": 0.6289, "step": 24110 }, { "epoch": 2.43, "grad_norm": 7.748623451954727, "learning_rate": 1.0548474431415473e-06, "loss": 0.6364, "step": 24115 }, { "epoch": 2.43, "grad_norm": 4.443000217909867, "learning_rate": 1.0530462230120209e-06, "loss": 0.6766, "step": 24120 }, { "epoch": 2.43, "grad_norm": 4.48550594637477, "learning_rate": 1.0512463610255191e-06, "loss": 0.6367, "step": 24125 }, { "epoch": 2.43, "grad_norm": 7.411209327002572, "learning_rate": 1.049447857801373e-06, "loss": 0.6673, "step": 24130 }, { "epoch": 2.43, "grad_norm": 24.461037766404242, "learning_rate": 1.0476507139584447e-06, "loss": 0.6432, "step": 24135 }, { "epoch": 2.43, "grad_norm": 5.983197491207409, "learning_rate": 1.0458549301151317e-06, "loss": 0.6388, "step": 24140 }, { "epoch": 2.43, "grad_norm": 4.714272084787362, "learning_rate": 1.04406050688936e-06, "loss": 0.6924, "step": 24145 }, { "epoch": 2.43, "grad_norm": 5.18344029670136, "learning_rate": 1.0422674448985904e-06, "loss": 0.6532, "step": 24150 }, { "epoch": 2.44, "grad_norm": 5.14167289300526, "learning_rate": 1.040475744759813e-06, "loss": 0.6907, "step": 24155 }, { "epoch": 2.44, "grad_norm": 6.647224608466981, "learning_rate": 1.0386854070895496e-06, "loss": 0.6835, "step": 24160 }, { "epoch": 2.44, "grad_norm": 7.015265097882171, "learning_rate": 1.0368964325038533e-06, "loss": 0.6691, "step": 24165 }, { "epoch": 2.44, "grad_norm": 4.5761073878121215, "learning_rate": 1.035108821618311e-06, "loss": 0.635, "step": 24170 }, { "epoch": 2.44, "grad_norm": 5.162102016764382, "learning_rate": 1.0333225750480341e-06, "loss": 0.6435, "step": 24175 }, { "epoch": 2.44, "grad_norm": 6.211445872418304, "learning_rate": 1.0315376934076725e-06, "loss": 0.6869, "step": 24180 }, { "epoch": 2.44, "grad_norm": 6.234258386467219, "learning_rate": 1.0297541773114e-06, "loss": 0.6333, "step": 24185 }, { "epoch": 2.44, "grad_norm": 5.055423005378473, "learning_rate": 1.0279720273729232e-06, "loss": 0.6543, "step": 24190 }, { "epoch": 2.44, "grad_norm": 4.32115384536653, "learning_rate": 1.0261912442054767e-06, "loss": 0.6433, "step": 24195 }, { "epoch": 2.44, "grad_norm": 4.849032712997024, "learning_rate": 1.024411828421829e-06, "loss": 0.66, "step": 24200 }, { "epoch": 2.44, "grad_norm": 5.072989418040996, "learning_rate": 1.022633780634274e-06, "loss": 0.68, "step": 24205 }, { "epoch": 2.44, "grad_norm": 5.483775285184719, "learning_rate": 1.0208571014546347e-06, "loss": 0.6556, "step": 24210 }, { "epoch": 2.44, "grad_norm": 4.745063361912836, "learning_rate": 1.0190817914942696e-06, "loss": 0.6549, "step": 24215 }, { "epoch": 2.44, "grad_norm": 6.906242377889271, "learning_rate": 1.0173078513640543e-06, "loss": 0.6336, "step": 24220 }, { "epoch": 2.44, "grad_norm": 6.337955797386548, "learning_rate": 1.0155352816744058e-06, "loss": 0.6436, "step": 24225 }, { "epoch": 2.44, "grad_norm": 6.599573279630294, "learning_rate": 1.013764083035259e-06, "loss": 0.6662, "step": 24230 }, { "epoch": 2.44, "grad_norm": 4.345380794791357, "learning_rate": 1.0119942560560858e-06, "loss": 0.6526, "step": 24235 }, { "epoch": 2.44, "grad_norm": 4.940158221488668, "learning_rate": 1.0102258013458783e-06, "loss": 0.6329, "step": 24240 }, { "epoch": 2.44, "grad_norm": 5.484670311151196, "learning_rate": 1.0084587195131652e-06, "loss": 0.6431, "step": 24245 }, { "epoch": 2.44, "grad_norm": 4.733477820239863, "learning_rate": 1.0066930111659911e-06, "loss": 0.66, "step": 24250 }, { "epoch": 2.45, "grad_norm": 6.399131133773677, "learning_rate": 1.0049286769119393e-06, "loss": 0.665, "step": 24255 }, { "epoch": 2.45, "grad_norm": 5.436299381570852, "learning_rate": 1.0031657173581126e-06, "loss": 0.6253, "step": 24260 }, { "epoch": 2.45, "grad_norm": 5.653731055188008, "learning_rate": 1.0014041331111467e-06, "loss": 0.6347, "step": 24265 }, { "epoch": 2.45, "grad_norm": 4.907820309286419, "learning_rate": 9.996439247771983e-07, "loss": 0.6569, "step": 24270 }, { "epoch": 2.45, "grad_norm": 4.544214480872975, "learning_rate": 9.97885092961957e-07, "loss": 0.6536, "step": 24275 }, { "epoch": 2.45, "grad_norm": 5.587307184447402, "learning_rate": 9.961276382706297e-07, "loss": 0.669, "step": 24280 }, { "epoch": 2.45, "grad_norm": 5.151800180599868, "learning_rate": 9.943715613079602e-07, "loss": 0.6471, "step": 24285 }, { "epoch": 2.45, "grad_norm": 4.647118728799816, "learning_rate": 9.926168626782085e-07, "loss": 0.6501, "step": 24290 }, { "epoch": 2.45, "grad_norm": 6.864846064764639, "learning_rate": 9.908635429851688e-07, "loss": 0.6601, "step": 24295 }, { "epoch": 2.45, "grad_norm": 4.941681400199244, "learning_rate": 9.891116028321528e-07, "loss": 0.657, "step": 24300 }, { "epoch": 2.45, "grad_norm": 7.459932839259485, "learning_rate": 9.873610428220065e-07, "loss": 0.6518, "step": 24305 }, { "epoch": 2.45, "grad_norm": 10.689099853989275, "learning_rate": 9.856118635570905e-07, "loss": 0.675, "step": 24310 }, { "epoch": 2.45, "grad_norm": 8.415278394684226, "learning_rate": 9.83864065639299e-07, "loss": 0.6749, "step": 24315 }, { "epoch": 2.45, "grad_norm": 8.166749219171328, "learning_rate": 9.821176496700453e-07, "loss": 0.6639, "step": 24320 }, { "epoch": 2.45, "grad_norm": 4.802799710730936, "learning_rate": 9.803726162502726e-07, "loss": 0.6423, "step": 24325 }, { "epoch": 2.45, "grad_norm": 4.814502203551193, "learning_rate": 9.786289659804415e-07, "loss": 0.6593, "step": 24330 }, { "epoch": 2.45, "grad_norm": 7.235234383606917, "learning_rate": 9.768866994605452e-07, "loss": 0.6661, "step": 24335 }, { "epoch": 2.45, "grad_norm": 5.895525185606499, "learning_rate": 9.75145817290092e-07, "loss": 0.6807, "step": 24340 }, { "epoch": 2.45, "grad_norm": 6.272616856347768, "learning_rate": 9.73406320068117e-07, "loss": 0.6332, "step": 24345 }, { "epoch": 2.46, "grad_norm": 7.1266478704623015, "learning_rate": 9.716682083931834e-07, "loss": 0.6698, "step": 24350 }, { "epoch": 2.46, "grad_norm": 4.686239580595646, "learning_rate": 9.699314828633704e-07, "loss": 0.6389, "step": 24355 }, { "epoch": 2.46, "grad_norm": 6.269954961352537, "learning_rate": 9.68196144076286e-07, "loss": 0.6258, "step": 24360 }, { "epoch": 2.46, "grad_norm": 4.586232695798104, "learning_rate": 9.66462192629058e-07, "loss": 0.6671, "step": 24365 }, { "epoch": 2.46, "grad_norm": 4.715433525036328, "learning_rate": 9.647296291183367e-07, "loss": 0.6313, "step": 24370 }, { "epoch": 2.46, "grad_norm": 4.60639124847477, "learning_rate": 9.629984541402947e-07, "loss": 0.6235, "step": 24375 }, { "epoch": 2.46, "grad_norm": 4.911709687109333, "learning_rate": 9.612686682906302e-07, "loss": 0.6797, "step": 24380 }, { "epoch": 2.46, "grad_norm": 4.824313647744851, "learning_rate": 9.59540272164558e-07, "loss": 0.6612, "step": 24385 }, { "epoch": 2.46, "grad_norm": 4.787688302758495, "learning_rate": 9.578132663568207e-07, "loss": 0.6451, "step": 24390 }, { "epoch": 2.46, "grad_norm": 4.836387498664002, "learning_rate": 9.560876514616775e-07, "loss": 0.6509, "step": 24395 }, { "epoch": 2.46, "grad_norm": 4.868549786245656, "learning_rate": 9.54363428072911e-07, "loss": 0.6472, "step": 24400 }, { "epoch": 2.46, "grad_norm": 6.530863320667234, "learning_rate": 9.526405967838237e-07, "loss": 0.6555, "step": 24405 }, { "epoch": 2.46, "grad_norm": 4.391324501270228, "learning_rate": 9.509191581872424e-07, "loss": 0.6357, "step": 24410 }, { "epoch": 2.46, "grad_norm": 4.6127302095667, "learning_rate": 9.491991128755107e-07, "loss": 0.6515, "step": 24415 }, { "epoch": 2.46, "grad_norm": 4.712671864213921, "learning_rate": 9.474804614404959e-07, "loss": 0.6574, "step": 24420 }, { "epoch": 2.46, "grad_norm": 4.847014489297407, "learning_rate": 9.457632044735832e-07, "loss": 0.6373, "step": 24425 }, { "epoch": 2.46, "grad_norm": 6.670228672865111, "learning_rate": 9.440473425656815e-07, "loss": 0.6506, "step": 24430 }, { "epoch": 2.46, "grad_norm": 4.431668423806074, "learning_rate": 9.423328763072131e-07, "loss": 0.6459, "step": 24435 }, { "epoch": 2.46, "grad_norm": 5.034693170576436, "learning_rate": 9.406198062881278e-07, "loss": 0.6449, "step": 24440 }, { "epoch": 2.46, "grad_norm": 4.678252267266481, "learning_rate": 9.389081330978883e-07, "loss": 0.6589, "step": 24445 }, { "epoch": 2.47, "grad_norm": 6.156093510328251, "learning_rate": 9.371978573254831e-07, "loss": 0.6928, "step": 24450 }, { "epoch": 2.47, "grad_norm": 8.083625790363284, "learning_rate": 9.354889795594135e-07, "loss": 0.6822, "step": 24455 }, { "epoch": 2.47, "grad_norm": 5.270059897769438, "learning_rate": 9.33781500387706e-07, "loss": 0.6475, "step": 24460 }, { "epoch": 2.47, "grad_norm": 5.445776137690954, "learning_rate": 9.320754203978999e-07, "loss": 0.6577, "step": 24465 }, { "epoch": 2.47, "grad_norm": 4.724132080897397, "learning_rate": 9.303707401770545e-07, "loss": 0.6127, "step": 24470 }, { "epoch": 2.47, "grad_norm": 4.683063811483321, "learning_rate": 9.286674603117524e-07, "loss": 0.6603, "step": 24475 }, { "epoch": 2.47, "grad_norm": 4.257766289785195, "learning_rate": 9.269655813880868e-07, "loss": 0.6006, "step": 24480 }, { "epoch": 2.47, "grad_norm": 5.429484324997625, "learning_rate": 9.25265103991676e-07, "loss": 0.6603, "step": 24485 }, { "epoch": 2.47, "grad_norm": 4.8748023788537385, "learning_rate": 9.235660287076509e-07, "loss": 0.6937, "step": 24490 }, { "epoch": 2.47, "grad_norm": 4.573077935697372, "learning_rate": 9.218683561206621e-07, "loss": 0.6657, "step": 24495 }, { "epoch": 2.47, "grad_norm": 5.0768056663720245, "learning_rate": 9.201720868148756e-07, "loss": 0.6426, "step": 24500 }, { "epoch": 2.47, "grad_norm": 5.132452067770202, "learning_rate": 9.184772213739784e-07, "loss": 0.6173, "step": 24505 }, { "epoch": 2.47, "grad_norm": 5.8371733786325946, "learning_rate": 9.167837603811702e-07, "loss": 0.6421, "step": 24510 }, { "epoch": 2.47, "grad_norm": 11.925534485149385, "learning_rate": 9.15091704419171e-07, "loss": 0.6504, "step": 24515 }, { "epoch": 2.47, "grad_norm": 7.885909463985392, "learning_rate": 9.134010540702148e-07, "loss": 0.6602, "step": 24520 }, { "epoch": 2.47, "grad_norm": 6.120338319418244, "learning_rate": 9.117118099160526e-07, "loss": 0.6821, "step": 24525 }, { "epoch": 2.47, "grad_norm": 6.798152916059508, "learning_rate": 9.100239725379512e-07, "loss": 0.6748, "step": 24530 }, { "epoch": 2.47, "grad_norm": 4.905722956211278, "learning_rate": 9.083375425166952e-07, "loss": 0.6423, "step": 24535 }, { "epoch": 2.47, "grad_norm": 6.674599790896468, "learning_rate": 9.066525204325816e-07, "loss": 0.6314, "step": 24540 }, { "epoch": 2.47, "grad_norm": 4.787652908868984, "learning_rate": 9.049689068654277e-07, "loss": 0.6461, "step": 24545 }, { "epoch": 2.48, "grad_norm": 4.648534073284891, "learning_rate": 9.032867023945613e-07, "loss": 0.6802, "step": 24550 }, { "epoch": 2.48, "grad_norm": 4.963734604380744, "learning_rate": 9.016059075988282e-07, "loss": 0.6637, "step": 24555 }, { "epoch": 2.48, "grad_norm": 4.815717399319649, "learning_rate": 8.999265230565863e-07, "loss": 0.666, "step": 24560 }, { "epoch": 2.48, "grad_norm": 4.534025588775262, "learning_rate": 8.982485493457133e-07, "loss": 0.6598, "step": 24565 }, { "epoch": 2.48, "grad_norm": 5.243752686300016, "learning_rate": 8.965719870435951e-07, "loss": 0.679, "step": 24570 }, { "epoch": 2.48, "grad_norm": 5.169525277173541, "learning_rate": 8.948968367271388e-07, "loss": 0.6735, "step": 24575 }, { "epoch": 2.48, "grad_norm": 4.372567152329839, "learning_rate": 8.932230989727598e-07, "loss": 0.6407, "step": 24580 }, { "epoch": 2.48, "grad_norm": 4.503524583394472, "learning_rate": 8.915507743563901e-07, "loss": 0.6268, "step": 24585 }, { "epoch": 2.48, "grad_norm": 4.582464553338467, "learning_rate": 8.89879863453475e-07, "loss": 0.6439, "step": 24590 }, { "epoch": 2.48, "grad_norm": 4.4729040322625515, "learning_rate": 8.882103668389719e-07, "loss": 0.6668, "step": 24595 }, { "epoch": 2.48, "grad_norm": 6.280305513971691, "learning_rate": 8.865422850873556e-07, "loss": 0.6826, "step": 24600 }, { "epoch": 2.48, "grad_norm": 4.931605865332295, "learning_rate": 8.848756187726093e-07, "loss": 0.6705, "step": 24605 }, { "epoch": 2.48, "grad_norm": 5.2975829385507325, "learning_rate": 8.83210368468233e-07, "loss": 0.6587, "step": 24610 }, { "epoch": 2.48, "grad_norm": 6.032825354671978, "learning_rate": 8.815465347472374e-07, "loss": 0.657, "step": 24615 }, { "epoch": 2.48, "grad_norm": 4.7918494728805, "learning_rate": 8.798841181821449e-07, "loss": 0.6433, "step": 24620 }, { "epoch": 2.48, "grad_norm": 4.6352119324579295, "learning_rate": 8.782231193449914e-07, "loss": 0.6052, "step": 24625 }, { "epoch": 2.48, "grad_norm": 4.468768095821291, "learning_rate": 8.765635388073274e-07, "loss": 0.6506, "step": 24630 }, { "epoch": 2.48, "grad_norm": 6.1714584235928, "learning_rate": 8.749053771402094e-07, "loss": 0.6813, "step": 24635 }, { "epoch": 2.48, "grad_norm": 5.405067333949699, "learning_rate": 8.732486349142127e-07, "loss": 0.6597, "step": 24640 }, { "epoch": 2.48, "grad_norm": 5.641166645937143, "learning_rate": 8.715933126994192e-07, "loss": 0.6686, "step": 24645 }, { "epoch": 2.49, "grad_norm": 6.540422303816012, "learning_rate": 8.699394110654241e-07, "loss": 0.6277, "step": 24650 }, { "epoch": 2.49, "grad_norm": 8.926265663207731, "learning_rate": 8.682869305813312e-07, "loss": 0.6619, "step": 24655 }, { "epoch": 2.49, "grad_norm": 4.522604459042804, "learning_rate": 8.666358718157608e-07, "loss": 0.6753, "step": 24660 }, { "epoch": 2.49, "grad_norm": 4.8097535901666495, "learning_rate": 8.649862353368377e-07, "loss": 0.6776, "step": 24665 }, { "epoch": 2.49, "grad_norm": 5.158174973798557, "learning_rate": 8.633380217122028e-07, "loss": 0.6765, "step": 24670 }, { "epoch": 2.49, "grad_norm": 4.610447525325233, "learning_rate": 8.616912315090037e-07, "loss": 0.6734, "step": 24675 }, { "epoch": 2.49, "grad_norm": 4.612845668220532, "learning_rate": 8.600458652938997e-07, "loss": 0.6521, "step": 24680 }, { "epoch": 2.49, "grad_norm": 4.578334838267179, "learning_rate": 8.584019236330576e-07, "loss": 0.6442, "step": 24685 }, { "epoch": 2.49, "grad_norm": 5.757414987867542, "learning_rate": 8.567594070921592e-07, "loss": 0.6377, "step": 24690 }, { "epoch": 2.49, "grad_norm": 5.545040668554131, "learning_rate": 8.551183162363907e-07, "loss": 0.6203, "step": 24695 }, { "epoch": 2.49, "grad_norm": 5.18825691136152, "learning_rate": 8.534786516304516e-07, "loss": 0.6589, "step": 24700 }, { "epoch": 2.49, "grad_norm": 4.63240360899482, "learning_rate": 8.518404138385483e-07, "loss": 0.6737, "step": 24705 }, { "epoch": 2.49, "grad_norm": 5.375068745954727, "learning_rate": 8.502036034243965e-07, "loss": 0.6233, "step": 24710 }, { "epoch": 2.49, "grad_norm": 4.711918972811405, "learning_rate": 8.485682209512203e-07, "loss": 0.6563, "step": 24715 }, { "epoch": 2.49, "grad_norm": 5.740392950733569, "learning_rate": 8.469342669817548e-07, "loss": 0.6779, "step": 24720 }, { "epoch": 2.49, "grad_norm": 4.782537893732716, "learning_rate": 8.453017420782422e-07, "loss": 0.6634, "step": 24725 }, { "epoch": 2.49, "grad_norm": 5.0966574448446185, "learning_rate": 8.436706468024303e-07, "loss": 0.67, "step": 24730 }, { "epoch": 2.49, "grad_norm": 4.884923814880293, "learning_rate": 8.420409817155806e-07, "loss": 0.6687, "step": 24735 }, { "epoch": 2.49, "grad_norm": 4.659544018108539, "learning_rate": 8.404127473784574e-07, "loss": 0.6462, "step": 24740 }, { "epoch": 2.49, "grad_norm": 4.709118306156166, "learning_rate": 8.387859443513353e-07, "loss": 0.679, "step": 24745 }, { "epoch": 2.5, "grad_norm": 4.928574888944737, "learning_rate": 8.371605731939935e-07, "loss": 0.6381, "step": 24750 }, { "epoch": 2.5, "grad_norm": 5.063956339138812, "learning_rate": 8.355366344657245e-07, "loss": 0.6671, "step": 24755 }, { "epoch": 2.5, "grad_norm": 4.831064742072634, "learning_rate": 8.339141287253205e-07, "loss": 0.6376, "step": 24760 }, { "epoch": 2.5, "grad_norm": 4.496507983904041, "learning_rate": 8.322930565310867e-07, "loss": 0.6523, "step": 24765 }, { "epoch": 2.5, "grad_norm": 5.897423559897414, "learning_rate": 8.306734184408316e-07, "loss": 0.654, "step": 24770 }, { "epoch": 2.5, "grad_norm": 4.895179239345184, "learning_rate": 8.290552150118714e-07, "loss": 0.6587, "step": 24775 }, { "epoch": 2.5, "grad_norm": 5.01025021028483, "learning_rate": 8.274384468010266e-07, "loss": 0.6599, "step": 24780 }, { "epoch": 2.5, "grad_norm": 5.621504495377497, "learning_rate": 8.258231143646284e-07, "loss": 0.6536, "step": 24785 }, { "epoch": 2.5, "grad_norm": 5.015773076364035, "learning_rate": 8.242092182585082e-07, "loss": 0.6886, "step": 24790 }, { "epoch": 2.5, "grad_norm": 7.427442287654225, "learning_rate": 8.225967590380091e-07, "loss": 0.6637, "step": 24795 }, { "epoch": 2.5, "grad_norm": 6.137015803961235, "learning_rate": 8.209857372579749e-07, "loss": 0.6189, "step": 24800 }, { "epoch": 2.5, "grad_norm": 4.886094258233801, "learning_rate": 8.193761534727579e-07, "loss": 0.6334, "step": 24805 }, { "epoch": 2.5, "grad_norm": 5.027983452333498, "learning_rate": 8.177680082362116e-07, "loss": 0.6403, "step": 24810 }, { "epoch": 2.5, "grad_norm": 4.498959374655465, "learning_rate": 8.161613021017006e-07, "loss": 0.665, "step": 24815 }, { "epoch": 2.5, "grad_norm": 7.442887102085162, "learning_rate": 8.145560356220883e-07, "loss": 0.6512, "step": 24820 }, { "epoch": 2.5, "grad_norm": 6.059385265435554, "learning_rate": 8.129522093497483e-07, "loss": 0.661, "step": 24825 }, { "epoch": 2.5, "grad_norm": 5.89352893079638, "learning_rate": 8.11349823836553e-07, "loss": 0.6473, "step": 24830 }, { "epoch": 2.5, "grad_norm": 5.557086942989607, "learning_rate": 8.097488796338837e-07, "loss": 0.6246, "step": 24835 }, { "epoch": 2.5, "grad_norm": 4.471587700245278, "learning_rate": 8.081493772926208e-07, "loss": 0.6219, "step": 24840 }, { "epoch": 2.5, "grad_norm": 6.8867025140941545, "learning_rate": 8.06551317363155e-07, "loss": 0.6267, "step": 24845 }, { "epoch": 2.51, "grad_norm": 4.849642771433342, "learning_rate": 8.049547003953756e-07, "loss": 0.6704, "step": 24850 }, { "epoch": 2.51, "grad_norm": 4.900048710600587, "learning_rate": 8.033595269386762e-07, "loss": 0.7352, "step": 24855 }, { "epoch": 2.51, "grad_norm": 4.458521029193161, "learning_rate": 8.017657975419563e-07, "loss": 0.6711, "step": 24860 }, { "epoch": 2.51, "grad_norm": 7.234623918704893, "learning_rate": 8.001735127536153e-07, "loss": 0.6682, "step": 24865 }, { "epoch": 2.51, "grad_norm": 5.8119753199973125, "learning_rate": 7.985826731215579e-07, "loss": 0.6526, "step": 24870 }, { "epoch": 2.51, "grad_norm": 4.786871359170267, "learning_rate": 7.969932791931883e-07, "loss": 0.6588, "step": 24875 }, { "epoch": 2.51, "grad_norm": 4.888432868770594, "learning_rate": 7.954053315154181e-07, "loss": 0.6491, "step": 24880 }, { "epoch": 2.51, "grad_norm": 4.7430510735248745, "learning_rate": 7.938188306346567e-07, "loss": 0.6593, "step": 24885 }, { "epoch": 2.51, "grad_norm": 6.1545479704772434, "learning_rate": 7.922337770968192e-07, "loss": 0.6435, "step": 24890 }, { "epoch": 2.51, "grad_norm": 4.849948977490544, "learning_rate": 7.906501714473203e-07, "loss": 0.6504, "step": 24895 }, { "epoch": 2.51, "grad_norm": 9.081958242724612, "learning_rate": 7.89068014231077e-07, "loss": 0.6467, "step": 24900 }, { "epoch": 2.51, "grad_norm": 5.470923465659294, "learning_rate": 7.874873059925064e-07, "loss": 0.6437, "step": 24905 }, { "epoch": 2.51, "grad_norm": 4.654415047763868, "learning_rate": 7.85908047275532e-07, "loss": 0.6388, "step": 24910 }, { "epoch": 2.51, "grad_norm": 6.073545061463334, "learning_rate": 7.843302386235724e-07, "loss": 0.6478, "step": 24915 }, { "epoch": 2.51, "grad_norm": 5.715609234994534, "learning_rate": 7.827538805795526e-07, "loss": 0.6644, "step": 24920 }, { "epoch": 2.51, "grad_norm": 5.964046487206026, "learning_rate": 7.811789736858943e-07, "loss": 0.6411, "step": 24925 }, { "epoch": 2.51, "grad_norm": 6.154862219711928, "learning_rate": 7.796055184845219e-07, "loss": 0.6658, "step": 24930 }, { "epoch": 2.51, "grad_norm": 4.905050013439889, "learning_rate": 7.780335155168584e-07, "loss": 0.6531, "step": 24935 }, { "epoch": 2.51, "grad_norm": 4.581144513340311, "learning_rate": 7.764629653238309e-07, "loss": 0.661, "step": 24940 }, { "epoch": 2.51, "grad_norm": 5.231113583542581, "learning_rate": 7.748938684458623e-07, "loss": 0.7024, "step": 24945 }, { "epoch": 2.52, "grad_norm": 8.860678825782125, "learning_rate": 7.733262254228791e-07, "loss": 0.6368, "step": 24950 }, { "epoch": 2.52, "grad_norm": 5.975033156518935, "learning_rate": 7.717600367943046e-07, "loss": 0.6649, "step": 24955 }, { "epoch": 2.52, "grad_norm": 8.01741938313855, "learning_rate": 7.701953030990628e-07, "loss": 0.658, "step": 24960 }, { "epoch": 2.52, "grad_norm": 5.3034097032783265, "learning_rate": 7.686320248755763e-07, "loss": 0.6758, "step": 24965 }, { "epoch": 2.52, "grad_norm": 7.27663529194628, "learning_rate": 7.670702026617699e-07, "loss": 0.6453, "step": 24970 }, { "epoch": 2.52, "grad_norm": 9.33105884995202, "learning_rate": 7.655098369950643e-07, "loss": 0.6452, "step": 24975 }, { "epoch": 2.52, "grad_norm": 5.392173457773588, "learning_rate": 7.639509284123776e-07, "loss": 0.6375, "step": 24980 }, { "epoch": 2.52, "grad_norm": 5.3268626627599165, "learning_rate": 7.623934774501318e-07, "loss": 0.6545, "step": 24985 }, { "epoch": 2.52, "grad_norm": 4.913510886945881, "learning_rate": 7.608374846442429e-07, "loss": 0.6389, "step": 24990 }, { "epoch": 2.52, "grad_norm": 4.420185217342942, "learning_rate": 7.592829505301269e-07, "loss": 0.6462, "step": 24995 }, { "epoch": 2.52, "grad_norm": 4.50249743438501, "learning_rate": 7.577298756426959e-07, "loss": 0.6746, "step": 25000 }, { "epoch": 2.52, "grad_norm": 5.406885599331131, "learning_rate": 7.561782605163642e-07, "loss": 0.6584, "step": 25005 }, { "epoch": 2.52, "grad_norm": 4.776451399414106, "learning_rate": 7.546281056850386e-07, "loss": 0.647, "step": 25010 }, { "epoch": 2.52, "grad_norm": 4.647823902193932, "learning_rate": 7.530794116821282e-07, "loss": 0.6189, "step": 25015 }, { "epoch": 2.52, "grad_norm": 8.680773430815062, "learning_rate": 7.515321790405356e-07, "loss": 0.6375, "step": 25020 }, { "epoch": 2.52, "grad_norm": 4.969723876599489, "learning_rate": 7.499864082926627e-07, "loss": 0.6222, "step": 25025 }, { "epoch": 2.52, "grad_norm": 4.758278915875785, "learning_rate": 7.484420999704067e-07, "loss": 0.6381, "step": 25030 }, { "epoch": 2.52, "grad_norm": 4.498832604608828, "learning_rate": 7.46899254605164e-07, "loss": 0.6355, "step": 25035 }, { "epoch": 2.52, "grad_norm": 6.443240026602288, "learning_rate": 7.453578727278249e-07, "loss": 0.6575, "step": 25040 }, { "epoch": 2.53, "grad_norm": 4.819968215010338, "learning_rate": 7.43817954868779e-07, "loss": 0.6548, "step": 25045 }, { "epoch": 2.53, "grad_norm": 5.28827657407379, "learning_rate": 7.422795015579098e-07, "loss": 0.619, "step": 25050 }, { "epoch": 2.53, "grad_norm": 4.686959750334854, "learning_rate": 7.407425133245971e-07, "loss": 0.6313, "step": 25055 }, { "epoch": 2.53, "grad_norm": 8.081679427774791, "learning_rate": 7.392069906977167e-07, "loss": 0.651, "step": 25060 }, { "epoch": 2.53, "grad_norm": 5.242750449479009, "learning_rate": 7.376729342056427e-07, "loss": 0.6706, "step": 25065 }, { "epoch": 2.53, "grad_norm": 4.727060169319375, "learning_rate": 7.361403443762399e-07, "loss": 0.664, "step": 25070 }, { "epoch": 2.53, "grad_norm": 5.85291155153888, "learning_rate": 7.346092217368727e-07, "loss": 0.6669, "step": 25075 }, { "epoch": 2.53, "grad_norm": 4.782691470831053, "learning_rate": 7.330795668143992e-07, "loss": 0.6496, "step": 25080 }, { "epoch": 2.53, "grad_norm": 5.031602082846597, "learning_rate": 7.315513801351709e-07, "loss": 0.6241, "step": 25085 }, { "epoch": 2.53, "grad_norm": 6.13748043404483, "learning_rate": 7.300246622250345e-07, "loss": 0.6558, "step": 25090 }, { "epoch": 2.53, "grad_norm": 5.022918393131529, "learning_rate": 7.284994136093348e-07, "loss": 0.6203, "step": 25095 }, { "epoch": 2.53, "grad_norm": 4.577534653944751, "learning_rate": 7.269756348129064e-07, "loss": 0.6342, "step": 25100 }, { "epoch": 2.53, "grad_norm": 6.3508017293347105, "learning_rate": 7.254533263600816e-07, "loss": 0.6571, "step": 25105 }, { "epoch": 2.53, "grad_norm": 4.6107215829957715, "learning_rate": 7.239324887746846e-07, "loss": 0.6478, "step": 25110 }, { "epoch": 2.53, "grad_norm": 4.9246172109137705, "learning_rate": 7.22413122580034e-07, "loss": 0.6753, "step": 25115 }, { "epoch": 2.53, "grad_norm": 4.752569415648657, "learning_rate": 7.208952282989423e-07, "loss": 0.6719, "step": 25120 }, { "epoch": 2.53, "grad_norm": 6.074928665972307, "learning_rate": 7.193788064537149e-07, "loss": 0.6584, "step": 25125 }, { "epoch": 2.53, "grad_norm": 5.459360160030664, "learning_rate": 7.178638575661523e-07, "loss": 0.6757, "step": 25130 }, { "epoch": 2.53, "grad_norm": 4.9071075389801475, "learning_rate": 7.163503821575457e-07, "loss": 0.6371, "step": 25135 }, { "epoch": 2.53, "grad_norm": 4.469886611538309, "learning_rate": 7.14838380748682e-07, "loss": 0.651, "step": 25140 }, { "epoch": 2.54, "grad_norm": 4.255804557272553, "learning_rate": 7.133278538598388e-07, "loss": 0.6691, "step": 25145 }, { "epoch": 2.54, "grad_norm": 5.76982918005449, "learning_rate": 7.118188020107869e-07, "loss": 0.6618, "step": 25150 }, { "epoch": 2.54, "grad_norm": 6.35076015863343, "learning_rate": 7.103112257207884e-07, "loss": 0.617, "step": 25155 }, { "epoch": 2.54, "grad_norm": 5.487834302585936, "learning_rate": 7.088051255086015e-07, "loss": 0.6986, "step": 25160 }, { "epoch": 2.54, "grad_norm": 8.413840768505342, "learning_rate": 7.073005018924706e-07, "loss": 0.694, "step": 25165 }, { "epoch": 2.54, "grad_norm": 5.55955416421179, "learning_rate": 7.057973553901387e-07, "loss": 0.6594, "step": 25170 }, { "epoch": 2.54, "grad_norm": 4.613757922631881, "learning_rate": 7.04295686518835e-07, "loss": 0.6504, "step": 25175 }, { "epoch": 2.54, "grad_norm": 6.900243725454976, "learning_rate": 7.027954957952827e-07, "loss": 0.6405, "step": 25180 }, { "epoch": 2.54, "grad_norm": 6.027603532032837, "learning_rate": 7.012967837356949e-07, "loss": 0.6339, "step": 25185 }, { "epoch": 2.54, "grad_norm": 7.712147105387408, "learning_rate": 6.997995508557792e-07, "loss": 0.6172, "step": 25190 }, { "epoch": 2.54, "grad_norm": 5.581397598376956, "learning_rate": 6.983037976707297e-07, "loss": 0.6563, "step": 25195 }, { "epoch": 2.54, "grad_norm": 7.936731592965554, "learning_rate": 6.968095246952361e-07, "loss": 0.6421, "step": 25200 }, { "epoch": 2.54, "grad_norm": 6.692962308108754, "learning_rate": 6.953167324434751e-07, "loss": 0.6399, "step": 25205 }, { "epoch": 2.54, "grad_norm": 11.914079098706027, "learning_rate": 6.938254214291152e-07, "loss": 0.6798, "step": 25210 }, { "epoch": 2.54, "grad_norm": 4.763857783003577, "learning_rate": 6.92335592165313e-07, "loss": 0.6922, "step": 25215 }, { "epoch": 2.54, "grad_norm": 4.452148035606174, "learning_rate": 6.908472451647213e-07, "loss": 0.6676, "step": 25220 }, { "epoch": 2.54, "grad_norm": 6.178984053236935, "learning_rate": 6.89360380939475e-07, "loss": 0.6449, "step": 25225 }, { "epoch": 2.54, "grad_norm": 4.261025469317658, "learning_rate": 6.878750000012058e-07, "loss": 0.6287, "step": 25230 }, { "epoch": 2.54, "grad_norm": 5.163820142436162, "learning_rate": 6.863911028610309e-07, "loss": 0.6342, "step": 25235 }, { "epoch": 2.54, "grad_norm": 5.186245405153009, "learning_rate": 6.849086900295571e-07, "loss": 0.6386, "step": 25240 }, { "epoch": 2.55, "grad_norm": 4.625364394762081, "learning_rate": 6.834277620168817e-07, "loss": 0.628, "step": 25245 }, { "epoch": 2.55, "grad_norm": 4.309413384246584, "learning_rate": 6.819483193325898e-07, "loss": 0.6031, "step": 25250 }, { "epoch": 2.55, "grad_norm": 5.5437712430174235, "learning_rate": 6.804703624857578e-07, "loss": 0.6801, "step": 25255 }, { "epoch": 2.55, "grad_norm": 5.719041204609119, "learning_rate": 6.789938919849475e-07, "loss": 0.6371, "step": 25260 }, { "epoch": 2.55, "grad_norm": 5.4311190616552825, "learning_rate": 6.775189083382128e-07, "loss": 0.6481, "step": 25265 }, { "epoch": 2.55, "grad_norm": 4.49817950043537, "learning_rate": 6.760454120530935e-07, "loss": 0.6764, "step": 25270 }, { "epoch": 2.55, "grad_norm": 5.075615059252561, "learning_rate": 6.74573403636618e-07, "loss": 0.6722, "step": 25275 }, { "epoch": 2.55, "grad_norm": 4.756692975556284, "learning_rate": 6.731028835953024e-07, "loss": 0.6153, "step": 25280 }, { "epoch": 2.55, "grad_norm": 5.111532811336681, "learning_rate": 6.716338524351523e-07, "loss": 0.6435, "step": 25285 }, { "epoch": 2.55, "grad_norm": 5.235658015353194, "learning_rate": 6.701663106616591e-07, "loss": 0.6366, "step": 25290 }, { "epoch": 2.55, "grad_norm": 8.998525285202195, "learning_rate": 6.687002587798036e-07, "loss": 0.617, "step": 25295 }, { "epoch": 2.55, "grad_norm": 5.636753103897285, "learning_rate": 6.672356972940519e-07, "loss": 0.6578, "step": 25300 }, { "epoch": 2.55, "grad_norm": 5.007653059949343, "learning_rate": 6.657726267083592e-07, "loss": 0.6836, "step": 25305 }, { "epoch": 2.55, "grad_norm": 7.4049423574361946, "learning_rate": 6.643110475261644e-07, "loss": 0.6093, "step": 25310 }, { "epoch": 2.55, "grad_norm": 4.575892380855971, "learning_rate": 6.62850960250398e-07, "loss": 0.6632, "step": 25315 }, { "epoch": 2.55, "grad_norm": 5.247916695481189, "learning_rate": 6.613923653834731e-07, "loss": 0.6536, "step": 25320 }, { "epoch": 2.55, "grad_norm": 4.71368785157455, "learning_rate": 6.599352634272921e-07, "loss": 0.6091, "step": 25325 }, { "epoch": 2.55, "grad_norm": 5.105286131354021, "learning_rate": 6.584796548832422e-07, "loss": 0.6343, "step": 25330 }, { "epoch": 2.55, "grad_norm": 6.713066779444538, "learning_rate": 6.570255402521964e-07, "loss": 0.6458, "step": 25335 }, { "epoch": 2.55, "grad_norm": 4.530370525092025, "learning_rate": 6.555729200345123e-07, "loss": 0.6326, "step": 25340 }, { "epoch": 2.56, "grad_norm": 7.959540168806156, "learning_rate": 6.541217947300388e-07, "loss": 0.6574, "step": 25345 }, { "epoch": 2.56, "grad_norm": 4.631356069842528, "learning_rate": 6.52672164838104e-07, "loss": 0.6537, "step": 25350 }, { "epoch": 2.56, "grad_norm": 7.655849920407781, "learning_rate": 6.512240308575257e-07, "loss": 0.6484, "step": 25355 }, { "epoch": 2.56, "grad_norm": 5.14779961172738, "learning_rate": 6.497773932866064e-07, "loss": 0.634, "step": 25360 }, { "epoch": 2.56, "grad_norm": 6.745994577184643, "learning_rate": 6.483322526231284e-07, "loss": 0.651, "step": 25365 }, { "epoch": 2.56, "grad_norm": 9.284058608425205, "learning_rate": 6.468886093643673e-07, "loss": 0.6718, "step": 25370 }, { "epoch": 2.56, "grad_norm": 4.781063229895192, "learning_rate": 6.454464640070767e-07, "loss": 0.6623, "step": 25375 }, { "epoch": 2.56, "grad_norm": 4.43704953047697, "learning_rate": 6.440058170475006e-07, "loss": 0.6134, "step": 25380 }, { "epoch": 2.56, "grad_norm": 4.953656567496981, "learning_rate": 6.425666689813603e-07, "loss": 0.7096, "step": 25385 }, { "epoch": 2.56, "grad_norm": 4.7737790556705, "learning_rate": 6.411290203038705e-07, "loss": 0.643, "step": 25390 }, { "epoch": 2.56, "grad_norm": 4.713787102142757, "learning_rate": 6.396928715097189e-07, "loss": 0.6776, "step": 25395 }, { "epoch": 2.56, "grad_norm": 4.8241511109663975, "learning_rate": 6.382582230930868e-07, "loss": 0.6387, "step": 25400 }, { "epoch": 2.56, "grad_norm": 5.215920059201571, "learning_rate": 6.368250755476329e-07, "loss": 0.6091, "step": 25405 }, { "epoch": 2.56, "grad_norm": 6.24655780189847, "learning_rate": 6.353934293665049e-07, "loss": 0.6465, "step": 25410 }, { "epoch": 2.56, "grad_norm": 4.51722974692802, "learning_rate": 6.339632850423272e-07, "loss": 0.6697, "step": 25415 }, { "epoch": 2.56, "grad_norm": 4.942691073595876, "learning_rate": 6.325346430672158e-07, "loss": 0.6949, "step": 25420 }, { "epoch": 2.56, "grad_norm": 5.600010750073224, "learning_rate": 6.311075039327602e-07, "loss": 0.6358, "step": 25425 }, { "epoch": 2.56, "grad_norm": 7.194071088902065, "learning_rate": 6.296818681300409e-07, "loss": 0.633, "step": 25430 }, { "epoch": 2.56, "grad_norm": 4.352558847451886, "learning_rate": 6.28257736149615e-07, "loss": 0.6241, "step": 25435 }, { "epoch": 2.56, "grad_norm": 4.656036452931519, "learning_rate": 6.268351084815283e-07, "loss": 0.6695, "step": 25440 }, { "epoch": 2.57, "grad_norm": 4.692429799378556, "learning_rate": 6.254139856153024e-07, "loss": 0.6111, "step": 25445 }, { "epoch": 2.57, "grad_norm": 4.833729995327521, "learning_rate": 6.23994368039948e-07, "loss": 0.6356, "step": 25450 }, { "epoch": 2.57, "grad_norm": 4.541053282950694, "learning_rate": 6.225762562439503e-07, "loss": 0.6815, "step": 25455 }, { "epoch": 2.57, "grad_norm": 4.63160893567276, "learning_rate": 6.211596507152823e-07, "loss": 0.6209, "step": 25460 }, { "epoch": 2.57, "grad_norm": 4.7630558895319615, "learning_rate": 6.197445519413958e-07, "loss": 0.6881, "step": 25465 }, { "epoch": 2.57, "grad_norm": 4.555410308682889, "learning_rate": 6.183309604092258e-07, "loss": 0.632, "step": 25470 }, { "epoch": 2.57, "grad_norm": 4.86849347035945, "learning_rate": 6.16918876605187e-07, "loss": 0.6498, "step": 25475 }, { "epoch": 2.57, "grad_norm": 4.873602369763425, "learning_rate": 6.155083010151769e-07, "loss": 0.6766, "step": 25480 }, { "epoch": 2.57, "grad_norm": 4.367303692125798, "learning_rate": 6.140992341245728e-07, "loss": 0.6312, "step": 25485 }, { "epoch": 2.57, "grad_norm": 4.862852100253972, "learning_rate": 6.126916764182334e-07, "loss": 0.6281, "step": 25490 }, { "epoch": 2.57, "grad_norm": 5.515087993078539, "learning_rate": 6.11285628380498e-07, "loss": 0.6453, "step": 25495 }, { "epoch": 2.57, "grad_norm": 4.2591560119771845, "learning_rate": 6.098810904951847e-07, "loss": 0.6283, "step": 25500 }, { "epoch": 2.57, "grad_norm": 4.87669698541844, "learning_rate": 6.084780632455967e-07, "loss": 0.6248, "step": 25505 }, { "epoch": 2.57, "grad_norm": 4.576326511788119, "learning_rate": 6.070765471145113e-07, "loss": 0.6219, "step": 25510 }, { "epoch": 2.57, "grad_norm": 5.332526356057729, "learning_rate": 6.056765425841921e-07, "loss": 0.667, "step": 25515 }, { "epoch": 2.57, "grad_norm": 4.774005536488563, "learning_rate": 6.04278050136376e-07, "loss": 0.633, "step": 25520 }, { "epoch": 2.57, "grad_norm": 8.115837864694559, "learning_rate": 6.028810702522853e-07, "loss": 0.6713, "step": 25525 }, { "epoch": 2.57, "grad_norm": 5.133344507166991, "learning_rate": 6.014856034126177e-07, "loss": 0.6668, "step": 25530 }, { "epoch": 2.57, "grad_norm": 4.432610346496629, "learning_rate": 6.000916500975534e-07, "loss": 0.6277, "step": 25535 }, { "epoch": 2.57, "grad_norm": 5.360887456370527, "learning_rate": 5.986992107867495e-07, "loss": 0.6635, "step": 25540 }, { "epoch": 2.58, "grad_norm": 4.656144876230373, "learning_rate": 5.973082859593448e-07, "loss": 0.6342, "step": 25545 }, { "epoch": 2.58, "grad_norm": 4.42985086767463, "learning_rate": 5.959188760939527e-07, "loss": 0.6248, "step": 25550 }, { "epoch": 2.58, "grad_norm": 4.887388152336578, "learning_rate": 5.945309816686695e-07, "loss": 0.588, "step": 25555 }, { "epoch": 2.58, "grad_norm": 4.9624664483204315, "learning_rate": 5.931446031610666e-07, "loss": 0.6716, "step": 25560 }, { "epoch": 2.58, "grad_norm": 5.040677312192779, "learning_rate": 5.917597410481979e-07, "loss": 0.6604, "step": 25565 }, { "epoch": 2.58, "grad_norm": 4.935228644214927, "learning_rate": 5.903763958065912e-07, "loss": 0.6344, "step": 25570 }, { "epoch": 2.58, "grad_norm": 4.952775601502335, "learning_rate": 5.889945679122566e-07, "loss": 0.6559, "step": 25575 }, { "epoch": 2.58, "grad_norm": 4.7423239151175185, "learning_rate": 5.876142578406763e-07, "loss": 0.6531, "step": 25580 }, { "epoch": 2.58, "grad_norm": 4.254082493619603, "learning_rate": 5.862354660668168e-07, "loss": 0.6331, "step": 25585 }, { "epoch": 2.58, "grad_norm": 4.633593931063916, "learning_rate": 5.848581930651165e-07, "loss": 0.6595, "step": 25590 }, { "epoch": 2.58, "grad_norm": 4.51468653861724, "learning_rate": 5.834824393094962e-07, "loss": 0.668, "step": 25595 }, { "epoch": 2.58, "grad_norm": 6.2054497461118, "learning_rate": 5.821082052733495e-07, "loss": 0.6203, "step": 25600 }, { "epoch": 2.58, "grad_norm": 6.41402199262695, "learning_rate": 5.807354914295516e-07, "loss": 0.6508, "step": 25605 }, { "epoch": 2.58, "grad_norm": 5.13511331457535, "learning_rate": 5.793642982504477e-07, "loss": 0.6391, "step": 25610 }, { "epoch": 2.58, "grad_norm": 5.619860066420089, "learning_rate": 5.779946262078683e-07, "loss": 0.6209, "step": 25615 }, { "epoch": 2.58, "grad_norm": 5.702009815902476, "learning_rate": 5.766264757731144e-07, "loss": 0.6334, "step": 25620 }, { "epoch": 2.58, "grad_norm": 7.854644332692116, "learning_rate": 5.752598474169641e-07, "loss": 0.6912, "step": 25625 }, { "epoch": 2.58, "grad_norm": 4.9694105036745775, "learning_rate": 5.738947416096752e-07, "loss": 0.6628, "step": 25630 }, { "epoch": 2.58, "grad_norm": 4.500559211637084, "learning_rate": 5.725311588209786e-07, "loss": 0.6253, "step": 25635 }, { "epoch": 2.59, "grad_norm": 4.848868708725839, "learning_rate": 5.711690995200814e-07, "loss": 0.6545, "step": 25640 }, { "epoch": 2.59, "grad_norm": 5.647854890983991, "learning_rate": 5.698085641756657e-07, "loss": 0.6283, "step": 25645 }, { "epoch": 2.59, "grad_norm": 5.390714116328605, "learning_rate": 5.684495532558931e-07, "loss": 0.6334, "step": 25650 }, { "epoch": 2.59, "grad_norm": 5.771148030368524, "learning_rate": 5.670920672283958e-07, "loss": 0.6491, "step": 25655 }, { "epoch": 2.59, "grad_norm": 4.782194409574925, "learning_rate": 5.657361065602846e-07, "loss": 0.6233, "step": 25660 }, { "epoch": 2.59, "grad_norm": 5.897256882115723, "learning_rate": 5.643816717181444e-07, "loss": 0.6263, "step": 25665 }, { "epoch": 2.59, "grad_norm": 4.424184580177064, "learning_rate": 5.630287631680343e-07, "loss": 0.5929, "step": 25670 }, { "epoch": 2.59, "grad_norm": 4.881655967229124, "learning_rate": 5.616773813754883e-07, "loss": 0.6481, "step": 25675 }, { "epoch": 2.59, "grad_norm": 4.506211542493089, "learning_rate": 5.60327526805517e-07, "loss": 0.6363, "step": 25680 }, { "epoch": 2.59, "grad_norm": 7.092793877990308, "learning_rate": 5.589791999226024e-07, "loss": 0.6262, "step": 25685 }, { "epoch": 2.59, "grad_norm": 4.5625815137116215, "learning_rate": 5.576324011907041e-07, "loss": 0.6374, "step": 25690 }, { "epoch": 2.59, "grad_norm": 5.210948120306893, "learning_rate": 5.562871310732543e-07, "loss": 0.6637, "step": 25695 }, { "epoch": 2.59, "grad_norm": 5.441147446353072, "learning_rate": 5.54943390033158e-07, "loss": 0.6427, "step": 25700 }, { "epoch": 2.59, "grad_norm": 4.73805515667014, "learning_rate": 5.53601178532795e-07, "loss": 0.6232, "step": 25705 }, { "epoch": 2.59, "grad_norm": 9.367015312850405, "learning_rate": 5.522604970340201e-07, "loss": 0.6698, "step": 25710 }, { "epoch": 2.59, "grad_norm": 6.430392634994808, "learning_rate": 5.509213459981594e-07, "loss": 0.6715, "step": 25715 }, { "epoch": 2.59, "grad_norm": 6.593853208661611, "learning_rate": 5.495837258860154e-07, "loss": 0.6764, "step": 25720 }, { "epoch": 2.59, "grad_norm": 4.917194336277024, "learning_rate": 5.482476371578605e-07, "loss": 0.6576, "step": 25725 }, { "epoch": 2.59, "grad_norm": 4.599302216384735, "learning_rate": 5.469130802734418e-07, "loss": 0.6801, "step": 25730 }, { "epoch": 2.59, "grad_norm": 4.5781171029935255, "learning_rate": 5.455800556919777e-07, "loss": 0.628, "step": 25735 }, { "epoch": 2.6, "grad_norm": 5.24417630576127, "learning_rate": 5.442485638721635e-07, "loss": 0.6454, "step": 25740 }, { "epoch": 2.6, "grad_norm": 6.1990419917474675, "learning_rate": 5.429186052721613e-07, "loss": 0.6697, "step": 25745 }, { "epoch": 2.6, "grad_norm": 4.870940237800007, "learning_rate": 5.415901803496109e-07, "loss": 0.6187, "step": 25750 }, { "epoch": 2.6, "grad_norm": 5.990545890958486, "learning_rate": 5.402632895616217e-07, "loss": 0.6298, "step": 25755 }, { "epoch": 2.6, "grad_norm": 6.069670700384858, "learning_rate": 5.389379333647748e-07, "loss": 0.6339, "step": 25760 }, { "epoch": 2.6, "grad_norm": 5.133013881271903, "learning_rate": 5.37614112215124e-07, "loss": 0.6353, "step": 25765 }, { "epoch": 2.6, "grad_norm": 4.550175486105931, "learning_rate": 5.362918265681943e-07, "loss": 0.5891, "step": 25770 }, { "epoch": 2.6, "grad_norm": 5.150306726699754, "learning_rate": 5.349710768789851e-07, "loss": 0.6505, "step": 25775 }, { "epoch": 2.6, "grad_norm": 4.704186001901201, "learning_rate": 5.336518636019622e-07, "loss": 0.6493, "step": 25780 }, { "epoch": 2.6, "grad_norm": 4.631385357677084, "learning_rate": 5.323341871910687e-07, "loss": 0.6723, "step": 25785 }, { "epoch": 2.6, "grad_norm": 4.732305304742764, "learning_rate": 5.310180480997135e-07, "loss": 0.6353, "step": 25790 }, { "epoch": 2.6, "grad_norm": 5.81983535599789, "learning_rate": 5.297034467807805e-07, "loss": 0.6561, "step": 25795 }, { "epoch": 2.6, "grad_norm": 4.4578611668626955, "learning_rate": 5.2839038368662e-07, "loss": 0.6382, "step": 25800 }, { "epoch": 2.6, "grad_norm": 4.919902980430912, "learning_rate": 5.27078859269059e-07, "loss": 0.6479, "step": 25805 }, { "epoch": 2.6, "grad_norm": 4.126133325799623, "learning_rate": 5.257688739793892e-07, "loss": 0.678, "step": 25810 }, { "epoch": 2.6, "grad_norm": 6.436875044498097, "learning_rate": 5.24460428268378e-07, "loss": 0.6517, "step": 25815 }, { "epoch": 2.6, "grad_norm": 5.790793229197409, "learning_rate": 5.231535225862583e-07, "loss": 0.6788, "step": 25820 }, { "epoch": 2.6, "grad_norm": 4.505001576559171, "learning_rate": 5.218481573827356e-07, "loss": 0.6755, "step": 25825 }, { "epoch": 2.6, "grad_norm": 4.509741158174838, "learning_rate": 5.205443331069843e-07, "loss": 0.6237, "step": 25830 }, { "epoch": 2.6, "grad_norm": 10.540920818207695, "learning_rate": 5.1924205020765e-07, "loss": 0.6581, "step": 25835 }, { "epoch": 2.61, "grad_norm": 4.4711223646163125, "learning_rate": 5.179413091328461e-07, "loss": 0.6207, "step": 25840 }, { "epoch": 2.61, "grad_norm": 6.107980371352623, "learning_rate": 5.166421103301572e-07, "loss": 0.657, "step": 25845 }, { "epoch": 2.61, "grad_norm": 4.71289938635372, "learning_rate": 5.153444542466368e-07, "loss": 0.6399, "step": 25850 }, { "epoch": 2.61, "grad_norm": 5.43401069724985, "learning_rate": 5.140483413288061e-07, "loss": 0.6474, "step": 25855 }, { "epoch": 2.61, "grad_norm": 4.692708943604419, "learning_rate": 5.127537720226555e-07, "loss": 0.6567, "step": 25860 }, { "epoch": 2.61, "grad_norm": 5.929392791908258, "learning_rate": 5.114607467736471e-07, "loss": 0.6174, "step": 25865 }, { "epoch": 2.61, "grad_norm": 4.493020788611299, "learning_rate": 5.101692660267077e-07, "loss": 0.6331, "step": 25870 }, { "epoch": 2.61, "grad_norm": 7.050134354806047, "learning_rate": 5.088793302262362e-07, "loss": 0.6717, "step": 25875 }, { "epoch": 2.61, "grad_norm": 4.760627520220874, "learning_rate": 5.075909398160983e-07, "loss": 0.6459, "step": 25880 }, { "epoch": 2.61, "grad_norm": 4.579875926689988, "learning_rate": 5.06304095239627e-07, "loss": 0.6128, "step": 25885 }, { "epoch": 2.61, "grad_norm": 4.695488079068494, "learning_rate": 5.050187969396248e-07, "loss": 0.6409, "step": 25890 }, { "epoch": 2.61, "grad_norm": 4.584920910978887, "learning_rate": 5.037350453583601e-07, "loss": 0.6127, "step": 25895 }, { "epoch": 2.61, "grad_norm": 4.733672284793962, "learning_rate": 5.024528409375728e-07, "loss": 0.6261, "step": 25900 }, { "epoch": 2.61, "grad_norm": 5.334173651853007, "learning_rate": 5.011721841184663e-07, "loss": 0.6602, "step": 25905 }, { "epoch": 2.61, "grad_norm": 4.659865730931912, "learning_rate": 4.998930753417153e-07, "loss": 0.6366, "step": 25910 }, { "epoch": 2.61, "grad_norm": 7.149491262060584, "learning_rate": 4.986155150474592e-07, "loss": 0.6125, "step": 25915 }, { "epoch": 2.61, "grad_norm": 5.661274683439253, "learning_rate": 4.973395036753054e-07, "loss": 0.6447, "step": 25920 }, { "epoch": 2.61, "grad_norm": 5.369340660498729, "learning_rate": 4.960650416643259e-07, "loss": 0.6536, "step": 25925 }, { "epoch": 2.61, "grad_norm": 4.4477401440865405, "learning_rate": 4.947921294530656e-07, "loss": 0.6509, "step": 25930 }, { "epoch": 2.61, "grad_norm": 5.025337057836171, "learning_rate": 4.935207674795295e-07, "loss": 0.6458, "step": 25935 }, { "epoch": 2.62, "grad_norm": 4.64075589139611, "learning_rate": 4.922509561811939e-07, "loss": 0.6374, "step": 25940 }, { "epoch": 2.62, "grad_norm": 4.465561320407794, "learning_rate": 4.909826959949988e-07, "loss": 0.6422, "step": 25945 }, { "epoch": 2.62, "grad_norm": 4.385235783038693, "learning_rate": 4.897159873573515e-07, "loss": 0.6421, "step": 25950 }, { "epoch": 2.62, "grad_norm": 4.594086031534952, "learning_rate": 4.884508307041241e-07, "loss": 0.6374, "step": 25955 }, { "epoch": 2.62, "grad_norm": 4.258903294857566, "learning_rate": 4.871872264706579e-07, "loss": 0.6678, "step": 25960 }, { "epoch": 2.62, "grad_norm": 4.863058821194196, "learning_rate": 4.859251750917559e-07, "loss": 0.6825, "step": 25965 }, { "epoch": 2.62, "grad_norm": 4.665283377330753, "learning_rate": 4.846646770016905e-07, "loss": 0.6431, "step": 25970 }, { "epoch": 2.62, "grad_norm": 4.468217346387384, "learning_rate": 4.834057326341973e-07, "loss": 0.6687, "step": 25975 }, { "epoch": 2.62, "grad_norm": 4.913611862161178, "learning_rate": 4.821483424224776e-07, "loss": 0.6269, "step": 25980 }, { "epoch": 2.62, "grad_norm": 5.830245506259714, "learning_rate": 4.808925067991977e-07, "loss": 0.642, "step": 25985 }, { "epoch": 2.62, "grad_norm": 5.179273168502301, "learning_rate": 4.796382261964905e-07, "loss": 0.647, "step": 25990 }, { "epoch": 2.62, "grad_norm": 4.474430609458084, "learning_rate": 4.78385501045951e-07, "loss": 0.6752, "step": 25995 }, { "epoch": 2.62, "grad_norm": 5.195319875581904, "learning_rate": 4.771343317786431e-07, "loss": 0.6482, "step": 26000 }, { "epoch": 2.62, "grad_norm": 5.632248905230632, "learning_rate": 4.75884718825092e-07, "loss": 0.6448, "step": 26005 }, { "epoch": 2.62, "grad_norm": 4.697644503611095, "learning_rate": 4.7463666261528816e-07, "loss": 0.6415, "step": 26010 }, { "epoch": 2.62, "grad_norm": 4.47786641642741, "learning_rate": 4.7339016357868586e-07, "loss": 0.6067, "step": 26015 }, { "epoch": 2.62, "grad_norm": 4.537017336696449, "learning_rate": 4.7214522214420464e-07, "loss": 0.6452, "step": 26020 }, { "epoch": 2.62, "grad_norm": 5.8881464897238285, "learning_rate": 4.7090183874022867e-07, "loss": 0.6293, "step": 26025 }, { "epoch": 2.62, "grad_norm": 7.987028504731956, "learning_rate": 4.696600137946028e-07, "loss": 0.6627, "step": 26030 }, { "epoch": 2.62, "grad_norm": 4.634628482096437, "learning_rate": 4.684197477346408e-07, "loss": 0.6449, "step": 26035 }, { "epoch": 2.63, "grad_norm": 4.980914909416849, "learning_rate": 4.6718104098711525e-07, "loss": 0.6715, "step": 26040 }, { "epoch": 2.63, "grad_norm": 4.477828631881524, "learning_rate": 4.65943893978264e-07, "loss": 0.6841, "step": 26045 }, { "epoch": 2.63, "grad_norm": 4.962689410012361, "learning_rate": 4.6470830713378714e-07, "loss": 0.6577, "step": 26050 }, { "epoch": 2.63, "grad_norm": 4.410764725035844, "learning_rate": 4.634742808788517e-07, "loss": 0.6437, "step": 26055 }, { "epoch": 2.63, "grad_norm": 5.205768291835934, "learning_rate": 4.62241815638082e-07, "loss": 0.6492, "step": 26060 }, { "epoch": 2.63, "grad_norm": 4.529713580489901, "learning_rate": 4.610109118355699e-07, "loss": 0.6483, "step": 26065 }, { "epoch": 2.63, "grad_norm": 5.108805923799643, "learning_rate": 4.597815698948688e-07, "loss": 0.6324, "step": 26070 }, { "epoch": 2.63, "grad_norm": 4.6314997168050525, "learning_rate": 4.585537902389925e-07, "loss": 0.6332, "step": 26075 }, { "epoch": 2.63, "grad_norm": 7.184210025979383, "learning_rate": 4.5732757329041866e-07, "loss": 0.6728, "step": 26080 }, { "epoch": 2.63, "grad_norm": 4.714161295756465, "learning_rate": 4.5610291947108866e-07, "loss": 0.6529, "step": 26085 }, { "epoch": 2.63, "grad_norm": 5.667064450386645, "learning_rate": 4.548798292024037e-07, "loss": 0.6738, "step": 26090 }, { "epoch": 2.63, "grad_norm": 7.745480185868553, "learning_rate": 4.536583029052294e-07, "loss": 0.6351, "step": 26095 }, { "epoch": 2.63, "grad_norm": 5.460932447787701, "learning_rate": 4.5243834099989006e-07, "loss": 0.6804, "step": 26100 }, { "epoch": 2.63, "grad_norm": 4.212632081219248, "learning_rate": 4.5121994390617484e-07, "loss": 0.6317, "step": 26105 }, { "epoch": 2.63, "grad_norm": 8.231950682387714, "learning_rate": 4.5000311204333123e-07, "loss": 0.6345, "step": 26110 }, { "epoch": 2.63, "grad_norm": 6.870402774643485, "learning_rate": 4.4878784583007207e-07, "loss": 0.6527, "step": 26115 }, { "epoch": 2.63, "grad_norm": 4.89773131939057, "learning_rate": 4.4757414568456724e-07, "loss": 0.6368, "step": 26120 }, { "epoch": 2.63, "grad_norm": 4.53326405211866, "learning_rate": 4.4636201202445164e-07, "loss": 0.6441, "step": 26125 }, { "epoch": 2.63, "grad_norm": 4.791953087780344, "learning_rate": 4.4515144526681875e-07, "loss": 0.6438, "step": 26130 }, { "epoch": 2.63, "grad_norm": 5.825566433607306, "learning_rate": 4.4394244582822264e-07, "loss": 0.6512, "step": 26135 }, { "epoch": 2.64, "grad_norm": 6.235658704216288, "learning_rate": 4.4273501412468e-07, "loss": 0.6547, "step": 26140 }, { "epoch": 2.64, "grad_norm": 6.890768808912151, "learning_rate": 4.4152915057166513e-07, "loss": 0.6593, "step": 26145 }, { "epoch": 2.64, "grad_norm": 4.795158812209058, "learning_rate": 4.403248555841166e-07, "loss": 0.6529, "step": 26150 }, { "epoch": 2.64, "grad_norm": 5.131070693260541, "learning_rate": 4.391221295764292e-07, "loss": 0.6528, "step": 26155 }, { "epoch": 2.64, "grad_norm": 4.710697756328377, "learning_rate": 4.379209729624617e-07, "loss": 0.6503, "step": 26160 }, { "epoch": 2.64, "grad_norm": 4.6781511899293236, "learning_rate": 4.367213861555308e-07, "loss": 0.6672, "step": 26165 }, { "epoch": 2.64, "grad_norm": 5.517593966636261, "learning_rate": 4.355233695684119e-07, "loss": 0.6221, "step": 26170 }, { "epoch": 2.64, "grad_norm": 8.373524161316528, "learning_rate": 4.343269236133413e-07, "loss": 0.6879, "step": 26175 }, { "epoch": 2.64, "grad_norm": 4.720161138755838, "learning_rate": 4.331320487020163e-07, "loss": 0.6572, "step": 26180 }, { "epoch": 2.64, "grad_norm": 4.763630065649678, "learning_rate": 4.319387452455903e-07, "loss": 0.6046, "step": 26185 }, { "epoch": 2.64, "grad_norm": 4.422303645853079, "learning_rate": 4.3074701365468043e-07, "loss": 0.6208, "step": 26190 }, { "epoch": 2.64, "grad_norm": 4.433871515640049, "learning_rate": 4.295568543393591e-07, "loss": 0.6487, "step": 26195 }, { "epoch": 2.64, "grad_norm": 4.931653971987222, "learning_rate": 4.283682677091583e-07, "loss": 0.6361, "step": 26200 }, { "epoch": 2.64, "grad_norm": 4.890247256439932, "learning_rate": 4.271812541730697e-07, "loss": 0.7039, "step": 26205 }, { "epoch": 2.64, "grad_norm": 4.698518626266278, "learning_rate": 4.2599581413954485e-07, "loss": 0.6728, "step": 26210 }, { "epoch": 2.64, "grad_norm": 4.8057777852945, "learning_rate": 4.2481194801649086e-07, "loss": 0.6493, "step": 26215 }, { "epoch": 2.64, "grad_norm": 5.169528262794991, "learning_rate": 4.236296562112768e-07, "loss": 0.6523, "step": 26220 }, { "epoch": 2.64, "grad_norm": 5.521880829571063, "learning_rate": 4.224489391307268e-07, "loss": 0.6345, "step": 26225 }, { "epoch": 2.64, "grad_norm": 5.596943705221121, "learning_rate": 4.212697971811247e-07, "loss": 0.6282, "step": 26230 }, { "epoch": 2.65, "grad_norm": 4.40358921987457, "learning_rate": 4.200922307682115e-07, "loss": 0.6475, "step": 26235 }, { "epoch": 2.65, "grad_norm": 4.424249521185416, "learning_rate": 4.1891624029718856e-07, "loss": 0.6432, "step": 26240 }, { "epoch": 2.65, "grad_norm": 4.796678850595712, "learning_rate": 4.1774182617271064e-07, "loss": 0.6428, "step": 26245 }, { "epoch": 2.65, "grad_norm": 4.6661345801770056, "learning_rate": 4.165689887988944e-07, "loss": 0.6626, "step": 26250 }, { "epoch": 2.65, "grad_norm": 4.5806291067324105, "learning_rate": 4.1539772857931203e-07, "loss": 0.6459, "step": 26255 }, { "epoch": 2.65, "grad_norm": 4.5650564431500085, "learning_rate": 4.142280459169923e-07, "loss": 0.6337, "step": 26260 }, { "epoch": 2.65, "grad_norm": 4.73604227877278, "learning_rate": 4.130599412144215e-07, "loss": 0.6437, "step": 26265 }, { "epoch": 2.65, "grad_norm": 4.586531802102478, "learning_rate": 4.118934148735437e-07, "loss": 0.653, "step": 26270 }, { "epoch": 2.65, "grad_norm": 5.758452148639693, "learning_rate": 4.107284672957601e-07, "loss": 0.6437, "step": 26275 }, { "epoch": 2.65, "grad_norm": 4.571298067242407, "learning_rate": 4.095650988819272e-07, "loss": 0.6772, "step": 26280 }, { "epoch": 2.65, "grad_norm": 7.556138379592204, "learning_rate": 4.084033100323598e-07, "loss": 0.6234, "step": 26285 }, { "epoch": 2.65, "grad_norm": 5.604219528719242, "learning_rate": 4.072431011468286e-07, "loss": 0.6023, "step": 26290 }, { "epoch": 2.65, "grad_norm": 5.610685075191588, "learning_rate": 4.0608447262455886e-07, "loss": 0.6254, "step": 26295 }, { "epoch": 2.65, "grad_norm": 4.604971165395091, "learning_rate": 4.049274248642343e-07, "loss": 0.6382, "step": 26300 }, { "epoch": 2.65, "grad_norm": 5.805269762146627, "learning_rate": 4.037719582639943e-07, "loss": 0.6471, "step": 26305 }, { "epoch": 2.65, "grad_norm": 5.928338049264352, "learning_rate": 4.02618073221433e-07, "loss": 0.6322, "step": 26310 }, { "epoch": 2.65, "grad_norm": 4.635905096298693, "learning_rate": 4.014657701336028e-07, "loss": 0.6773, "step": 26315 }, { "epoch": 2.65, "grad_norm": 4.460898503134962, "learning_rate": 4.003150493970087e-07, "loss": 0.6585, "step": 26320 }, { "epoch": 2.65, "grad_norm": 4.856638872130659, "learning_rate": 3.9916591140761294e-07, "loss": 0.6428, "step": 26325 }, { "epoch": 2.65, "grad_norm": 4.54106911842793, "learning_rate": 3.98018356560832e-07, "loss": 0.6197, "step": 26330 }, { "epoch": 2.66, "grad_norm": 4.777343218581925, "learning_rate": 3.9687238525153994e-07, "loss": 0.6599, "step": 26335 }, { "epoch": 2.66, "grad_norm": 5.6748165213887125, "learning_rate": 3.9572799787406245e-07, "loss": 0.6462, "step": 26340 }, { "epoch": 2.66, "grad_norm": 4.5916775692590726, "learning_rate": 3.945851948221846e-07, "loss": 0.605, "step": 26345 }, { "epoch": 2.66, "grad_norm": 4.306976299073246, "learning_rate": 3.9344397648914234e-07, "loss": 0.6697, "step": 26350 }, { "epoch": 2.66, "grad_norm": 5.195639071441672, "learning_rate": 3.9230434326762823e-07, "loss": 0.6419, "step": 26355 }, { "epoch": 2.66, "grad_norm": 4.581093545879182, "learning_rate": 3.9116629554978747e-07, "loss": 0.6413, "step": 26360 }, { "epoch": 2.66, "grad_norm": 5.052458529720565, "learning_rate": 3.9002983372722345e-07, "loss": 0.6623, "step": 26365 }, { "epoch": 2.66, "grad_norm": 4.697125648228998, "learning_rate": 3.888949581909901e-07, "loss": 0.6843, "step": 26370 }, { "epoch": 2.66, "grad_norm": 5.660423087556206, "learning_rate": 3.877616693315978e-07, "loss": 0.6547, "step": 26375 }, { "epoch": 2.66, "grad_norm": 4.394527486808878, "learning_rate": 3.8662996753901063e-07, "loss": 0.6739, "step": 26380 }, { "epoch": 2.66, "grad_norm": 4.587506714746173, "learning_rate": 3.8549985320264496e-07, "loss": 0.6577, "step": 26385 }, { "epoch": 2.66, "grad_norm": 4.525985421444638, "learning_rate": 3.8437132671137245e-07, "loss": 0.6433, "step": 26390 }, { "epoch": 2.66, "grad_norm": 5.308533008713368, "learning_rate": 3.8324438845351697e-07, "loss": 0.6337, "step": 26395 }, { "epoch": 2.66, "grad_norm": 4.600283096400497, "learning_rate": 3.8211903881685887e-07, "loss": 0.6838, "step": 26400 }, { "epoch": 2.66, "grad_norm": 4.629575518720822, "learning_rate": 3.8099527818862837e-07, "loss": 0.6116, "step": 26405 }, { "epoch": 2.66, "grad_norm": 4.946398314146315, "learning_rate": 3.7987310695551115e-07, "loss": 0.6687, "step": 26410 }, { "epoch": 2.66, "grad_norm": 4.699259761441962, "learning_rate": 3.787525255036456e-07, "loss": 0.6748, "step": 26415 }, { "epoch": 2.66, "grad_norm": 4.4768450040424606, "learning_rate": 3.7763353421862215e-07, "loss": 0.6404, "step": 26420 }, { "epoch": 2.66, "grad_norm": 4.489871861176078, "learning_rate": 3.7651613348548386e-07, "loss": 0.6138, "step": 26425 }, { "epoch": 2.66, "grad_norm": 4.413401018566667, "learning_rate": 3.7540032368872937e-07, "loss": 0.6692, "step": 26430 }, { "epoch": 2.67, "grad_norm": 4.8938714822113205, "learning_rate": 3.742861052123048e-07, "loss": 0.5991, "step": 26435 }, { "epoch": 2.67, "grad_norm": 5.467851521742816, "learning_rate": 3.7317347843961514e-07, "loss": 0.7053, "step": 26440 }, { "epoch": 2.67, "grad_norm": 4.564001385017399, "learning_rate": 3.7206244375351197e-07, "loss": 0.6543, "step": 26445 }, { "epoch": 2.67, "grad_norm": 5.099093322914692, "learning_rate": 3.7095300153630167e-07, "loss": 0.6714, "step": 26450 }, { "epoch": 2.67, "grad_norm": 4.413616723694407, "learning_rate": 3.6984515216974104e-07, "loss": 0.6774, "step": 26455 }, { "epoch": 2.67, "grad_norm": 4.746017427039399, "learning_rate": 3.687388960350424e-07, "loss": 0.6702, "step": 26460 }, { "epoch": 2.67, "grad_norm": 6.302096269813245, "learning_rate": 3.6763423351286576e-07, "loss": 0.6302, "step": 26465 }, { "epoch": 2.67, "grad_norm": 5.413759822215279, "learning_rate": 3.6653116498332587e-07, "loss": 0.6451, "step": 26470 }, { "epoch": 2.67, "grad_norm": 4.924375370439851, "learning_rate": 3.6542969082598576e-07, "loss": 0.6791, "step": 26475 }, { "epoch": 2.67, "grad_norm": 4.551790902336037, "learning_rate": 3.6432981141986347e-07, "loss": 0.6193, "step": 26480 }, { "epoch": 2.67, "grad_norm": 4.447720030507796, "learning_rate": 3.632315271434239e-07, "loss": 0.6189, "step": 26485 }, { "epoch": 2.67, "grad_norm": 4.80993642009209, "learning_rate": 3.6213483837458873e-07, "loss": 0.6334, "step": 26490 }, { "epoch": 2.67, "grad_norm": 4.8623738488613215, "learning_rate": 3.61039745490725e-07, "loss": 0.6147, "step": 26495 }, { "epoch": 2.67, "grad_norm": 5.022644888678823, "learning_rate": 3.5994624886865445e-07, "loss": 0.697, "step": 26500 }, { "epoch": 2.67, "grad_norm": 12.240058964168647, "learning_rate": 3.588543488846485e-07, "loss": 0.6848, "step": 26505 }, { "epoch": 2.67, "grad_norm": 4.456351689321203, "learning_rate": 3.5776404591442824e-07, "loss": 0.633, "step": 26510 }, { "epoch": 2.67, "grad_norm": 5.244832953200636, "learning_rate": 3.5667534033316466e-07, "loss": 0.6951, "step": 26515 }, { "epoch": 2.67, "grad_norm": 4.539647528153177, "learning_rate": 3.5558823251548304e-07, "loss": 0.6083, "step": 26520 }, { "epoch": 2.67, "grad_norm": 5.573046024457291, "learning_rate": 3.545027228354542e-07, "loss": 0.6366, "step": 26525 }, { "epoch": 2.67, "grad_norm": 4.955392603809859, "learning_rate": 3.534188116666004e-07, "loss": 0.681, "step": 26530 }, { "epoch": 2.68, "grad_norm": 5.170833130307659, "learning_rate": 3.523364993818978e-07, "loss": 0.6342, "step": 26535 }, { "epoch": 2.68, "grad_norm": 4.891291415673024, "learning_rate": 3.512557863537647e-07, "loss": 0.6489, "step": 26540 }, { "epoch": 2.68, "grad_norm": 4.888406857967937, "learning_rate": 3.5017667295407676e-07, "loss": 0.6544, "step": 26545 }, { "epoch": 2.68, "grad_norm": 4.449293637678464, "learning_rate": 3.490991595541532e-07, "loss": 0.6436, "step": 26550 }, { "epoch": 2.68, "grad_norm": 4.652288939825329, "learning_rate": 3.480232465247679e-07, "loss": 0.6586, "step": 26555 }, { "epoch": 2.68, "grad_norm": 6.060881608415734, "learning_rate": 3.469489342361393e-07, "loss": 0.6442, "step": 26560 }, { "epoch": 2.68, "grad_norm": 4.798399988610016, "learning_rate": 3.458762230579388e-07, "loss": 0.6235, "step": 26565 }, { "epoch": 2.68, "grad_norm": 4.838544020817497, "learning_rate": 3.44805113359285e-07, "loss": 0.652, "step": 26570 }, { "epoch": 2.68, "grad_norm": 4.5975411064486185, "learning_rate": 3.4373560550874543e-07, "loss": 0.6477, "step": 26575 }, { "epoch": 2.68, "grad_norm": 5.549271557696654, "learning_rate": 3.426676998743361e-07, "loss": 0.6937, "step": 26580 }, { "epoch": 2.68, "grad_norm": 4.760921452630401, "learning_rate": 3.416013968235238e-07, "loss": 0.6432, "step": 26585 }, { "epoch": 2.68, "grad_norm": 5.4813525599711355, "learning_rate": 3.4053669672322096e-07, "loss": 0.6855, "step": 26590 }, { "epoch": 2.68, "grad_norm": 4.671748925862381, "learning_rate": 3.3947359993979077e-07, "loss": 0.6466, "step": 26595 }, { "epoch": 2.68, "grad_norm": 4.543481218279123, "learning_rate": 3.3841210683904393e-07, "loss": 0.6411, "step": 26600 }, { "epoch": 2.68, "grad_norm": 4.72674022959049, "learning_rate": 3.3735221778623815e-07, "loss": 0.6554, "step": 26605 }, { "epoch": 2.68, "grad_norm": 5.098608945175818, "learning_rate": 3.362939331460807e-07, "loss": 0.6626, "step": 26610 }, { "epoch": 2.68, "grad_norm": 4.787862398042563, "learning_rate": 3.352372532827275e-07, "loss": 0.6539, "step": 26615 }, { "epoch": 2.68, "grad_norm": 4.632119559373765, "learning_rate": 3.341821785597787e-07, "loss": 0.6774, "step": 26620 }, { "epoch": 2.68, "grad_norm": 4.783547536689469, "learning_rate": 3.3312870934028685e-07, "loss": 0.6352, "step": 26625 }, { "epoch": 2.68, "grad_norm": 4.297647085341283, "learning_rate": 3.3207684598674906e-07, "loss": 0.674, "step": 26630 }, { "epoch": 2.69, "grad_norm": 4.711494616548537, "learning_rate": 3.310265888611097e-07, "loss": 0.6873, "step": 26635 }, { "epoch": 2.69, "grad_norm": 5.4983153795010775, "learning_rate": 3.2997793832476146e-07, "loss": 0.6039, "step": 26640 }, { "epoch": 2.69, "grad_norm": 4.69938871439085, "learning_rate": 3.2893089473854447e-07, "loss": 0.6504, "step": 26645 }, { "epoch": 2.69, "grad_norm": 4.5622571590941225, "learning_rate": 3.2788545846274553e-07, "loss": 0.6531, "step": 26650 }, { "epoch": 2.69, "grad_norm": 6.100048240343806, "learning_rate": 3.2684162985709757e-07, "loss": 0.6577, "step": 26655 }, { "epoch": 2.69, "grad_norm": 6.270853011278117, "learning_rate": 3.2579940928078204e-07, "loss": 0.6192, "step": 26660 }, { "epoch": 2.69, "grad_norm": 4.928062910367746, "learning_rate": 3.247587970924243e-07, "loss": 0.6612, "step": 26665 }, { "epoch": 2.69, "grad_norm": 8.925526303808986, "learning_rate": 3.237197936501002e-07, "loss": 0.6387, "step": 26670 }, { "epoch": 2.69, "grad_norm": 6.50319252883846, "learning_rate": 3.226823993113276e-07, "loss": 0.6456, "step": 26675 }, { "epoch": 2.69, "grad_norm": 4.515467389106821, "learning_rate": 3.216466144330749e-07, "loss": 0.6674, "step": 26680 }, { "epoch": 2.69, "grad_norm": 4.683024789220738, "learning_rate": 3.2061243937175304e-07, "loss": 0.6754, "step": 26685 }, { "epoch": 2.69, "grad_norm": 4.525353924162903, "learning_rate": 3.195798744832235e-07, "loss": 0.648, "step": 26690 }, { "epoch": 2.69, "grad_norm": 4.236326022567566, "learning_rate": 3.185489201227865e-07, "loss": 0.6259, "step": 26695 }, { "epoch": 2.69, "grad_norm": 7.172427585267906, "learning_rate": 3.1751957664519606e-07, "loss": 0.6693, "step": 26700 }, { "epoch": 2.69, "grad_norm": 4.397112012371753, "learning_rate": 3.164918444046461e-07, "loss": 0.6263, "step": 26705 }, { "epoch": 2.69, "grad_norm": 4.769481924908539, "learning_rate": 3.154657237547798e-07, "loss": 0.6951, "step": 26710 }, { "epoch": 2.69, "grad_norm": 4.80030276412543, "learning_rate": 3.144412150486831e-07, "loss": 0.6363, "step": 26715 }, { "epoch": 2.69, "grad_norm": 4.923919053517498, "learning_rate": 3.134183186388906e-07, "loss": 0.7015, "step": 26720 }, { "epoch": 2.69, "grad_norm": 4.634432469691726, "learning_rate": 3.1239703487737696e-07, "loss": 0.6231, "step": 26725 }, { "epoch": 2.69, "grad_norm": 4.6519209114755435, "learning_rate": 3.1137736411556705e-07, "loss": 0.6219, "step": 26730 }, { "epoch": 2.7, "grad_norm": 4.644362142363348, "learning_rate": 3.103593067043276e-07, "loss": 0.6476, "step": 26735 }, { "epoch": 2.7, "grad_norm": 5.0411318413340815, "learning_rate": 3.093428629939721e-07, "loss": 0.6551, "step": 26740 }, { "epoch": 2.7, "grad_norm": 5.615899235277585, "learning_rate": 3.0832803333425643e-07, "loss": 0.6275, "step": 26745 }, { "epoch": 2.7, "grad_norm": 5.031842335224812, "learning_rate": 3.0731481807438513e-07, "loss": 0.652, "step": 26750 }, { "epoch": 2.7, "grad_norm": 4.7918819848872864, "learning_rate": 3.063032175630015e-07, "loss": 0.6294, "step": 26755 }, { "epoch": 2.7, "grad_norm": 5.49577226983595, "learning_rate": 3.052932321481983e-07, "loss": 0.6443, "step": 26760 }, { "epoch": 2.7, "grad_norm": 5.230227096478425, "learning_rate": 3.0428486217750907e-07, "loss": 0.6361, "step": 26765 }, { "epoch": 2.7, "grad_norm": 4.67422220177396, "learning_rate": 3.032781079979147e-07, "loss": 0.6537, "step": 26770 }, { "epoch": 2.7, "grad_norm": 5.363855352539699, "learning_rate": 3.0227296995583797e-07, "loss": 0.6151, "step": 26775 }, { "epoch": 2.7, "grad_norm": 7.584201273966142, "learning_rate": 3.012694483971446e-07, "loss": 0.6498, "step": 26780 }, { "epoch": 2.7, "grad_norm": 4.896712764016411, "learning_rate": 3.0026754366714703e-07, "loss": 0.6589, "step": 26785 }, { "epoch": 2.7, "grad_norm": 5.134313710305708, "learning_rate": 2.9926725611059747e-07, "loss": 0.6656, "step": 26790 }, { "epoch": 2.7, "grad_norm": 4.870412747383471, "learning_rate": 2.982685860716966e-07, "loss": 0.6446, "step": 26795 }, { "epoch": 2.7, "grad_norm": 4.89087219092221, "learning_rate": 2.9727153389408347e-07, "loss": 0.6327, "step": 26800 }, { "epoch": 2.7, "grad_norm": 4.952435309248969, "learning_rate": 2.962760999208453e-07, "loss": 0.665, "step": 26805 }, { "epoch": 2.7, "grad_norm": 4.448454332529152, "learning_rate": 2.95282284494508e-07, "loss": 0.6392, "step": 26810 }, { "epoch": 2.7, "grad_norm": 5.802846152453651, "learning_rate": 2.9429008795704317e-07, "loss": 0.6402, "step": 26815 }, { "epoch": 2.7, "grad_norm": 5.889305219959239, "learning_rate": 2.932995106498637e-07, "loss": 0.6156, "step": 26820 }, { "epoch": 2.7, "grad_norm": 4.515546156661981, "learning_rate": 2.9231055291382816e-07, "loss": 0.6196, "step": 26825 }, { "epoch": 2.71, "grad_norm": 4.409786442384293, "learning_rate": 2.9132321508923424e-07, "loss": 0.615, "step": 26830 }, { "epoch": 2.71, "grad_norm": 4.82496443011332, "learning_rate": 2.903374975158257e-07, "loss": 0.666, "step": 26835 }, { "epoch": 2.71, "grad_norm": 5.321518340014227, "learning_rate": 2.893534005327858e-07, "loss": 0.6391, "step": 26840 }, { "epoch": 2.71, "grad_norm": 4.6860386188295795, "learning_rate": 2.883709244787414e-07, "loss": 0.6422, "step": 26845 }, { "epoch": 2.71, "grad_norm": 4.54933372316617, "learning_rate": 2.8739006969176087e-07, "loss": 0.6227, "step": 26850 }, { "epoch": 2.71, "grad_norm": 4.550003769397981, "learning_rate": 2.8641083650935765e-07, "loss": 0.6237, "step": 26855 }, { "epoch": 2.71, "grad_norm": 5.100015574627647, "learning_rate": 2.854332252684827e-07, "loss": 0.6396, "step": 26860 }, { "epoch": 2.71, "grad_norm": 4.526484394100703, "learning_rate": 2.844572363055326e-07, "loss": 0.6314, "step": 26865 }, { "epoch": 2.71, "grad_norm": 4.262718390081325, "learning_rate": 2.8348286995634247e-07, "loss": 0.6888, "step": 26870 }, { "epoch": 2.71, "grad_norm": 5.466091994999804, "learning_rate": 2.8251012655619417e-07, "loss": 0.6439, "step": 26875 }, { "epoch": 2.71, "grad_norm": 4.722071904748594, "learning_rate": 2.815390064398038e-07, "loss": 0.6373, "step": 26880 }, { "epoch": 2.71, "grad_norm": 5.302036926701708, "learning_rate": 2.8056950994133524e-07, "loss": 0.6513, "step": 26885 }, { "epoch": 2.71, "grad_norm": 4.580234389013792, "learning_rate": 2.796016373943905e-07, "loss": 0.6607, "step": 26890 }, { "epoch": 2.71, "grad_norm": 5.854842515682165, "learning_rate": 2.786353891320143e-07, "loss": 0.6205, "step": 26895 }, { "epoch": 2.71, "grad_norm": 5.375572061695649, "learning_rate": 2.7767076548669004e-07, "loss": 0.6762, "step": 26900 }, { "epoch": 2.71, "grad_norm": 4.422668495277672, "learning_rate": 2.7670776679034727e-07, "loss": 0.6533, "step": 26905 }, { "epoch": 2.71, "grad_norm": 6.482983874201208, "learning_rate": 2.757463933743493e-07, "loss": 0.6156, "step": 26910 }, { "epoch": 2.71, "grad_norm": 5.274977240043379, "learning_rate": 2.747866455695053e-07, "loss": 0.6541, "step": 26915 }, { "epoch": 2.71, "grad_norm": 4.630084332091941, "learning_rate": 2.7382852370606337e-07, "loss": 0.6466, "step": 26920 }, { "epoch": 2.71, "grad_norm": 4.469241103432817, "learning_rate": 2.7287202811371203e-07, "loss": 0.6348, "step": 26925 }, { "epoch": 2.72, "grad_norm": 6.203696119549017, "learning_rate": 2.7191715912158187e-07, "loss": 0.6029, "step": 26930 }, { "epoch": 2.72, "grad_norm": 5.2311056726551985, "learning_rate": 2.7096391705824067e-07, "loss": 0.6521, "step": 26935 }, { "epoch": 2.72, "grad_norm": 4.831648020213548, "learning_rate": 2.700123022516998e-07, "loss": 0.6684, "step": 26940 }, { "epoch": 2.72, "grad_norm": 4.001125628731334, "learning_rate": 2.6906231502940637e-07, "loss": 0.6459, "step": 26945 }, { "epoch": 2.72, "grad_norm": 6.581463486268398, "learning_rate": 2.6811395571825327e-07, "loss": 0.6374, "step": 26950 }, { "epoch": 2.72, "grad_norm": 4.827504175309236, "learning_rate": 2.671672246445678e-07, "loss": 0.6118, "step": 26955 }, { "epoch": 2.72, "grad_norm": 6.577015648069526, "learning_rate": 2.662221221341199e-07, "loss": 0.6472, "step": 26960 }, { "epoch": 2.72, "grad_norm": 5.8216784748881185, "learning_rate": 2.652786485121195e-07, "loss": 0.6551, "step": 26965 }, { "epoch": 2.72, "grad_norm": 4.206714745623948, "learning_rate": 2.643368041032135e-07, "loss": 0.6614, "step": 26970 }, { "epoch": 2.72, "grad_norm": 4.9204799966670425, "learning_rate": 2.6339658923148934e-07, "loss": 0.65, "step": 26975 }, { "epoch": 2.72, "grad_norm": 5.502378422522532, "learning_rate": 2.624580042204755e-07, "loss": 0.5966, "step": 26980 }, { "epoch": 2.72, "grad_norm": 4.841075778399853, "learning_rate": 2.615210493931369e-07, "loss": 0.6508, "step": 26985 }, { "epoch": 2.72, "grad_norm": 5.457993744591424, "learning_rate": 2.605857250718802e-07, "loss": 0.6604, "step": 26990 }, { "epoch": 2.72, "grad_norm": 5.407642117421727, "learning_rate": 2.596520315785489e-07, "loss": 0.6132, "step": 26995 }, { "epoch": 2.72, "grad_norm": 4.586783087603529, "learning_rate": 2.5871996923442555e-07, "loss": 0.6758, "step": 27000 }, { "epoch": 2.72, "grad_norm": 5.3373244259396495, "learning_rate": 2.5778953836023247e-07, "loss": 0.6464, "step": 27005 }, { "epoch": 2.72, "grad_norm": 4.6943349890765145, "learning_rate": 2.5686073927612964e-07, "loss": 0.6582, "step": 27010 }, { "epoch": 2.72, "grad_norm": 5.550383608738202, "learning_rate": 2.5593357230171645e-07, "loss": 0.6317, "step": 27015 }, { "epoch": 2.72, "grad_norm": 5.585911485678205, "learning_rate": 2.550080377560299e-07, "loss": 0.6652, "step": 27020 }, { "epoch": 2.72, "grad_norm": 4.611901149160534, "learning_rate": 2.540841359575458e-07, "loss": 0.6434, "step": 27025 }, { "epoch": 2.73, "grad_norm": 6.226867509992674, "learning_rate": 2.5316186722417756e-07, "loss": 0.6384, "step": 27030 }, { "epoch": 2.73, "grad_norm": 4.889591104976683, "learning_rate": 2.5224123187327753e-07, "loss": 0.6282, "step": 27035 }, { "epoch": 2.73, "grad_norm": 5.138212027862173, "learning_rate": 2.5132223022163395e-07, "loss": 0.6599, "step": 27040 }, { "epoch": 2.73, "grad_norm": 4.678815642637734, "learning_rate": 2.5040486258547603e-07, "loss": 0.6327, "step": 27045 }, { "epoch": 2.73, "grad_norm": 4.569598167796238, "learning_rate": 2.4948912928046797e-07, "loss": 0.6298, "step": 27050 }, { "epoch": 2.73, "grad_norm": 5.7706974750619535, "learning_rate": 2.4857503062171386e-07, "loss": 0.6319, "step": 27055 }, { "epoch": 2.73, "grad_norm": 6.3280113977072965, "learning_rate": 2.4766256692375367e-07, "loss": 0.6304, "step": 27060 }, { "epoch": 2.73, "grad_norm": 5.076679797067638, "learning_rate": 2.467517385005652e-07, "loss": 0.6282, "step": 27065 }, { "epoch": 2.73, "grad_norm": 5.1371346360423, "learning_rate": 2.4584254566556276e-07, "loss": 0.618, "step": 27070 }, { "epoch": 2.73, "grad_norm": 4.86654090029525, "learning_rate": 2.4493498873160046e-07, "loss": 0.6152, "step": 27075 }, { "epoch": 2.73, "grad_norm": 4.641826973860905, "learning_rate": 2.440290680109664e-07, "loss": 0.6607, "step": 27080 }, { "epoch": 2.73, "grad_norm": 4.154592587557687, "learning_rate": 2.4312478381538775e-07, "loss": 0.6139, "step": 27085 }, { "epoch": 2.73, "grad_norm": 4.593874514370757, "learning_rate": 2.422221364560279e-07, "loss": 0.6748, "step": 27090 }, { "epoch": 2.73, "grad_norm": 4.655599640633073, "learning_rate": 2.413211262434867e-07, "loss": 0.6744, "step": 27095 }, { "epoch": 2.73, "grad_norm": 4.777183373107945, "learning_rate": 2.404217534878006e-07, "loss": 0.6518, "step": 27100 }, { "epoch": 2.73, "grad_norm": 4.5609775207126, "learning_rate": 2.395240184984432e-07, "loss": 0.6391, "step": 27105 }, { "epoch": 2.73, "grad_norm": 4.5354817359318975, "learning_rate": 2.3862792158432403e-07, "loss": 0.6396, "step": 27110 }, { "epoch": 2.73, "grad_norm": 4.840120970779168, "learning_rate": 2.377334630537903e-07, "loss": 0.6534, "step": 27115 }, { "epoch": 2.73, "grad_norm": 4.754107720175892, "learning_rate": 2.3684064321462309e-07, "loss": 0.6446, "step": 27120 }, { "epoch": 2.73, "grad_norm": 4.446069918589517, "learning_rate": 2.3594946237404104e-07, "loss": 0.6295, "step": 27125 }, { "epoch": 2.74, "grad_norm": 5.89894032512749, "learning_rate": 2.3505992083869832e-07, "loss": 0.641, "step": 27130 }, { "epoch": 2.74, "grad_norm": 4.866888053961888, "learning_rate": 2.3417201891468677e-07, "loss": 0.6797, "step": 27135 }, { "epoch": 2.74, "grad_norm": 5.704806075752119, "learning_rate": 2.332857569075303e-07, "loss": 0.6402, "step": 27140 }, { "epoch": 2.74, "grad_norm": 4.795521106834384, "learning_rate": 2.3240113512219332e-07, "loss": 0.6492, "step": 27145 }, { "epoch": 2.74, "grad_norm": 4.505870048445661, "learning_rate": 2.3151815386307175e-07, "loss": 0.6641, "step": 27150 }, { "epoch": 2.74, "grad_norm": 4.294647387657272, "learning_rate": 2.3063681343399923e-07, "loss": 0.6221, "step": 27155 }, { "epoch": 2.74, "grad_norm": 4.379296908401169, "learning_rate": 2.297571141382443e-07, "loss": 0.6102, "step": 27160 }, { "epoch": 2.74, "grad_norm": 6.542098819508065, "learning_rate": 2.2887905627850926e-07, "loss": 0.6615, "step": 27165 }, { "epoch": 2.74, "grad_norm": 4.604680790205558, "learning_rate": 2.2800264015693464e-07, "loss": 0.6596, "step": 27170 }, { "epoch": 2.74, "grad_norm": 4.17649037933092, "learning_rate": 2.2712786607509308e-07, "loss": 0.624, "step": 27175 }, { "epoch": 2.74, "grad_norm": 4.553300967953392, "learning_rate": 2.262547343339949e-07, "loss": 0.6386, "step": 27180 }, { "epoch": 2.74, "grad_norm": 4.575406320081005, "learning_rate": 2.2538324523408318e-07, "loss": 0.6476, "step": 27185 }, { "epoch": 2.74, "grad_norm": 4.569759907083859, "learning_rate": 2.2451339907523684e-07, "loss": 0.6641, "step": 27190 }, { "epoch": 2.74, "grad_norm": 4.901676613919331, "learning_rate": 2.236451961567676e-07, "loss": 0.6235, "step": 27195 }, { "epoch": 2.74, "grad_norm": 4.735981562262451, "learning_rate": 2.2277863677742538e-07, "loss": 0.635, "step": 27200 }, { "epoch": 2.74, "grad_norm": 5.4045401013505705, "learning_rate": 2.2191372123539057e-07, "loss": 0.622, "step": 27205 }, { "epoch": 2.74, "grad_norm": 4.5875845615749595, "learning_rate": 2.2105044982828173e-07, "loss": 0.6534, "step": 27210 }, { "epoch": 2.74, "grad_norm": 5.110692783266679, "learning_rate": 2.2018882285314848e-07, "loss": 0.6655, "step": 27215 }, { "epoch": 2.74, "grad_norm": 4.163729086764986, "learning_rate": 2.1932884060647585e-07, "loss": 0.6263, "step": 27220 }, { "epoch": 2.74, "grad_norm": 5.04769979373338, "learning_rate": 2.1847050338418275e-07, "loss": 0.6169, "step": 27225 }, { "epoch": 2.75, "grad_norm": 5.0579975466535645, "learning_rate": 2.1761381148162286e-07, "loss": 0.6224, "step": 27230 }, { "epoch": 2.75, "grad_norm": 5.477945345762049, "learning_rate": 2.1675876519358264e-07, "loss": 0.631, "step": 27235 }, { "epoch": 2.75, "grad_norm": 4.502067958512376, "learning_rate": 2.159053648142828e-07, "loss": 0.6526, "step": 27240 }, { "epoch": 2.75, "grad_norm": 4.645772831828346, "learning_rate": 2.1505361063737795e-07, "loss": 0.6892, "step": 27245 }, { "epoch": 2.75, "grad_norm": 4.7562112410395585, "learning_rate": 2.1420350295595527e-07, "loss": 0.6234, "step": 27250 }, { "epoch": 2.75, "grad_norm": 4.715893602665484, "learning_rate": 2.1335504206253577e-07, "loss": 0.6438, "step": 27255 }, { "epoch": 2.75, "grad_norm": 4.788625339216968, "learning_rate": 2.1250822824907535e-07, "loss": 0.6887, "step": 27260 }, { "epoch": 2.75, "grad_norm": 5.742321681343637, "learning_rate": 2.116630618069604e-07, "loss": 0.6219, "step": 27265 }, { "epoch": 2.75, "grad_norm": 5.879362925821528, "learning_rate": 2.108195430270127e-07, "loss": 0.6526, "step": 27270 }, { "epoch": 2.75, "grad_norm": 5.045335088378256, "learning_rate": 2.0997767219948616e-07, "loss": 0.6327, "step": 27275 }, { "epoch": 2.75, "grad_norm": 5.009295957562433, "learning_rate": 2.09137449614068e-07, "loss": 0.6294, "step": 27280 }, { "epoch": 2.75, "grad_norm": 4.434427769144761, "learning_rate": 2.082988755598764e-07, "loss": 0.6671, "step": 27285 }, { "epoch": 2.75, "grad_norm": 4.990377036828536, "learning_rate": 2.0746195032546657e-07, "loss": 0.6371, "step": 27290 }, { "epoch": 2.75, "grad_norm": 4.797263478041765, "learning_rate": 2.0662667419882155e-07, "loss": 0.6445, "step": 27295 }, { "epoch": 2.75, "grad_norm": 5.001224407816903, "learning_rate": 2.0579304746735917e-07, "loss": 0.6829, "step": 27300 }, { "epoch": 2.75, "grad_norm": 5.193277092738359, "learning_rate": 2.049610704179311e-07, "loss": 0.6176, "step": 27305 }, { "epoch": 2.75, "grad_norm": 5.041487298048559, "learning_rate": 2.0413074333681893e-07, "loss": 0.6126, "step": 27310 }, { "epoch": 2.75, "grad_norm": 7.129523974749876, "learning_rate": 2.0330206650973683e-07, "loss": 0.6541, "step": 27315 }, { "epoch": 2.75, "grad_norm": 4.825415696213447, "learning_rate": 2.024750402218323e-07, "loss": 0.6706, "step": 27320 }, { "epoch": 2.75, "grad_norm": 6.6591871746914695, "learning_rate": 2.0164966475768432e-07, "loss": 0.6622, "step": 27325 }, { "epoch": 2.76, "grad_norm": 6.145263719644995, "learning_rate": 2.0082594040130353e-07, "loss": 0.635, "step": 27330 }, { "epoch": 2.76, "grad_norm": 4.636647280013002, "learning_rate": 2.000038674361332e-07, "loss": 0.6217, "step": 27335 }, { "epoch": 2.76, "grad_norm": 4.4739024748437695, "learning_rate": 1.9918344614504814e-07, "loss": 0.6514, "step": 27340 }, { "epoch": 2.76, "grad_norm": 5.0127000776778585, "learning_rate": 1.9836467681035365e-07, "loss": 0.6368, "step": 27345 }, { "epoch": 2.76, "grad_norm": 4.906114484345132, "learning_rate": 1.975475597137866e-07, "loss": 0.6439, "step": 27350 }, { "epoch": 2.76, "grad_norm": 4.916876018127332, "learning_rate": 1.967320951365187e-07, "loss": 0.6158, "step": 27355 }, { "epoch": 2.76, "grad_norm": 4.487110739372675, "learning_rate": 1.959182833591483e-07, "loss": 0.6318, "step": 27360 }, { "epoch": 2.76, "grad_norm": 4.636937516626221, "learning_rate": 1.951061246617092e-07, "loss": 0.6577, "step": 27365 }, { "epoch": 2.76, "grad_norm": 5.932916527673636, "learning_rate": 1.942956193236628e-07, "loss": 0.6169, "step": 27370 }, { "epoch": 2.76, "grad_norm": 4.89725898238156, "learning_rate": 1.934867676239044e-07, "loss": 0.6313, "step": 27375 }, { "epoch": 2.76, "grad_norm": 4.572865175199781, "learning_rate": 1.926795698407574e-07, "loss": 0.6603, "step": 27380 }, { "epoch": 2.76, "grad_norm": 4.9158419371051005, "learning_rate": 1.918740262519797e-07, "loss": 0.625, "step": 27385 }, { "epoch": 2.76, "grad_norm": 4.900828600336452, "learning_rate": 1.9107013713475674e-07, "loss": 0.6687, "step": 27390 }, { "epoch": 2.76, "grad_norm": 5.230901880339584, "learning_rate": 1.9026790276570728e-07, "loss": 0.6119, "step": 27395 }, { "epoch": 2.76, "grad_norm": 4.609157007664113, "learning_rate": 1.8946732342087825e-07, "loss": 0.6388, "step": 27400 }, { "epoch": 2.76, "grad_norm": 4.811633485820632, "learning_rate": 1.8866839937574876e-07, "loss": 0.6433, "step": 27405 }, { "epoch": 2.76, "grad_norm": 4.5620610799291015, "learning_rate": 1.8787113090522723e-07, "loss": 0.611, "step": 27410 }, { "epoch": 2.76, "grad_norm": 5.112562968547693, "learning_rate": 1.8707551828365368e-07, "loss": 0.6381, "step": 27415 }, { "epoch": 2.76, "grad_norm": 4.52468498356504, "learning_rate": 1.8628156178479794e-07, "loss": 0.6312, "step": 27420 }, { "epoch": 2.77, "grad_norm": 5.258889515886162, "learning_rate": 1.854892616818582e-07, "loss": 0.6233, "step": 27425 }, { "epoch": 2.77, "grad_norm": 6.628910817758399, "learning_rate": 1.8469861824746583e-07, "loss": 0.651, "step": 27430 }, { "epoch": 2.77, "grad_norm": 4.163731183357664, "learning_rate": 1.8390963175367983e-07, "loss": 0.6219, "step": 27435 }, { "epoch": 2.77, "grad_norm": 4.51936940513308, "learning_rate": 1.831223024719897e-07, "loss": 0.6593, "step": 27440 }, { "epoch": 2.77, "grad_norm": 4.8868413077096315, "learning_rate": 1.823366306733143e-07, "loss": 0.6509, "step": 27445 }, { "epoch": 2.77, "grad_norm": 4.137439186382453, "learning_rate": 1.8155261662800295e-07, "loss": 0.6079, "step": 27450 }, { "epoch": 2.77, "grad_norm": 6.193024837189629, "learning_rate": 1.8077026060583369e-07, "loss": 0.6733, "step": 27455 }, { "epoch": 2.77, "grad_norm": 4.470445592225861, "learning_rate": 1.7998956287601565e-07, "loss": 0.6343, "step": 27460 }, { "epoch": 2.77, "grad_norm": 4.9839207959687695, "learning_rate": 1.7921052370718505e-07, "loss": 0.6557, "step": 27465 }, { "epoch": 2.77, "grad_norm": 5.966319709791478, "learning_rate": 1.7843314336740913e-07, "loss": 0.6608, "step": 27470 }, { "epoch": 2.77, "grad_norm": 4.612224889221658, "learning_rate": 1.776574221241828e-07, "loss": 0.6375, "step": 27475 }, { "epoch": 2.77, "grad_norm": 5.1061979664003685, "learning_rate": 1.7688336024443197e-07, "loss": 0.6297, "step": 27480 }, { "epoch": 2.77, "grad_norm": 4.620897017042376, "learning_rate": 1.7611095799450973e-07, "loss": 0.6672, "step": 27485 }, { "epoch": 2.77, "grad_norm": 5.325307617224155, "learning_rate": 1.753402156402001e-07, "loss": 0.6339, "step": 27490 }, { "epoch": 2.77, "grad_norm": 5.107182911004926, "learning_rate": 1.7457113344671484e-07, "loss": 0.6398, "step": 27495 }, { "epoch": 2.77, "grad_norm": 6.10444693123381, "learning_rate": 1.7380371167869282e-07, "loss": 0.6638, "step": 27500 }, { "epoch": 2.77, "grad_norm": 4.388963100605161, "learning_rate": 1.730379506002039e-07, "loss": 0.6182, "step": 27505 }, { "epoch": 2.77, "grad_norm": 4.527852561965197, "learning_rate": 1.7227385047474676e-07, "loss": 0.6453, "step": 27510 }, { "epoch": 2.77, "grad_norm": 4.947131925278648, "learning_rate": 1.7151141156524554e-07, "loss": 0.6488, "step": 27515 }, { "epoch": 2.77, "grad_norm": 4.888298137702055, "learning_rate": 1.70750634134057e-07, "loss": 0.6201, "step": 27520 }, { "epoch": 2.78, "grad_norm": 5.068734448012588, "learning_rate": 1.6999151844296237e-07, "loss": 0.6648, "step": 27525 }, { "epoch": 2.78, "grad_norm": 5.719138525994361, "learning_rate": 1.6923406475317316e-07, "loss": 0.6649, "step": 27530 }, { "epoch": 2.78, "grad_norm": 5.014641777763894, "learning_rate": 1.684782733253276e-07, "loss": 0.645, "step": 27535 }, { "epoch": 2.78, "grad_norm": 4.405259394411325, "learning_rate": 1.6772414441949436e-07, "loss": 0.5895, "step": 27540 }, { "epoch": 2.78, "grad_norm": 4.434647710812157, "learning_rate": 1.6697167829516747e-07, "loss": 0.6287, "step": 27545 }, { "epoch": 2.78, "grad_norm": 4.570729319226043, "learning_rate": 1.6622087521126983e-07, "loss": 0.6165, "step": 27550 }, { "epoch": 2.78, "grad_norm": 4.708876473272213, "learning_rate": 1.6547173542615258e-07, "loss": 0.6634, "step": 27555 }, { "epoch": 2.78, "grad_norm": 5.443803641000985, "learning_rate": 1.6472425919759338e-07, "loss": 0.6735, "step": 27560 }, { "epoch": 2.78, "grad_norm": 5.503813881852303, "learning_rate": 1.6397844678279872e-07, "loss": 0.6524, "step": 27565 }, { "epoch": 2.78, "grad_norm": 5.262393504748039, "learning_rate": 1.632342984384011e-07, "loss": 0.6522, "step": 27570 }, { "epoch": 2.78, "grad_norm": 5.533398944217278, "learning_rate": 1.6249181442046235e-07, "loss": 0.6556, "step": 27575 }, { "epoch": 2.78, "grad_norm": 4.584461282149826, "learning_rate": 1.6175099498446866e-07, "loss": 0.6376, "step": 27580 }, { "epoch": 2.78, "grad_norm": 4.740322548336197, "learning_rate": 1.610118403853378e-07, "loss": 0.659, "step": 27585 }, { "epoch": 2.78, "grad_norm": 4.585743047346483, "learning_rate": 1.6027435087741072e-07, "loss": 0.6057, "step": 27590 }, { "epoch": 2.78, "grad_norm": 6.2909688966884225, "learning_rate": 1.5953852671445668e-07, "loss": 0.654, "step": 27595 }, { "epoch": 2.78, "grad_norm": 4.97205188031535, "learning_rate": 1.5880436814967258e-07, "loss": 0.627, "step": 27600 }, { "epoch": 2.78, "grad_norm": 4.519227048096504, "learning_rate": 1.5807187543568136e-07, "loss": 0.6219, "step": 27605 }, { "epoch": 2.78, "grad_norm": 4.361661472488166, "learning_rate": 1.5734104882453305e-07, "loss": 0.6861, "step": 27610 }, { "epoch": 2.78, "grad_norm": 4.8666409971276074, "learning_rate": 1.5661188856770537e-07, "loss": 0.6247, "step": 27615 }, { "epoch": 2.78, "grad_norm": 4.717116455709111, "learning_rate": 1.558843949161004e-07, "loss": 0.6532, "step": 27620 }, { "epoch": 2.79, "grad_norm": 4.630714814407945, "learning_rate": 1.5515856812004849e-07, "loss": 0.6157, "step": 27625 }, { "epoch": 2.79, "grad_norm": 5.28299970158232, "learning_rate": 1.544344084293059e-07, "loss": 0.6272, "step": 27630 }, { "epoch": 2.79, "grad_norm": 6.239301778257726, "learning_rate": 1.5371191609305502e-07, "loss": 0.6476, "step": 27635 }, { "epoch": 2.79, "grad_norm": 5.132571940847922, "learning_rate": 1.5299109135990531e-07, "loss": 0.6757, "step": 27640 }, { "epoch": 2.79, "grad_norm": 6.163828640077864, "learning_rate": 1.5227193447789167e-07, "loss": 0.646, "step": 27645 }, { "epoch": 2.79, "grad_norm": 5.623353690086022, "learning_rate": 1.5155444569447565e-07, "loss": 0.6297, "step": 27650 }, { "epoch": 2.79, "grad_norm": 4.618225671423743, "learning_rate": 1.5083862525654413e-07, "loss": 0.6644, "step": 27655 }, { "epoch": 2.79, "grad_norm": 4.8193840512997435, "learning_rate": 1.5012447341040903e-07, "loss": 0.6378, "step": 27660 }, { "epoch": 2.79, "grad_norm": 4.872251860166495, "learning_rate": 1.4941199040181152e-07, "loss": 0.6605, "step": 27665 }, { "epoch": 2.79, "grad_norm": 4.521733108340862, "learning_rate": 1.4870117647591441e-07, "loss": 0.6715, "step": 27670 }, { "epoch": 2.79, "grad_norm": 5.034286716147694, "learning_rate": 1.4799203187730982e-07, "loss": 0.6315, "step": 27675 }, { "epoch": 2.79, "grad_norm": 4.99830347398244, "learning_rate": 1.4728455685001253e-07, "loss": 0.6192, "step": 27680 }, { "epoch": 2.79, "grad_norm": 5.923759561043896, "learning_rate": 1.4657875163746448e-07, "loss": 0.6361, "step": 27685 }, { "epoch": 2.79, "grad_norm": 4.4891986428328545, "learning_rate": 1.4587461648253253e-07, "loss": 0.631, "step": 27690 }, { "epoch": 2.79, "grad_norm": 4.620859160067768, "learning_rate": 1.4517215162750841e-07, "loss": 0.626, "step": 27695 }, { "epoch": 2.79, "grad_norm": 4.350124688452219, "learning_rate": 1.4447135731411044e-07, "loss": 0.6229, "step": 27700 }, { "epoch": 2.79, "grad_norm": 5.222093872008338, "learning_rate": 1.4377223378348014e-07, "loss": 0.6814, "step": 27705 }, { "epoch": 2.79, "grad_norm": 4.804943687592125, "learning_rate": 1.430747812761868e-07, "loss": 0.6751, "step": 27710 }, { "epoch": 2.79, "grad_norm": 4.434005038612656, "learning_rate": 1.4237900003222172e-07, "loss": 0.6487, "step": 27715 }, { "epoch": 2.79, "grad_norm": 4.983682819390938, "learning_rate": 1.4168489029100395e-07, "loss": 0.6641, "step": 27720 }, { "epoch": 2.8, "grad_norm": 4.826412766166672, "learning_rate": 1.4099245229137414e-07, "loss": 0.6253, "step": 27725 }, { "epoch": 2.8, "grad_norm": 4.98851863031606, "learning_rate": 1.4030168627160112e-07, "loss": 0.6159, "step": 27730 }, { "epoch": 2.8, "grad_norm": 4.966095621552294, "learning_rate": 1.3961259246937642e-07, "loss": 0.6586, "step": 27735 }, { "epoch": 2.8, "grad_norm": 5.077498218523122, "learning_rate": 1.3892517112181646e-07, "loss": 0.6666, "step": 27740 }, { "epoch": 2.8, "grad_norm": 4.4972011043076145, "learning_rate": 1.3823942246546207e-07, "loss": 0.6735, "step": 27745 }, { "epoch": 2.8, "grad_norm": 4.6509709640658645, "learning_rate": 1.3755534673627947e-07, "loss": 0.6801, "step": 27750 }, { "epoch": 2.8, "grad_norm": 4.454353433000559, "learning_rate": 1.36872944169657e-07, "loss": 0.6128, "step": 27755 }, { "epoch": 2.8, "grad_norm": 5.563784938049496, "learning_rate": 1.361922150004097e-07, "loss": 0.6344, "step": 27760 }, { "epoch": 2.8, "grad_norm": 4.688473347214066, "learning_rate": 1.355131594627762e-07, "loss": 0.6378, "step": 27765 }, { "epoch": 2.8, "grad_norm": 4.675427454072095, "learning_rate": 1.3483577779041802e-07, "loss": 0.6397, "step": 27770 }, { "epoch": 2.8, "grad_norm": 5.560500501070449, "learning_rate": 1.3416007021642252e-07, "loss": 0.6303, "step": 27775 }, { "epoch": 2.8, "grad_norm": 5.279496533965027, "learning_rate": 1.334860369732993e-07, "loss": 0.6404, "step": 27780 }, { "epoch": 2.8, "grad_norm": 4.514573270455693, "learning_rate": 1.328136782929823e-07, "loss": 0.6456, "step": 27785 }, { "epoch": 2.8, "grad_norm": 4.712080775356427, "learning_rate": 1.3214299440683032e-07, "loss": 0.6441, "step": 27790 }, { "epoch": 2.8, "grad_norm": 5.380594002720372, "learning_rate": 1.3147398554562374e-07, "loss": 0.6455, "step": 27795 }, { "epoch": 2.8, "grad_norm": 4.474777394755265, "learning_rate": 1.3080665193956954e-07, "loss": 0.6088, "step": 27800 }, { "epoch": 2.8, "grad_norm": 5.393544527385068, "learning_rate": 1.301409938182968e-07, "loss": 0.6282, "step": 27805 }, { "epoch": 2.8, "grad_norm": 4.875236489949824, "learning_rate": 1.294770114108551e-07, "loss": 0.6368, "step": 27810 }, { "epoch": 2.8, "grad_norm": 6.864909001772833, "learning_rate": 1.2881470494572278e-07, "loss": 0.6636, "step": 27815 }, { "epoch": 2.8, "grad_norm": 5.021612888517245, "learning_rate": 1.2815407465079754e-07, "loss": 0.6444, "step": 27820 }, { "epoch": 2.81, "grad_norm": 7.051741591293166, "learning_rate": 1.2749512075340198e-07, "loss": 0.6464, "step": 27825 }, { "epoch": 2.81, "grad_norm": 5.710481817137641, "learning_rate": 1.268378434802814e-07, "loss": 0.6608, "step": 27830 }, { "epoch": 2.81, "grad_norm": 4.617873854790824, "learning_rate": 1.2618224305760596e-07, "loss": 0.6828, "step": 27835 }, { "epoch": 2.81, "grad_norm": 4.886618090636221, "learning_rate": 1.2552831971096412e-07, "loss": 0.6615, "step": 27840 }, { "epoch": 2.81, "grad_norm": 4.696371454416478, "learning_rate": 1.2487607366537258e-07, "loss": 0.6118, "step": 27845 }, { "epoch": 2.81, "grad_norm": 5.019821337329065, "learning_rate": 1.2422550514526678e-07, "loss": 0.6458, "step": 27850 }, { "epoch": 2.81, "grad_norm": 4.665430937369177, "learning_rate": 1.2357661437450873e-07, "loss": 0.6386, "step": 27855 }, { "epoch": 2.81, "grad_norm": 4.63196248195062, "learning_rate": 1.229294015763799e-07, "loss": 0.6203, "step": 27860 }, { "epoch": 2.81, "grad_norm": 4.628889587290443, "learning_rate": 1.2228386697358653e-07, "loss": 0.6239, "step": 27865 }, { "epoch": 2.81, "grad_norm": 4.365112314017564, "learning_rate": 1.2164001078825482e-07, "loss": 0.623, "step": 27870 }, { "epoch": 2.81, "grad_norm": 5.258002898744488, "learning_rate": 1.2099783324193647e-07, "loss": 0.6571, "step": 27875 }, { "epoch": 2.81, "grad_norm": 4.669424922639722, "learning_rate": 1.2035733455560305e-07, "loss": 0.6891, "step": 27880 }, { "epoch": 2.81, "grad_norm": 4.869084130597134, "learning_rate": 1.1971851494965104e-07, "loss": 0.6252, "step": 27885 }, { "epoch": 2.81, "grad_norm": 14.23946930823488, "learning_rate": 1.1908137464389625e-07, "loss": 0.651, "step": 27890 }, { "epoch": 2.81, "grad_norm": 6.59501403796409, "learning_rate": 1.1844591385757942e-07, "loss": 0.6481, "step": 27895 }, { "epoch": 2.81, "grad_norm": 4.9948682409578025, "learning_rate": 1.178121328093601e-07, "loss": 0.6465, "step": 27900 }, { "epoch": 2.81, "grad_norm": 4.395043259501831, "learning_rate": 1.1718003171732384e-07, "loss": 0.6535, "step": 27905 }, { "epoch": 2.81, "grad_norm": 5.313318406915275, "learning_rate": 1.1654961079897442e-07, "loss": 0.6322, "step": 27910 }, { "epoch": 2.81, "grad_norm": 5.613602971142326, "learning_rate": 1.1592087027123999e-07, "loss": 0.6677, "step": 27915 }, { "epoch": 2.81, "grad_norm": 5.073924506007503, "learning_rate": 1.152938103504686e-07, "loss": 0.6293, "step": 27920 }, { "epoch": 2.82, "grad_norm": 4.616028361075712, "learning_rate": 1.1466843125243322e-07, "loss": 0.638, "step": 27925 }, { "epoch": 2.82, "grad_norm": 5.381443254499906, "learning_rate": 1.140447331923239e-07, "loss": 0.6057, "step": 27930 }, { "epoch": 2.82, "grad_norm": 4.644498220335952, "learning_rate": 1.1342271638475455e-07, "loss": 0.6175, "step": 27935 }, { "epoch": 2.82, "grad_norm": 5.249951639838429, "learning_rate": 1.1280238104376173e-07, "loss": 0.6687, "step": 27940 }, { "epoch": 2.82, "grad_norm": 4.78029838029608, "learning_rate": 1.1218372738280137e-07, "loss": 0.6233, "step": 27945 }, { "epoch": 2.82, "grad_norm": 4.657499611806018, "learning_rate": 1.115667556147526e-07, "loss": 0.652, "step": 27950 }, { "epoch": 2.82, "grad_norm": 5.236393726741324, "learning_rate": 1.1095146595191397e-07, "loss": 0.6533, "step": 27955 }, { "epoch": 2.82, "grad_norm": 4.643644972620957, "learning_rate": 1.1033785860600666e-07, "loss": 0.6154, "step": 27960 }, { "epoch": 2.82, "grad_norm": 4.558633798199325, "learning_rate": 1.0972593378817176e-07, "loss": 0.6651, "step": 27965 }, { "epoch": 2.82, "grad_norm": 4.432431102445073, "learning_rate": 1.0911569170897252e-07, "loss": 0.6423, "step": 27970 }, { "epoch": 2.82, "grad_norm": 5.460147093023334, "learning_rate": 1.0850713257839152e-07, "loss": 0.6485, "step": 27975 }, { "epoch": 2.82, "grad_norm": 5.0266464235297015, "learning_rate": 1.0790025660583514e-07, "loss": 0.6477, "step": 27980 }, { "epoch": 2.82, "grad_norm": 4.702953698317971, "learning_rate": 1.0729506400012801e-07, "loss": 0.6596, "step": 27985 }, { "epoch": 2.82, "grad_norm": 4.1933909947174675, "learning_rate": 1.0669155496951633e-07, "loss": 0.5747, "step": 27990 }, { "epoch": 2.82, "grad_norm": 4.7820636291726055, "learning_rate": 1.0608972972166675e-07, "loss": 0.6255, "step": 27995 }, { "epoch": 2.82, "grad_norm": 5.026350823898908, "learning_rate": 1.0548958846366697e-07, "loss": 0.6381, "step": 28000 }, { "epoch": 2.82, "grad_norm": 4.826278656901001, "learning_rate": 1.0489113140202456e-07, "loss": 0.6491, "step": 28005 }, { "epoch": 2.82, "grad_norm": 5.047374508540337, "learning_rate": 1.0429435874266869e-07, "loss": 0.6206, "step": 28010 }, { "epoch": 2.82, "grad_norm": 5.188749544431611, "learning_rate": 1.0369927069094788e-07, "loss": 0.6763, "step": 28015 }, { "epoch": 2.83, "grad_norm": 5.4120748291853085, "learning_rate": 1.0310586745163276e-07, "loss": 0.6866, "step": 28020 }, { "epoch": 2.83, "grad_norm": 5.1115261193344494, "learning_rate": 1.0251414922891e-07, "loss": 0.6488, "step": 28025 }, { "epoch": 2.83, "grad_norm": 4.17494029759684, "learning_rate": 1.0192411622639175e-07, "loss": 0.596, "step": 28030 }, { "epoch": 2.83, "grad_norm": 4.61738215353897, "learning_rate": 1.0133576864710671e-07, "loss": 0.6573, "step": 28035 }, { "epoch": 2.83, "grad_norm": 4.420087106860671, "learning_rate": 1.0074910669350457e-07, "loss": 0.6262, "step": 28040 }, { "epoch": 2.83, "grad_norm": 4.711847300503816, "learning_rate": 1.0016413056745555e-07, "loss": 0.6349, "step": 28045 }, { "epoch": 2.83, "grad_norm": 4.972521433716798, "learning_rate": 9.95808404702503e-08, "loss": 0.6547, "step": 28050 }, { "epoch": 2.83, "grad_norm": 5.537633311092984, "learning_rate": 9.899923660259658e-08, "loss": 0.6912, "step": 28055 }, { "epoch": 2.83, "grad_norm": 5.051293484949038, "learning_rate": 9.841931916462433e-08, "loss": 0.6236, "step": 28060 }, { "epoch": 2.83, "grad_norm": 5.493774218309113, "learning_rate": 9.784108835588335e-08, "loss": 0.6791, "step": 28065 }, { "epoch": 2.83, "grad_norm": 4.576787830353115, "learning_rate": 9.726454437534116e-08, "loss": 0.6131, "step": 28070 }, { "epoch": 2.83, "grad_norm": 4.630404316217037, "learning_rate": 9.668968742138741e-08, "loss": 0.6616, "step": 28075 }, { "epoch": 2.83, "grad_norm": 4.891299088255928, "learning_rate": 9.611651769182828e-08, "loss": 0.6426, "step": 28080 }, { "epoch": 2.83, "grad_norm": 4.5114788096308756, "learning_rate": 9.554503538389215e-08, "loss": 0.6562, "step": 28085 }, { "epoch": 2.83, "grad_norm": 6.711102801133464, "learning_rate": 9.497524069422448e-08, "loss": 0.6548, "step": 28090 }, { "epoch": 2.83, "grad_norm": 5.066223190114222, "learning_rate": 9.440713381889233e-08, "loss": 0.6809, "step": 28095 }, { "epoch": 2.83, "grad_norm": 4.979158338870474, "learning_rate": 9.384071495337932e-08, "loss": 0.6261, "step": 28100 }, { "epoch": 2.83, "grad_norm": 4.64668267133173, "learning_rate": 9.327598429259122e-08, "loss": 0.6389, "step": 28105 }, { "epoch": 2.83, "grad_norm": 6.083600469147071, "learning_rate": 9.271294203085035e-08, "loss": 0.6395, "step": 28110 }, { "epoch": 2.83, "grad_norm": 4.465458441123022, "learning_rate": 9.215158836189897e-08, "loss": 0.6326, "step": 28115 }, { "epoch": 2.84, "grad_norm": 6.267730702068969, "learning_rate": 9.159192347889755e-08, "loss": 0.6629, "step": 28120 }, { "epoch": 2.84, "grad_norm": 5.391159402020457, "learning_rate": 9.103394757442818e-08, "loss": 0.6537, "step": 28125 }, { "epoch": 2.84, "grad_norm": 4.377867762700203, "learning_rate": 9.047766084048837e-08, "loss": 0.6667, "step": 28130 }, { "epoch": 2.84, "grad_norm": 5.430428637787642, "learning_rate": 8.992306346849721e-08, "loss": 0.6451, "step": 28135 }, { "epoch": 2.84, "grad_norm": 4.971622244790864, "learning_rate": 8.93701556492893e-08, "loss": 0.6494, "step": 28140 }, { "epoch": 2.84, "grad_norm": 4.812306458518884, "learning_rate": 8.881893757312132e-08, "loss": 0.6946, "step": 28145 }, { "epoch": 2.84, "grad_norm": 4.785301173240359, "learning_rate": 8.826940942966544e-08, "loss": 0.6429, "step": 28150 }, { "epoch": 2.84, "grad_norm": 5.8853191729513386, "learning_rate": 8.77215714080154e-08, "loss": 0.6692, "step": 28155 }, { "epoch": 2.84, "grad_norm": 5.361968654665912, "learning_rate": 8.717542369667986e-08, "loss": 0.6632, "step": 28160 }, { "epoch": 2.84, "grad_norm": 4.786993702451831, "learning_rate": 8.66309664835896e-08, "loss": 0.6265, "step": 28165 }, { "epoch": 2.84, "grad_norm": 4.65748116787266, "learning_rate": 8.608819995609085e-08, "loss": 0.6647, "step": 28170 }, { "epoch": 2.84, "grad_norm": 4.762331116389162, "learning_rate": 8.554712430094925e-08, "loss": 0.672, "step": 28175 }, { "epoch": 2.84, "grad_norm": 4.6782388279918115, "learning_rate": 8.50077397043475e-08, "loss": 0.6628, "step": 28180 }, { "epoch": 2.84, "grad_norm": 4.888639645100182, "learning_rate": 8.447004635188882e-08, "loss": 0.638, "step": 28185 }, { "epoch": 2.84, "grad_norm": 4.782015532131001, "learning_rate": 8.393404442859243e-08, "loss": 0.6418, "step": 28190 }, { "epoch": 2.84, "grad_norm": 4.625853443548703, "learning_rate": 8.33997341188958e-08, "loss": 0.6081, "step": 28195 }, { "epoch": 2.84, "grad_norm": 4.898338864474646, "learning_rate": 8.286711560665461e-08, "loss": 0.6188, "step": 28200 }, { "epoch": 2.84, "grad_norm": 5.4494813409120475, "learning_rate": 8.233618907514285e-08, "loss": 0.6067, "step": 28205 }, { "epoch": 2.84, "grad_norm": 4.97554379414742, "learning_rate": 8.180695470705157e-08, "loss": 0.6264, "step": 28210 }, { "epoch": 2.84, "grad_norm": 4.384806368994531, "learning_rate": 8.127941268448903e-08, "loss": 0.6668, "step": 28215 }, { "epoch": 2.85, "grad_norm": 4.543114379222686, "learning_rate": 8.075356318898331e-08, "loss": 0.6087, "step": 28220 }, { "epoch": 2.85, "grad_norm": 4.960937328784922, "learning_rate": 8.022940640147803e-08, "loss": 0.657, "step": 28225 }, { "epoch": 2.85, "grad_norm": 4.8604583820643255, "learning_rate": 7.970694250233502e-08, "loss": 0.6145, "step": 28230 }, { "epoch": 2.85, "grad_norm": 5.0096396603393805, "learning_rate": 7.91861716713338e-08, "loss": 0.6769, "step": 28235 }, { "epoch": 2.85, "grad_norm": 4.488894932987289, "learning_rate": 7.866709408767104e-08, "loss": 0.6487, "step": 28240 }, { "epoch": 2.85, "grad_norm": 5.000269938677299, "learning_rate": 7.814970992995996e-08, "loss": 0.6416, "step": 28245 }, { "epoch": 2.85, "grad_norm": 5.439658402094098, "learning_rate": 7.763401937623371e-08, "loss": 0.66, "step": 28250 }, { "epoch": 2.85, "grad_norm": 4.211970429252045, "learning_rate": 7.712002260393925e-08, "loss": 0.5953, "step": 28255 }, { "epoch": 2.85, "grad_norm": 5.0036940064815605, "learning_rate": 7.660771978994397e-08, "loss": 0.6751, "step": 28260 }, { "epoch": 2.85, "grad_norm": 5.004269407828965, "learning_rate": 7.609711111052964e-08, "loss": 0.6117, "step": 28265 }, { "epoch": 2.85, "grad_norm": 4.73802167825799, "learning_rate": 7.558819674139628e-08, "loss": 0.625, "step": 28270 }, { "epoch": 2.85, "grad_norm": 4.513531142950939, "learning_rate": 7.508097685766103e-08, "loss": 0.6253, "step": 28275 }, { "epoch": 2.85, "grad_norm": 4.782433144091685, "learning_rate": 7.457545163385815e-08, "loss": 0.6081, "step": 28280 }, { "epoch": 2.85, "grad_norm": 4.122275711517098, "learning_rate": 7.407162124393741e-08, "loss": 0.6561, "step": 28285 }, { "epoch": 2.85, "grad_norm": 4.99388262921669, "learning_rate": 7.356948586126789e-08, "loss": 0.6557, "step": 28290 }, { "epoch": 2.85, "grad_norm": 4.40740378136391, "learning_rate": 7.306904565863249e-08, "loss": 0.6308, "step": 28295 }, { "epoch": 2.85, "grad_norm": 4.894685108940628, "learning_rate": 7.25703008082329e-08, "loss": 0.6651, "step": 28300 }, { "epoch": 2.85, "grad_norm": 4.512397581048909, "learning_rate": 7.207325148168631e-08, "loss": 0.6553, "step": 28305 }, { "epoch": 2.85, "grad_norm": 4.426837513909502, "learning_rate": 7.1577897850027e-08, "loss": 0.6654, "step": 28310 }, { "epoch": 2.85, "grad_norm": 4.644005315701208, "learning_rate": 7.10842400837064e-08, "loss": 0.6559, "step": 28315 }, { "epoch": 2.86, "grad_norm": 4.608825459085924, "learning_rate": 7.059227835259086e-08, "loss": 0.6582, "step": 28320 }, { "epoch": 2.86, "grad_norm": 4.95551288211116, "learning_rate": 7.010201282596385e-08, "loss": 0.6505, "step": 28325 }, { "epoch": 2.86, "grad_norm": 6.002840384245793, "learning_rate": 6.961344367252654e-08, "loss": 0.6185, "step": 28330 }, { "epoch": 2.86, "grad_norm": 6.739543685579368, "learning_rate": 6.912657106039389e-08, "loss": 0.6302, "step": 28335 }, { "epoch": 2.86, "grad_norm": 4.368697153210344, "learning_rate": 6.8641395157098e-08, "loss": 0.656, "step": 28340 }, { "epoch": 2.86, "grad_norm": 5.085692854459775, "learning_rate": 6.815791612958866e-08, "loss": 0.6457, "step": 28345 }, { "epoch": 2.86, "grad_norm": 4.445170781602102, "learning_rate": 6.767613414422946e-08, "loss": 0.6764, "step": 28350 }, { "epoch": 2.86, "grad_norm": 4.554420320419269, "learning_rate": 6.719604936680168e-08, "loss": 0.6405, "step": 28355 }, { "epoch": 2.86, "grad_norm": 4.608501096926762, "learning_rate": 6.671766196250207e-08, "loss": 0.6301, "step": 28360 }, { "epoch": 2.86, "grad_norm": 4.375838707436465, "learning_rate": 6.624097209594338e-08, "loss": 0.6133, "step": 28365 }, { "epoch": 2.86, "grad_norm": 4.5510601164594044, "learning_rate": 6.57659799311533e-08, "loss": 0.648, "step": 28370 }, { "epoch": 2.86, "grad_norm": 4.507057003173995, "learning_rate": 6.529268563157775e-08, "loss": 0.6167, "step": 28375 }, { "epoch": 2.86, "grad_norm": 4.530000484593963, "learning_rate": 6.48210893600748e-08, "loss": 0.6149, "step": 28380 }, { "epoch": 2.86, "grad_norm": 4.561958919883844, "learning_rate": 6.435119127892187e-08, "loss": 0.6024, "step": 28385 }, { "epoch": 2.86, "grad_norm": 5.857815718909283, "learning_rate": 6.388299154981014e-08, "loss": 0.6474, "step": 28390 }, { "epoch": 2.86, "grad_norm": 4.530181675162402, "learning_rate": 6.341649033384633e-08, "loss": 0.6614, "step": 28395 }, { "epoch": 2.86, "grad_norm": 5.168044989441414, "learning_rate": 6.295168779155315e-08, "loss": 0.6458, "step": 28400 }, { "epoch": 2.86, "grad_norm": 4.6901341662647695, "learning_rate": 6.248858408286872e-08, "loss": 0.6399, "step": 28405 }, { "epoch": 2.86, "grad_norm": 4.4105278572727125, "learning_rate": 6.202717936714675e-08, "loss": 0.6443, "step": 28410 }, { "epoch": 2.86, "grad_norm": 4.419717470241641, "learning_rate": 6.15674738031563e-08, "loss": 0.6177, "step": 28415 }, { "epoch": 2.87, "grad_norm": 4.984848875763577, "learning_rate": 6.110946754908087e-08, "loss": 0.6572, "step": 28420 }, { "epoch": 2.87, "grad_norm": 5.196230562299611, "learning_rate": 6.065316076252103e-08, "loss": 0.6326, "step": 28425 }, { "epoch": 2.87, "grad_norm": 4.6588927974984395, "learning_rate": 6.01985536004901e-08, "loss": 0.6516, "step": 28430 }, { "epoch": 2.87, "grad_norm": 4.515798517057653, "learning_rate": 5.974564621941958e-08, "loss": 0.6212, "step": 28435 }, { "epoch": 2.87, "grad_norm": 4.920040257974786, "learning_rate": 5.929443877515317e-08, "loss": 0.6606, "step": 28440 }, { "epoch": 2.87, "grad_norm": 5.288818752985311, "learning_rate": 5.884493142295222e-08, "loss": 0.6362, "step": 28445 }, { "epoch": 2.87, "grad_norm": 4.6413709322516885, "learning_rate": 5.839712431749078e-08, "loss": 0.5756, "step": 28450 }, { "epoch": 2.87, "grad_norm": 4.351117395647321, "learning_rate": 5.7951017612858926e-08, "loss": 0.6283, "step": 28455 }, { "epoch": 2.87, "grad_norm": 4.561415313136726, "learning_rate": 5.750661146256165e-08, "loss": 0.6549, "step": 28460 }, { "epoch": 2.87, "grad_norm": 5.008280245016394, "learning_rate": 5.7063906019518277e-08, "loss": 0.6482, "step": 28465 }, { "epoch": 2.87, "grad_norm": 4.273730171002147, "learning_rate": 5.6622901436064746e-08, "loss": 0.5978, "step": 28470 }, { "epoch": 2.87, "grad_norm": 4.5930350497445405, "learning_rate": 5.6183597863948555e-08, "loss": 0.6405, "step": 28475 }, { "epoch": 2.87, "grad_norm": 5.114048875812452, "learning_rate": 5.57459954543349e-08, "loss": 0.6446, "step": 28480 }, { "epoch": 2.87, "grad_norm": 5.072325169261744, "learning_rate": 5.531009435780166e-08, "loss": 0.6248, "step": 28485 }, { "epoch": 2.87, "grad_norm": 4.572562477929448, "learning_rate": 5.487589472434274e-08, "loss": 0.6274, "step": 28490 }, { "epoch": 2.87, "grad_norm": 6.4104757417857945, "learning_rate": 5.444339670336474e-08, "loss": 0.6602, "step": 28495 }, { "epoch": 2.87, "grad_norm": 4.555281937287776, "learning_rate": 5.401260044369028e-08, "loss": 0.6843, "step": 28500 }, { "epoch": 2.87, "grad_norm": 4.332120683708192, "learning_rate": 5.358350609355634e-08, "loss": 0.6547, "step": 28505 }, { "epoch": 2.87, "grad_norm": 5.392595167719035, "learning_rate": 5.31561138006137e-08, "loss": 0.6257, "step": 28510 }, { "epoch": 2.87, "grad_norm": 5.111162902561142, "learning_rate": 5.2730423711927495e-08, "loss": 0.6605, "step": 28515 }, { "epoch": 2.88, "grad_norm": 5.485432804363226, "learning_rate": 5.230643597397722e-08, "loss": 0.6624, "step": 28520 }, { "epoch": 2.88, "grad_norm": 4.467263676798901, "learning_rate": 5.1884150732656737e-08, "loss": 0.6322, "step": 28525 }, { "epoch": 2.88, "grad_norm": 5.378733829067483, "learning_rate": 5.146356813327369e-08, "loss": 0.6203, "step": 28530 }, { "epoch": 2.88, "grad_norm": 4.93470561841416, "learning_rate": 5.104468832055065e-08, "loss": 0.6239, "step": 28535 }, { "epoch": 2.88, "grad_norm": 5.605602015042057, "learning_rate": 5.062751143862399e-08, "loss": 0.6922, "step": 28540 }, { "epoch": 2.88, "grad_norm": 4.825177520864215, "learning_rate": 5.021203763104332e-08, "loss": 0.6379, "step": 28545 }, { "epoch": 2.88, "grad_norm": 6.085782351837812, "learning_rate": 4.979826704077262e-08, "loss": 0.6138, "step": 28550 }, { "epoch": 2.88, "grad_norm": 6.327960087836945, "learning_rate": 4.9386199810190214e-08, "loss": 0.6185, "step": 28555 }, { "epoch": 2.88, "grad_norm": 4.725564634623508, "learning_rate": 4.8975836081088246e-08, "loss": 0.6457, "step": 28560 }, { "epoch": 2.88, "grad_norm": 5.797051546102331, "learning_rate": 4.856717599467209e-08, "loss": 0.6204, "step": 28565 }, { "epoch": 2.88, "grad_norm": 4.555268789277485, "learning_rate": 4.816021969156148e-08, "loss": 0.6455, "step": 28570 }, { "epoch": 2.88, "grad_norm": 4.6302031379590485, "learning_rate": 4.775496731178997e-08, "loss": 0.6175, "step": 28575 }, { "epoch": 2.88, "grad_norm": 4.398912320781518, "learning_rate": 4.735141899480433e-08, "loss": 0.6231, "step": 28580 }, { "epoch": 2.88, "grad_norm": 5.553316084687461, "learning_rate": 4.694957487946517e-08, "loss": 0.6552, "step": 28585 }, { "epoch": 2.88, "grad_norm": 4.666877163122385, "learning_rate": 4.65494351040463e-08, "loss": 0.6706, "step": 28590 }, { "epoch": 2.88, "grad_norm": 4.5545756201040595, "learning_rate": 4.615099980623594e-08, "loss": 0.6599, "step": 28595 }, { "epoch": 2.88, "grad_norm": 5.254769283834533, "learning_rate": 4.575426912313441e-08, "loss": 0.6596, "step": 28600 }, { "epoch": 2.88, "grad_norm": 5.205245943354018, "learning_rate": 4.5359243191258065e-08, "loss": 0.6479, "step": 28605 }, { "epoch": 2.88, "grad_norm": 4.379569371062987, "learning_rate": 4.496592214653317e-08, "loss": 0.6546, "step": 28610 }, { "epoch": 2.89, "grad_norm": 4.586698231517767, "learning_rate": 4.457430612430202e-08, "loss": 0.6606, "step": 28615 }, { "epoch": 2.89, "grad_norm": 4.551421074840228, "learning_rate": 4.4184395259319055e-08, "loss": 0.6152, "step": 28620 }, { "epoch": 2.89, "grad_norm": 5.2776623277684696, "learning_rate": 4.379618968575305e-08, "loss": 0.6255, "step": 28625 }, { "epoch": 2.89, "grad_norm": 5.500571494344823, "learning_rate": 4.340968953718327e-08, "loss": 0.6573, "step": 28630 }, { "epoch": 2.89, "grad_norm": 4.57162691956062, "learning_rate": 4.302489494660611e-08, "loss": 0.6461, "step": 28635 }, { "epoch": 2.89, "grad_norm": 5.088584468734806, "learning_rate": 4.264180604642787e-08, "loss": 0.6568, "step": 28640 }, { "epoch": 2.89, "grad_norm": 4.835489483073978, "learning_rate": 4.226042296846977e-08, "loss": 0.6373, "step": 28645 }, { "epoch": 2.89, "grad_norm": 4.995947787844424, "learning_rate": 4.1880745843964065e-08, "loss": 0.6473, "step": 28650 }, { "epoch": 2.89, "grad_norm": 6.055346263069701, "learning_rate": 4.150277480355902e-08, "loss": 0.6614, "step": 28655 }, { "epoch": 2.89, "grad_norm": 4.192967250079167, "learning_rate": 4.1126509977312266e-08, "loss": 0.5951, "step": 28660 }, { "epoch": 2.89, "grad_norm": 5.1990557125341, "learning_rate": 4.075195149469802e-08, "loss": 0.6582, "step": 28665 }, { "epoch": 2.89, "grad_norm": 4.760477686001706, "learning_rate": 4.0379099484600396e-08, "loss": 0.6702, "step": 28670 }, { "epoch": 2.89, "grad_norm": 4.6232514064036, "learning_rate": 4.000795407531788e-08, "loss": 0.6625, "step": 28675 }, { "epoch": 2.89, "grad_norm": 5.6543014868891825, "learning_rate": 3.963851539456054e-08, "loss": 0.6184, "step": 28680 }, { "epoch": 2.89, "grad_norm": 5.579939773683967, "learning_rate": 3.9270783569452794e-08, "loss": 0.6706, "step": 28685 }, { "epoch": 2.89, "grad_norm": 4.9146514121656235, "learning_rate": 3.89047587265301e-08, "loss": 0.6489, "step": 28690 }, { "epoch": 2.89, "grad_norm": 5.571272333715706, "learning_rate": 3.85404409917417e-08, "loss": 0.6581, "step": 28695 }, { "epoch": 2.89, "grad_norm": 4.444044399986175, "learning_rate": 3.817783049044899e-08, "loss": 0.6373, "step": 28700 }, { "epoch": 2.89, "grad_norm": 4.679007030962427, "learning_rate": 3.781692734742548e-08, "loss": 0.6619, "step": 28705 }, { "epoch": 2.89, "grad_norm": 4.484198835518726, "learning_rate": 3.745773168685796e-08, "loss": 0.654, "step": 28710 }, { "epoch": 2.9, "grad_norm": 4.51583489447258, "learning_rate": 3.7100243632344767e-08, "loss": 0.627, "step": 28715 }, { "epoch": 2.9, "grad_norm": 5.491214712554003, "learning_rate": 3.674446330689807e-08, "loss": 0.6442, "step": 28720 }, { "epoch": 2.9, "grad_norm": 4.871538819942215, "learning_rate": 3.639039083294105e-08, "loss": 0.6495, "step": 28725 }, { "epoch": 2.9, "grad_norm": 4.532573252897236, "learning_rate": 3.603802633230957e-08, "loss": 0.6503, "step": 28730 }, { "epoch": 2.9, "grad_norm": 4.605378611351734, "learning_rate": 3.5687369926252216e-08, "loss": 0.6521, "step": 28735 }, { "epoch": 2.9, "grad_norm": 4.854656221628359, "learning_rate": 3.533842173542967e-08, "loss": 0.6543, "step": 28740 }, { "epoch": 2.9, "grad_norm": 5.566033551316717, "learning_rate": 3.499118187991368e-08, "loss": 0.6348, "step": 28745 }, { "epoch": 2.9, "grad_norm": 4.536806510621569, "learning_rate": 3.464565047919033e-08, "loss": 0.6349, "step": 28750 }, { "epoch": 2.9, "grad_norm": 4.573169257564416, "learning_rate": 3.430182765215617e-08, "loss": 0.6367, "step": 28755 }, { "epoch": 2.9, "grad_norm": 5.124899593147868, "learning_rate": 3.395971351712046e-08, "loss": 0.622, "step": 28760 }, { "epoch": 2.9, "grad_norm": 5.411106375879562, "learning_rate": 3.361930819180404e-08, "loss": 0.6182, "step": 28765 }, { "epoch": 2.9, "grad_norm": 4.701918083291764, "learning_rate": 3.328061179334041e-08, "loss": 0.6617, "step": 28770 }, { "epoch": 2.9, "grad_norm": 4.451394066867098, "learning_rate": 3.294362443827415e-08, "loss": 0.6506, "step": 28775 }, { "epoch": 2.9, "grad_norm": 4.966870524830984, "learning_rate": 3.260834624256304e-08, "loss": 0.6466, "step": 28780 }, { "epoch": 2.9, "grad_norm": 4.952005053396158, "learning_rate": 3.227477732157536e-08, "loss": 0.6261, "step": 28785 }, { "epoch": 2.9, "grad_norm": 4.530753020996644, "learning_rate": 3.1942917790092064e-08, "loss": 0.6375, "step": 28790 }, { "epoch": 2.9, "grad_norm": 4.760861224352346, "learning_rate": 3.1612767762305706e-08, "loss": 0.6499, "step": 28795 }, { "epoch": 2.9, "grad_norm": 4.295427145204775, "learning_rate": 3.1284327351820964e-08, "loss": 0.6451, "step": 28800 }, { "epoch": 2.9, "grad_norm": 4.941272981204676, "learning_rate": 3.095759667165299e-08, "loss": 0.6474, "step": 28805 }, { "epoch": 2.9, "grad_norm": 5.0352796812221525, "learning_rate": 3.0632575834230184e-08, "loss": 0.6505, "step": 28810 }, { "epoch": 2.91, "grad_norm": 4.187736867890282, "learning_rate": 3.030926495139142e-08, "loss": 0.6327, "step": 28815 }, { "epoch": 2.91, "grad_norm": 4.441092784267658, "learning_rate": 2.998766413438881e-08, "loss": 0.6631, "step": 28820 }, { "epoch": 2.91, "grad_norm": 4.575506974996182, "learning_rate": 2.9667773493883833e-08, "loss": 0.6408, "step": 28825 }, { "epoch": 2.91, "grad_norm": 4.9177492504304015, "learning_rate": 2.9349593139950116e-08, "loss": 0.6361, "step": 28830 }, { "epoch": 2.91, "grad_norm": 6.001552291691809, "learning_rate": 2.903312318207452e-08, "loss": 0.6109, "step": 28835 }, { "epoch": 2.91, "grad_norm": 6.813831831113115, "learning_rate": 2.8718363729153264e-08, "loss": 0.5963, "step": 28840 }, { "epoch": 2.91, "grad_norm": 4.465535623922916, "learning_rate": 2.8405314889494718e-08, "loss": 0.6458, "step": 28845 }, { "epoch": 2.91, "grad_norm": 4.728693209432457, "learning_rate": 2.8093976770819377e-08, "loss": 0.6618, "step": 28850 }, { "epoch": 2.91, "grad_norm": 4.665203123879959, "learning_rate": 2.7784349480257654e-08, "loss": 0.6137, "step": 28855 }, { "epoch": 2.91, "grad_norm": 5.069120938493329, "learning_rate": 2.747643312435211e-08, "loss": 0.6607, "step": 28860 }, { "epoch": 2.91, "grad_norm": 5.394533717027899, "learning_rate": 2.717022780905687e-08, "loss": 0.6679, "step": 28865 }, { "epoch": 2.91, "grad_norm": 4.495963076629805, "learning_rate": 2.686573363973599e-08, "loss": 0.6757, "step": 28870 }, { "epoch": 2.91, "grad_norm": 7.649924820960543, "learning_rate": 2.6562950721167324e-08, "loss": 0.6546, "step": 28875 }, { "epoch": 2.91, "grad_norm": 5.541609873383282, "learning_rate": 2.6261879157536417e-08, "loss": 0.6147, "step": 28880 }, { "epoch": 2.91, "grad_norm": 5.56383200283721, "learning_rate": 2.5962519052442627e-08, "loss": 0.6423, "step": 28885 }, { "epoch": 2.91, "grad_norm": 4.566909873140656, "learning_rate": 2.566487050889521e-08, "loss": 0.6306, "step": 28890 }, { "epoch": 2.91, "grad_norm": 5.792659340433287, "learning_rate": 2.536893362931503e-08, "loss": 0.6826, "step": 28895 }, { "epoch": 2.91, "grad_norm": 4.966064284959946, "learning_rate": 2.5074708515532843e-08, "loss": 0.625, "step": 28900 }, { "epoch": 2.91, "grad_norm": 5.277741643462846, "learning_rate": 2.4782195268792663e-08, "loss": 0.662, "step": 28905 }, { "epoch": 2.91, "grad_norm": 4.4850580438407865, "learning_rate": 2.449139398974676e-08, "loss": 0.6188, "step": 28910 }, { "epoch": 2.92, "grad_norm": 4.677554062649509, "learning_rate": 2.4202304778460083e-08, "loss": 0.6213, "step": 28915 }, { "epoch": 2.92, "grad_norm": 4.664419136392676, "learning_rate": 2.3914927734408068e-08, "loss": 0.7018, "step": 28920 }, { "epoch": 2.92, "grad_norm": 4.547867417991105, "learning_rate": 2.3629262956476605e-08, "loss": 0.6345, "step": 28925 }, { "epoch": 2.92, "grad_norm": 4.245861658306631, "learning_rate": 2.334531054296263e-08, "loss": 0.6428, "step": 28930 }, { "epoch": 2.92, "grad_norm": 5.527071288715775, "learning_rate": 2.306307059157409e-08, "loss": 0.6533, "step": 28935 }, { "epoch": 2.92, "grad_norm": 5.01586437245096, "learning_rate": 2.2782543199429407e-08, "loss": 0.6258, "step": 28940 }, { "epoch": 2.92, "grad_norm": 5.136739091567763, "learning_rate": 2.250372846305804e-08, "loss": 0.6246, "step": 28945 }, { "epoch": 2.92, "grad_norm": 4.407009944313833, "learning_rate": 2.222662647839935e-08, "loss": 0.6838, "step": 28950 }, { "epoch": 2.92, "grad_norm": 4.477006030237568, "learning_rate": 2.1951237340804287e-08, "loss": 0.6309, "step": 28955 }, { "epoch": 2.92, "grad_norm": 4.92534364353323, "learning_rate": 2.1677561145034275e-08, "loss": 0.6635, "step": 28960 }, { "epoch": 2.92, "grad_norm": 5.0919001765583545, "learning_rate": 2.1405597985260096e-08, "loss": 0.656, "step": 28965 }, { "epoch": 2.92, "grad_norm": 4.620281001158985, "learning_rate": 2.1135347955064666e-08, "loss": 0.6142, "step": 28970 }, { "epoch": 2.92, "grad_norm": 4.52863045661348, "learning_rate": 2.0866811147440825e-08, "loss": 0.6213, "step": 28975 }, { "epoch": 2.92, "grad_norm": 4.328487665223623, "learning_rate": 2.0599987654791877e-08, "loss": 0.6211, "step": 28980 }, { "epoch": 2.92, "grad_norm": 4.585527272224497, "learning_rate": 2.0334877568930488e-08, "loss": 0.6704, "step": 28985 }, { "epoch": 2.92, "grad_norm": 4.881744284413331, "learning_rate": 2.0071480981082025e-08, "loss": 0.6598, "step": 28990 }, { "epoch": 2.92, "grad_norm": 5.006635771364114, "learning_rate": 1.980979798188065e-08, "loss": 0.5994, "step": 28995 }, { "epoch": 2.92, "grad_norm": 5.228311640387504, "learning_rate": 1.9549828661371562e-08, "loss": 0.6527, "step": 29000 }, { "epoch": 2.92, "grad_norm": 5.266788354535082, "learning_rate": 1.9291573109008754e-08, "loss": 0.6247, "step": 29005 }, { "epoch": 2.92, "grad_norm": 4.670303037452094, "learning_rate": 1.903503141365892e-08, "loss": 0.6328, "step": 29010 }, { "epoch": 2.93, "grad_norm": 5.461366194656828, "learning_rate": 1.8780203663597562e-08, "loss": 0.6534, "step": 29015 }, { "epoch": 2.93, "grad_norm": 4.666554491042867, "learning_rate": 1.852708994651009e-08, "loss": 0.6293, "step": 29020 }, { "epoch": 2.93, "grad_norm": 4.600295377045515, "learning_rate": 1.82756903494935e-08, "loss": 0.6302, "step": 29025 }, { "epoch": 2.93, "grad_norm": 5.060027863989442, "learning_rate": 1.80260049590536e-08, "loss": 0.6439, "step": 29030 }, { "epoch": 2.93, "grad_norm": 4.7525193226534554, "learning_rate": 1.7778033861106657e-08, "loss": 0.6362, "step": 29035 }, { "epoch": 2.93, "grad_norm": 5.346645034935819, "learning_rate": 1.7531777140980534e-08, "loss": 0.6455, "step": 29040 }, { "epoch": 2.93, "grad_norm": 4.530727983211217, "learning_rate": 1.7287234883410775e-08, "loss": 0.6448, "step": 29045 }, { "epoch": 2.93, "grad_norm": 4.409577499950719, "learning_rate": 1.7044407172543964e-08, "loss": 0.6295, "step": 29050 }, { "epoch": 2.93, "grad_norm": 4.738767443957085, "learning_rate": 1.6803294091937707e-08, "loss": 0.6209, "step": 29055 }, { "epoch": 2.93, "grad_norm": 4.795117614097678, "learning_rate": 1.6563895724558966e-08, "loss": 0.6724, "step": 29060 }, { "epoch": 2.93, "grad_norm": 4.948788391002489, "learning_rate": 1.632621215278296e-08, "loss": 0.6555, "step": 29065 }, { "epoch": 2.93, "grad_norm": 5.1906257152362905, "learning_rate": 1.609024345839816e-08, "loss": 0.6347, "step": 29070 }, { "epoch": 2.93, "grad_norm": 4.519593142681205, "learning_rate": 1.5855989722600164e-08, "loss": 0.6477, "step": 29075 }, { "epoch": 2.93, "grad_norm": 7.336439870075627, "learning_rate": 1.5623451025995605e-08, "loss": 0.6656, "step": 29080 }, { "epoch": 2.93, "grad_norm": 5.152827841994058, "learning_rate": 1.5392627448601038e-08, "loss": 0.6836, "step": 29085 }, { "epoch": 2.93, "grad_norm": 4.919461978854965, "learning_rate": 1.5163519069842924e-08, "loss": 0.641, "step": 29090 }, { "epoch": 2.93, "grad_norm": 5.28278762714753, "learning_rate": 1.4936125968555982e-08, "loss": 0.6402, "step": 29095 }, { "epoch": 2.93, "grad_norm": 4.664234099410667, "learning_rate": 1.4710448222987617e-08, "loss": 0.6693, "step": 29100 }, { "epoch": 2.93, "grad_norm": 4.5214278499396485, "learning_rate": 1.448648591079238e-08, "loss": 0.6272, "step": 29105 }, { "epoch": 2.93, "grad_norm": 5.322543422565335, "learning_rate": 1.4264239109035293e-08, "loss": 0.6445, "step": 29110 }, { "epoch": 2.94, "grad_norm": 5.001090059520327, "learning_rate": 1.4043707894191294e-08, "loss": 0.6431, "step": 29115 }, { "epoch": 2.94, "grad_norm": 4.858119709583894, "learning_rate": 1.382489234214579e-08, "loss": 0.6427, "step": 29120 }, { "epoch": 2.94, "grad_norm": 4.12636538178491, "learning_rate": 1.3607792528192443e-08, "loss": 0.664, "step": 29125 }, { "epoch": 2.94, "grad_norm": 4.381062848659886, "learning_rate": 1.3392408527034828e-08, "loss": 0.6266, "step": 29130 }, { "epoch": 2.94, "grad_norm": 4.636254099359382, "learning_rate": 1.3178740412786995e-08, "loss": 0.6124, "step": 29135 }, { "epoch": 2.94, "grad_norm": 5.89566771162359, "learning_rate": 1.2966788258971797e-08, "loss": 0.6728, "step": 29140 }, { "epoch": 2.94, "grad_norm": 4.962272202172932, "learning_rate": 1.275655213852145e-08, "loss": 0.6557, "step": 29145 }, { "epoch": 2.94, "grad_norm": 4.58680564099952, "learning_rate": 1.2548032123777531e-08, "loss": 0.6365, "step": 29150 }, { "epoch": 2.94, "grad_norm": 4.482177696174969, "learning_rate": 1.23412282864932e-08, "loss": 0.6259, "step": 29155 }, { "epoch": 2.94, "grad_norm": 4.556075353290765, "learning_rate": 1.2136140697827647e-08, "loss": 0.6113, "step": 29160 }, { "epoch": 2.94, "grad_norm": 4.9236493920375715, "learning_rate": 1.1932769428352753e-08, "loss": 0.6693, "step": 29165 }, { "epoch": 2.94, "grad_norm": 4.378265065373981, "learning_rate": 1.1731114548046984e-08, "loss": 0.6324, "step": 29170 }, { "epoch": 2.94, "grad_norm": 5.684460127715717, "learning_rate": 1.1531176126300948e-08, "loss": 0.6659, "step": 29175 }, { "epoch": 2.94, "grad_norm": 4.441044263152972, "learning_rate": 1.1332954231912385e-08, "loss": 0.656, "step": 29180 }, { "epoch": 2.94, "grad_norm": 5.058056139113183, "learning_rate": 1.1136448933090072e-08, "loss": 0.6624, "step": 29185 }, { "epoch": 2.94, "grad_norm": 6.786763547952368, "learning_rate": 1.0941660297449919e-08, "loss": 0.64, "step": 29190 }, { "epoch": 2.94, "grad_norm": 4.766601665819574, "learning_rate": 1.0748588392019976e-08, "loss": 0.655, "step": 29195 }, { "epoch": 2.94, "grad_norm": 4.627987229644329, "learning_rate": 1.0557233283235436e-08, "loss": 0.6978, "step": 29200 }, { "epoch": 2.94, "grad_norm": 5.066294768413905, "learning_rate": 1.0367595036941402e-08, "loss": 0.6804, "step": 29205 }, { "epoch": 2.95, "grad_norm": 5.36893000113714, "learning_rate": 1.0179673718392347e-08, "loss": 0.6507, "step": 29210 }, { "epoch": 2.95, "grad_norm": 4.440729392593793, "learning_rate": 9.993469392251542e-09, "loss": 0.621, "step": 29215 }, { "epoch": 2.95, "grad_norm": 5.157361517178861, "learning_rate": 9.808982122591626e-09, "loss": 0.6287, "step": 29220 }, { "epoch": 2.95, "grad_norm": 5.203885568995115, "learning_rate": 9.62621197289515e-09, "loss": 0.6518, "step": 29225 }, { "epoch": 2.95, "grad_norm": 4.6360921751758095, "learning_rate": 9.445159006052918e-09, "loss": 0.6565, "step": 29230 }, { "epoch": 2.95, "grad_norm": 4.43492102869121, "learning_rate": 9.26582328436454e-09, "loss": 0.6023, "step": 29235 }, { "epoch": 2.95, "grad_norm": 4.347866074081188, "learning_rate": 9.088204869540096e-09, "loss": 0.6087, "step": 29240 }, { "epoch": 2.95, "grad_norm": 4.382927803473537, "learning_rate": 8.912303822697366e-09, "loss": 0.6657, "step": 29245 }, { "epoch": 2.95, "grad_norm": 5.906534231293559, "learning_rate": 8.738120204363487e-09, "loss": 0.6746, "step": 29250 }, { "epoch": 2.95, "grad_norm": 4.626001112094817, "learning_rate": 8.565654074476071e-09, "loss": 0.6432, "step": 29255 }, { "epoch": 2.95, "grad_norm": 4.613228889261147, "learning_rate": 8.394905492378758e-09, "loss": 0.6669, "step": 29260 }, { "epoch": 2.95, "grad_norm": 4.950407492252555, "learning_rate": 8.225874516827325e-09, "loss": 0.6557, "step": 29265 }, { "epoch": 2.95, "grad_norm": 4.8617479981023735, "learning_rate": 8.05856120598525e-09, "loss": 0.64, "step": 29270 }, { "epoch": 2.95, "grad_norm": 5.431349888918077, "learning_rate": 7.892965617423698e-09, "loss": 0.6653, "step": 29275 }, { "epoch": 2.95, "grad_norm": 4.570237562146757, "learning_rate": 7.729087808124868e-09, "loss": 0.6714, "step": 29280 }, { "epoch": 2.95, "grad_norm": 4.504146337990504, "learning_rate": 7.56692783447921e-09, "loss": 0.6527, "step": 29285 }, { "epoch": 2.95, "grad_norm": 4.851906925953283, "learning_rate": 7.406485752284864e-09, "loss": 0.6225, "step": 29290 }, { "epoch": 2.95, "grad_norm": 5.8402884513015625, "learning_rate": 7.247761616750449e-09, "loss": 0.6469, "step": 29295 }, { "epoch": 2.95, "grad_norm": 4.4351542588978035, "learning_rate": 7.0907554824922734e-09, "loss": 0.6303, "step": 29300 }, { "epoch": 2.95, "grad_norm": 4.64055372052512, "learning_rate": 6.935467403536567e-09, "loss": 0.6229, "step": 29305 }, { "epoch": 2.96, "grad_norm": 4.894591370143973, "learning_rate": 6.781897433317808e-09, "loss": 0.7172, "step": 29310 }, { "epoch": 2.96, "grad_norm": 4.798009572789954, "learning_rate": 6.630045624678727e-09, "loss": 0.6643, "step": 29315 }, { "epoch": 2.96, "grad_norm": 4.366269183216473, "learning_rate": 6.479912029872526e-09, "loss": 0.6645, "step": 29320 }, { "epoch": 2.96, "grad_norm": 5.12951703426055, "learning_rate": 6.331496700558437e-09, "loss": 0.6074, "step": 29325 }, { "epoch": 2.96, "grad_norm": 4.854777420180117, "learning_rate": 6.1847996878072745e-09, "loss": 0.5839, "step": 29330 }, { "epoch": 2.96, "grad_norm": 4.656105622679166, "learning_rate": 6.039821042096439e-09, "loss": 0.6215, "step": 29335 }, { "epoch": 2.96, "grad_norm": 5.39358766372186, "learning_rate": 5.896560813313801e-09, "loss": 0.6232, "step": 29340 }, { "epoch": 2.96, "grad_norm": 4.391871972235465, "learning_rate": 5.755019050754373e-09, "loss": 0.6248, "step": 29345 }, { "epoch": 2.96, "grad_norm": 5.191262489667674, "learning_rate": 5.6151958031230855e-09, "loss": 0.6542, "step": 29350 }, { "epoch": 2.96, "grad_norm": 4.596109269185588, "learning_rate": 5.477091118532562e-09, "loss": 0.637, "step": 29355 }, { "epoch": 2.96, "grad_norm": 4.630244059530306, "learning_rate": 5.34070504450479e-09, "loss": 0.6157, "step": 29360 }, { "epoch": 2.96, "grad_norm": 4.727987744073469, "learning_rate": 5.2060376279700065e-09, "loss": 0.6494, "step": 29365 }, { "epoch": 2.96, "grad_norm": 4.508158304269665, "learning_rate": 5.073088915267255e-09, "loss": 0.647, "step": 29370 }, { "epoch": 2.96, "grad_norm": 4.8063412094916815, "learning_rate": 4.941858952143275e-09, "loss": 0.6464, "step": 29375 }, { "epoch": 2.96, "grad_norm": 4.577388459120201, "learning_rate": 4.812347783755278e-09, "loss": 0.6468, "step": 29380 }, { "epoch": 2.96, "grad_norm": 5.693548063025151, "learning_rate": 4.6845554546676164e-09, "loss": 0.6603, "step": 29385 }, { "epoch": 2.96, "grad_norm": 4.680480106578478, "learning_rate": 4.558482008852894e-09, "loss": 0.6685, "step": 29390 }, { "epoch": 2.96, "grad_norm": 4.4956655850957965, "learning_rate": 4.434127489693629e-09, "loss": 0.6422, "step": 29395 }, { "epoch": 2.96, "grad_norm": 4.334691883331565, "learning_rate": 4.311491939979484e-09, "loss": 0.666, "step": 29400 }, { "epoch": 2.96, "grad_norm": 4.838608087764427, "learning_rate": 4.190575401910036e-09, "loss": 0.661, "step": 29405 }, { "epoch": 2.97, "grad_norm": 7.5438973303204495, "learning_rate": 4.071377917091446e-09, "loss": 0.7081, "step": 29410 }, { "epoch": 2.97, "grad_norm": 4.435445651075055, "learning_rate": 3.95389952654035e-09, "loss": 0.6411, "step": 29415 }, { "epoch": 2.97, "grad_norm": 4.562288771628546, "learning_rate": 3.838140270680524e-09, "loss": 0.638, "step": 29420 }, { "epoch": 2.97, "grad_norm": 6.483748229835458, "learning_rate": 3.7241001893451035e-09, "loss": 0.6452, "step": 29425 }, { "epoch": 2.97, "grad_norm": 4.857673991541536, "learning_rate": 3.611779321774367e-09, "loss": 0.6119, "step": 29430 }, { "epoch": 2.97, "grad_norm": 5.737987373281746, "learning_rate": 3.501177706618508e-09, "loss": 0.6412, "step": 29435 }, { "epoch": 2.97, "grad_norm": 4.5811841822383785, "learning_rate": 3.392295381935418e-09, "loss": 0.6375, "step": 29440 }, { "epoch": 2.97, "grad_norm": 4.978688056473059, "learning_rate": 3.2851323851906812e-09, "loss": 0.6638, "step": 29445 }, { "epoch": 2.97, "grad_norm": 4.967654656326927, "learning_rate": 3.179688753259802e-09, "loss": 0.671, "step": 29450 }, { "epoch": 2.97, "grad_norm": 4.528084776273715, "learning_rate": 3.075964522425423e-09, "loss": 0.6668, "step": 29455 }, { "epoch": 2.97, "grad_norm": 5.7681723490814, "learning_rate": 2.9739597283789946e-09, "loss": 0.6578, "step": 29460 }, { "epoch": 2.97, "grad_norm": 5.237788531018208, "learning_rate": 2.8736744062202173e-09, "loss": 0.6294, "step": 29465 }, { "epoch": 2.97, "grad_norm": 5.369903138483945, "learning_rate": 2.775108590457598e-09, "loss": 0.6641, "step": 29470 }, { "epoch": 2.97, "grad_norm": 4.349803726300211, "learning_rate": 2.6782623150067857e-09, "loss": 0.6181, "step": 29475 }, { "epoch": 2.97, "grad_norm": 4.870894509436404, "learning_rate": 2.583135613193344e-09, "loss": 0.6202, "step": 29480 }, { "epoch": 2.97, "grad_norm": 5.561984308798503, "learning_rate": 2.4897285177494234e-09, "loss": 0.6406, "step": 29485 }, { "epoch": 2.97, "grad_norm": 4.970137297751924, "learning_rate": 2.3980410608170914e-09, "loss": 0.6224, "step": 29490 }, { "epoch": 2.97, "grad_norm": 5.930025831692994, "learning_rate": 2.3080732739455545e-09, "loss": 0.6585, "step": 29495 }, { "epoch": 2.97, "grad_norm": 6.05574857651503, "learning_rate": 2.2198251880922726e-09, "loss": 0.6353, "step": 29500 }, { "epoch": 2.97, "grad_norm": 4.581775862440681, "learning_rate": 2.1332968336240654e-09, "loss": 0.6416, "step": 29505 }, { "epoch": 2.98, "grad_norm": 4.370277333475543, "learning_rate": 2.0484882403148943e-09, "loss": 0.6805, "step": 29510 }, { "epoch": 2.98, "grad_norm": 5.3428214713148945, "learning_rate": 1.965399437347526e-09, "loss": 0.6417, "step": 29515 }, { "epoch": 2.98, "grad_norm": 4.8639952216571185, "learning_rate": 1.8840304533118694e-09, "loss": 0.6714, "step": 29520 }, { "epoch": 2.98, "grad_norm": 5.333033103133333, "learning_rate": 1.8043813162083036e-09, "loss": 0.631, "step": 29525 }, { "epoch": 2.98, "grad_norm": 4.919789592117598, "learning_rate": 1.7264520534426844e-09, "loss": 0.6757, "step": 29530 }, { "epoch": 2.98, "grad_norm": 4.901987244613265, "learning_rate": 1.6502426918313387e-09, "loss": 0.6216, "step": 29535 }, { "epoch": 2.98, "grad_norm": 4.979375934899607, "learning_rate": 1.575753257597734e-09, "loss": 0.6487, "step": 29540 }, { "epoch": 2.98, "grad_norm": 4.651082696096523, "learning_rate": 1.502983776373035e-09, "loss": 0.6217, "step": 29545 }, { "epoch": 2.98, "grad_norm": 4.466242260270571, "learning_rate": 1.431934273197766e-09, "loss": 0.6561, "step": 29550 }, { "epoch": 2.98, "grad_norm": 4.667264339911075, "learning_rate": 1.3626047725195934e-09, "loss": 0.6647, "step": 29555 }, { "epoch": 2.98, "grad_norm": 5.900916909243711, "learning_rate": 1.2949952981949899e-09, "loss": 0.6604, "step": 29560 }, { "epoch": 2.98, "grad_norm": 4.553663231238028, "learning_rate": 1.2291058734881234e-09, "loss": 0.6462, "step": 29565 }, { "epoch": 2.98, "grad_norm": 6.09318450504494, "learning_rate": 1.1649365210714137e-09, "loss": 0.6316, "step": 29570 }, { "epoch": 2.98, "grad_norm": 4.814946633549386, "learning_rate": 1.1024872630255311e-09, "loss": 0.6211, "step": 29575 }, { "epoch": 2.98, "grad_norm": 5.257440542630988, "learning_rate": 1.0417581208393979e-09, "loss": 0.6522, "step": 29580 }, { "epoch": 2.98, "grad_norm": 4.669418086862083, "learning_rate": 9.827491154096315e-10, "loss": 0.6647, "step": 29585 }, { "epoch": 2.98, "grad_norm": 4.696641863133449, "learning_rate": 9.254602670416557e-10, "loss": 0.6464, "step": 29590 }, { "epoch": 2.98, "grad_norm": 4.579217604058113, "learning_rate": 8.698915954480358e-10, "loss": 0.619, "step": 29595 }, { "epoch": 2.98, "grad_norm": 4.632195703141335, "learning_rate": 8.160431197495878e-10, "loss": 0.6176, "step": 29600 }, { "epoch": 2.98, "grad_norm": 5.773687529065707, "learning_rate": 7.639148584764889e-10, "loss": 0.653, "step": 29605 }, { "epoch": 2.99, "grad_norm": 4.402195898389152, "learning_rate": 7.135068295649472e-10, "loss": 0.6091, "step": 29610 }, { "epoch": 2.99, "grad_norm": 4.683534156836418, "learning_rate": 6.648190503610874e-10, "loss": 0.6599, "step": 29615 }, { "epoch": 2.99, "grad_norm": 5.25776544357083, "learning_rate": 6.178515376181748e-10, "loss": 0.6392, "step": 29620 }, { "epoch": 2.99, "grad_norm": 4.690548389862812, "learning_rate": 5.726043074977262e-10, "loss": 0.6222, "step": 29625 }, { "epoch": 2.99, "grad_norm": 5.384653894013326, "learning_rate": 5.290773755689537e-10, "loss": 0.6298, "step": 29630 }, { "epoch": 2.99, "grad_norm": 4.766865932934473, "learning_rate": 4.872707568093216e-10, "loss": 0.6399, "step": 29635 }, { "epoch": 2.99, "grad_norm": 6.264078290847827, "learning_rate": 4.471844656050994e-10, "loss": 0.6358, "step": 29640 }, { "epoch": 2.99, "grad_norm": 5.789134124948843, "learning_rate": 4.088185157491431e-10, "loss": 0.6618, "step": 29645 }, { "epoch": 2.99, "grad_norm": 5.066132528261486, "learning_rate": 3.7217292044422484e-10, "loss": 0.6361, "step": 29650 }, { "epoch": 2.99, "grad_norm": 4.259020902234293, "learning_rate": 3.372476922991474e-10, "loss": 0.6293, "step": 29655 }, { "epoch": 2.99, "grad_norm": 5.152132238576465, "learning_rate": 3.040428433315201e-10, "loss": 0.6497, "step": 29660 }, { "epoch": 2.99, "grad_norm": 7.046910457321705, "learning_rate": 2.7255838496775824e-10, "loss": 0.6266, "step": 29665 }, { "epoch": 2.99, "grad_norm": 4.462013754364072, "learning_rate": 2.42794328041418e-10, "loss": 0.6296, "step": 29670 }, { "epoch": 2.99, "grad_norm": 4.547167003537467, "learning_rate": 2.1475068279430688e-10, "loss": 0.6, "step": 29675 }, { "epoch": 2.99, "grad_norm": 4.4896715999147565, "learning_rate": 1.884274588759283e-10, "loss": 0.6708, "step": 29680 }, { "epoch": 2.99, "grad_norm": 5.0500035500995555, "learning_rate": 1.6382466534459184e-10, "loss": 0.6508, "step": 29685 }, { "epoch": 2.99, "grad_norm": 4.448348736657129, "learning_rate": 1.4094231066574816e-10, "loss": 0.6573, "step": 29690 }, { "epoch": 2.99, "grad_norm": 4.191738441045034, "learning_rate": 1.1978040271309887e-10, "loss": 0.6245, "step": 29695 }, { "epoch": 2.99, "grad_norm": 4.598097897991266, "learning_rate": 1.0033894876859685e-10, "loss": 0.6464, "step": 29700 }, { "epoch": 2.99, "grad_norm": 4.963954418786347, "learning_rate": 8.261795552189089e-11, "loss": 0.6265, "step": 29705 }, { "epoch": 3.0, "grad_norm": 4.68072279151534, "learning_rate": 6.661742907088098e-11, "loss": 0.6927, "step": 29710 }, { "epoch": 3.0, "grad_norm": 5.287053203230731, "learning_rate": 5.23373749217182e-11, "loss": 0.6326, "step": 29715 }, { "epoch": 3.0, "grad_norm": 4.513974310633034, "learning_rate": 3.9777797987139435e-11, "loss": 0.6449, "step": 29720 }, { "epoch": 3.0, "grad_norm": 4.508111634160351, "learning_rate": 2.893870258979803e-11, "loss": 0.6469, "step": 29725 }, { "epoch": 3.0, "grad_norm": 4.394271709957407, "learning_rate": 1.982009245948824e-11, "loss": 0.6508, "step": 29730 }, { "epoch": 3.0, "grad_norm": 5.741623800409336, "learning_rate": 1.2421970733145217e-11, "loss": 0.6351, "step": 29735 }, { "epoch": 3.0, "grad_norm": 4.517673242269964, "learning_rate": 6.744339957065471e-12, "loss": 0.6477, "step": 29740 }, { "epoch": 3.0, "grad_norm": 4.693274488472275, "learning_rate": 2.7872020846864135e-12, "loss": 0.6142, "step": 29745 }, { "epoch": 3.0, "grad_norm": 5.337435074057215, "learning_rate": 5.505584776965833e-13, "loss": 0.6744, "step": 29750 }, { "epoch": 3.0, "eval_loss": 1.1189379692077637, "eval_runtime": 25.4202, "eval_samples_per_second": 31.707, "eval_steps_per_second": 3.973, "step": 29754 }, { "epoch": 3.0, "step": 29754, "total_flos": 1024319199117312.0, "train_loss": 1.0088004046184542, "train_runtime": 70733.587, "train_samples_per_second": 6.731, "train_steps_per_second": 0.421 } ], "logging_steps": 5, "max_steps": 29754, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1024319199117312.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }